10 #ifndef MUELU_UTILITIESBASE_DEF_HPP
11 #define MUELU_UTILITIESBASE_DEF_HPP
17 #include <Kokkos_Core.hpp>
18 #include <KokkosSparse_CrsMatrix.hpp>
19 #include <KokkosSparse_getDiagCopy.hpp>
21 #include <Xpetra_BlockedVector.hpp>
22 #include <Xpetra_BlockedMap.hpp>
23 #include <Xpetra_BlockedMultiVector.hpp>
31 #include <Xpetra_CrsMatrixWrap.hpp>
32 #include <Xpetra_StridedMap.hpp>
37 #include <KokkosKernels_Handle.hpp>
38 #include <KokkosGraph_RCM.hpp>
// Wraps a CRS matrix in a newly allocated CrsMatrixWrap and returns it through
// an RCP typed as the Xpetra::Matrix interface.
// NOTE(review): the signature and any handling of a null input are not visible
// in this excerpt — confirm against the full definition.
42 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
43 RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
// The RCP created here takes ownership of the wrapper object.
48 return rcp(
new CrsMatrixWrap(Op));
// Tail of the signature of the small-entry filter (the kernel labels below name
// it "removeSmallEntries"). The visible body builds a new local CRS matrix from
// A's local device matrix in two passes: a parallel_scan that counts the kept
// entries per row and fills `rowptr`, then a kernel that copies the kept column
// indices and values. An entry is kept when its magnitude exceeds `threshold`.
// NOTE(review): the branch that chooses between the two variants below is not
// visible; presumably it tests `keepDiagonal` — confirm.
58 const bool keepDiagonal) {
60 using row_ptr_type =
typename crs_matrix::local_graph_type::row_map_type::non_const_type;
61 using col_idx_type =
typename crs_matrix::local_graph_type::entries_type::non_const_type;
62 using vals_type =
typename crs_matrix::local_matrix_type::values_type;
63 using execution_space =
typename crs_matrix::local_matrix_type::execution_space;
65 using ATS = Kokkos::ArithTraits<Scalar>;
66 using impl_SC =
typename ATS::val_type;
67 using impl_ATS = Kokkos::ArithTraits<impl_SC>;
69 auto lclA = A->getLocalMatrixDevice();
// One more slot than rows: rowptr(r + 1) receives the running count after row r.
71 auto rowptr = row_ptr_type(
"rowptr", lclA.numRows() + 1);
// Local maps are used to translate a local row id into the matching local
// column id (row lid -> gid -> column lid).
77 auto lclRowMap = A->
getRowMap()->getLocalMap();
78 auto lclColMap = A->getColMap()->getLocalMap();
// Variant 1, pass 1: count entries that are large enough OR sit on the diagonal.
79 Kokkos::parallel_scan(
80 "removeSmallEntries::rowptr",
81 Kokkos::RangePolicy<LocalOrdinal, execution_space>(0, lclA.numRows()),
83 auto row = lclA.row(rlid);
84 auto rowInCol = lclColMap.getLocalElement(lclRowMap.getGlobalElement(rlid));
86 if ((impl_ATS::magnitude(row.value(k)) > threshold) || (row.colidx(k) == rowInCol)) {
91 rowptr(rlid + 1) = partial_nnz;
95 idx = col_idx_type(
"idx", nnz);
96 vals = vals_type(
"vals", nnz);
// Variant 1, pass 2: copy the kept entries, starting at this row's offset I.
// The keep test must match the one used for counting above.
99 "removeSmallEntries::indicesValues",
100 Kokkos::RangePolicy<LocalOrdinal, execution_space>(0, lclA.numRows()),
102 auto row = lclA.row(rlid);
103 auto rowInCol = lclColMap.getLocalElement(lclRowMap.getGlobalElement(rlid));
104 auto I = rowptr(rlid);
106 if ((impl_ATS::magnitude(row.value(k)) > threshold) || (row.colidx(k) == rowInCol)) {
107 idx(
I) = row.colidx(k);
108 vals(
I) = row.value(k);
// Variant 2, pass 1: count entries by magnitude only (no diagonal exception).
116 Kokkos::parallel_scan(
117 "removeSmallEntries::rowptr",
118 Kokkos::RangePolicy<LocalOrdinal, execution_space>(0, lclA.numRows()),
120 auto row = lclA.row(rlid);
122 if (impl_ATS::magnitude(row.value(k)) > threshold) {
127 rowptr(rlid + 1) = partial_nnz;
131 idx = col_idx_type(
"idx", nnz);
132 vals = vals_type(
"vals", nnz);
// Variant 2, pass 2: copy the kept entries using the same magnitude-only test.
134 Kokkos::parallel_for(
135 "removeSmallEntries::indicesValues",
136 Kokkos::RangePolicy<LocalOrdinal, execution_space>(0, lclA.numRows()),
138 auto row = lclA.row(rlid);
139 auto I = rowptr(rlid);
141 if (impl_ATS::magnitude(row.value(k)) > threshold) {
142 idx(
I) = row.colidx(k);
143 vals(
I) = row.value(k);
// Assemble the filtered local matrix with the same row/column counts as lclA.
152 auto lclNewA =
typename crs_matrix::local_matrix_type(
"thresholdedMatrix", lclA.numRows(), lclA.numCols(), nnz, vals, rowptr, idx);
// Returns a CrsMatrixWrap holding a thresholded copy of `Ain`.
// If `Ain` is itself a CrsMatrixWrap, the visible code takes its CRS matrix and
// returns `filteredMat` (how it is produced is not visible — presumably via the
// device-side small-entry filter; confirm). Otherwise a new matrix is assembled
// row by row on the host.
158 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
159 RCP<Xpetra::CrsMatrixWrap<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
162 auto crsWrap = rcp_dynamic_cast<CrsMatrixWrap>(Ain);
163 if (!crsWrap.is_null()) {
164 auto crsMat = crsWrap->getCrsMatrix();
166 return rcp_static_cast<CrsMatrixWrap>(filteredMat);
// Fallback path: a non-positive expectedNNZperRow means "use the global maximum
// row length of Ain" as the per-row allocation hint.
171 RCP<CrsMatrixWrap> Aout =
rcp(
new CrsMatrixWrap(rowmap, expectedNNZperRow <= 0 ? Ain->getGlobalMaxNumRowEntries() : expectedNNZperRow));
173 for (
size_t row = 0; row < Ain->getLocalNumRows(); row++) {
174 size_t nnz = Ain->getNumEntriesInLocalRow(row);
178 Ain->getLocalRowView(row, indices, vals);
// nNonzeros counts the entries kept for this row.
184 size_t nNonzeros = 0;
// Local column id of this row's diagonal (glbRow is defined on a line not shown).
187 LocalOrdinal lclColIdx = colmap->getLocalElement(glbRow);
// Two copy loops follow; the keep conditions inside them are not visible here.
// Kept entries are stored with GLOBAL column ids.
188 for (
size_t i = 0; i < (size_t)indices.
size(); i++) {
190 indout[nNonzeros] = colmap->getGlobalElement(indices[i]);
191 valout[nNonzeros] = vals[i];
196 for (
size_t i = 0; i < (size_t)indices.
size(); i++) {
198 indout[nNonzeros] = colmap->getGlobalElement(indices[i]);
199 valout[nNonzeros] = vals[i];
// Trim the value buffer to the number of kept entries before inserting the row.
205 valout.resize(nNonzeros);
207 Aout->insertGlobalValues(Ain->getRowMap()->getGlobalElement(row), indout.view(0, indout.size()), valout.view(0, valout.size()));
// The result keeps the domain and range maps of the input.
209 Aout->fillComplete(Ain->getDomainMap(), Ain->getRangeMap());
// Builds and returns a CrsGraph on A's row map containing, for each local row,
// the diagonal plus every entry whose scaled magnitude exceeds the magnitude of
// `threshold`.
214 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// A non-positive expectedNNZperRow falls back to A's global maximum row length.
219 RCP<CrsGraph> sparsityPattern = CrsGraphFactory::Build(A->getRowMap(), expectedNNZperRow <= 0 ? A->getGlobalMaxNumRowEntries() : expectedNNZperRow);
221 RCP<Vector> diag = GetMatrixOverlappedDiagonal(*A);
224 for (
size_t row = 0; row < A->getLocalNumRows(); row++) {
227 A->getLocalRowView(row, indices, vals);
// `col` is the local column id that corresponds to this row (its diagonal).
229 GlobalOrdinal globalRow = A->getRowMap()->getGlobalElement(row);
230 LocalOrdinal col = A->getColMap()->getLocalElement(globalRow);
// Dk is the magnitude of the diagonal value at `col`, or 1 when that is zero.
// (D is defined on a line not shown; presumably a view of `diag` — confirm.)
232 const Scalar Dk = STS::magnitude(D[col]) > 0.0 ? STS::magnitude(D[col]) : 1.0;
235 for (
size_t i = 0; i < size_t(indices.
size()); i++)
// Both scaling factors use this row's Dk (sqrt(Dk) * value * sqrt(Dk)).
237 if (col == indices[i] || STS::magnitude(STS::squareroot(Dk) * vals[i] * STS::squareroot(Dk)) > STS::magnitude(threshold))
238 indicesNew.
append(A->getColMap()->getGlobalElement(indices[i]));
242 sparsityPattern->fillComplete();
244 return sparsityPattern;
// Scans each local row of A for the entry whose column index equals the row
// index, i.e. the diagonal entry.
// NOTE(review): what is stored for found/missing diagonals and the return value
// are not visible in this excerpt.
247 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
251 size_t numRows = A.getRowMap()->getLocalNumElements();
255 for (
size_t i = 0; i < numRows; ++i) {
// Linear search through the row; j is left at the position of the match.
258 for (; j < cols.
size(); ++j) {
259 if (Teuchos::as<size_t>(cols[j]) == i) {
// j == cols.size() means the search ran off the end: no diagonal entry stored.
264 if (j == cols.
size()) {
// Copies the local diagonal of A into the vector `diag`.
// Two routes are visible: one through a CrsMatrixWrap using precomputed diagonal
// offsets, and one calling A.getLocalDiagCopy directly.
// NOTE(review): the condition selecting between them is not visible; presumably
// the offset route is taken only when the dynamic_cast succeeds — confirm.
272 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
276 const auto rowMap = A.getRowMap();
// dynamic_cast on a pointer yields nullptr when A is not a CrsMatrixWrap.
279 const CrsMatrixWrap* crsOp =
dynamic_cast<const CrsMatrixWrap*
>(&A);
281 using device_type =
typename CrsGraph::device_type;
// One offset per local row, filled by the graph and then used for the copy.
282 Kokkos::View<size_t*, device_type> offsets(
"offsets", rowMap->getLocalNumElements());
283 crsOp->getCrsGraph()->getLocalDiagOffsets(offsets);
284 crsOp->getCrsMatrix()->getLocalDiagCopy(*diag, offsets);
286 A.getLocalDiagCopy(*diag);
// Computes, per local row of A, the reciprocal of either the diagonal entry or
// the sum of entry magnitudes of the row, writing the result into a new vector
// on A's row map. Values whose magnitude does not exceed `tol` are replaced by
// `valReplacement`.
// NOTE(review): the branch selecting between the two kernels is not visible;
// presumably it tests `doLumped` — confirm.
308 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
314 const bool doLumped) {
327 using local_matrix_type =
typename Matrix::local_matrix_type;
329 using value_type =
typename local_matrix_type::value_type;
330 using values_type =
typename local_matrix_type::values_type;
331 using scalar_type =
typename values_type::non_const_value_type;
332 using ordinal_type =
typename local_matrix_type::ordinal_type;
333 using execution_space =
typename local_matrix_type::execution_space;
340 using KAT = Kokkos::ArithTraits<value_type>;
// NOTE(review): the second argument is `false`; if that disables zero
// initialization, the accumulating kernel below starts from undefined values —
// confirm.
344 RCP<Vector> diag = VectorFactory::Build(rowMap,
false);
347 local_matrix_type localMatrix = A.getLocalMatrixDevice();
348 auto diagVals = diag->getDeviceLocalView(Xpetra::Access::ReadWrite);
350 ordinal_type numRows = localMatrix.graph.numRows();
// Copy of valReplacement in the device scalar type, captured by the kernels.
352 scalar_type valReplacement_dev = valReplacement;
// Kernel 1: reciprocal of the diagonal entry (column index equal to row index).
359 Kokkos::parallel_for(
360 "Utilities::GetMatrixDiagonalInverse",
361 Kokkos::RangePolicy<ordinal_type, execution_space>(0, numRows),
362 KOKKOS_LAMBDA(
const ordinal_type rowIdx) {
363 bool foundDiagEntry =
false;
364 auto myRow = localMatrix.rowConst(rowIdx);
365 for (ordinal_type entryIdx = 0; entryIdx < myRow.length; ++entryIdx) {
366 if (myRow.colidx(entryIdx) == rowIdx) {
367 foundDiagEntry =
true;
// Invert only when the diagonal is larger in magnitude than tol.
368 if (KAT::magnitude(myRow.value(entryIdx)) > KAT::magnitude(tol)) {
369 diagVals(rowIdx, 0) = KAT::one() / myRow.value(entryIdx);
371 diagVals(rowIdx, 0) = valReplacement_dev;
// Rows without a stored diagonal entry get zero.
377 if (!foundDiagEntry) {
378 diagVals(rowIdx, 0) = KAT::zero();
// Kernel 2: reciprocal of the sum of magnitudes of all entries in the row.
382 Kokkos::parallel_for(
383 "Utilities::GetMatrixDiagonalInverse",
384 Kokkos::RangePolicy<ordinal_type, execution_space>(0, numRows),
385 KOKKOS_LAMBDA(
const ordinal_type rowIdx) {
386 auto myRow = localMatrix.rowConst(rowIdx);
387 for (ordinal_type entryIdx = 0; entryIdx < myRow.length; ++entryIdx) {
388 diagVals(rowIdx, 0) += KAT::magnitude(myRow.value(entryIdx));
390 if (KAT::magnitude(diagVals(rowIdx, 0)) > KAT::magnitude(tol))
391 diagVals(rowIdx, 0) = KAT::one() / diagVals(rowIdx, 0);
393 diagVals(rowIdx, 0) = valReplacement_dev;
// Computes a lumped diagonal (per-row sum of entry magnitudes) of a matrix and,
// in the inverse variants shown, its reciprocal with optional regularization.
// Three paths are visible: a Kokkos device path when the row map's library is
// Tpetra, a host loop otherwise, and a per-block accumulation for blocked
// operators (bA non-null).
// NOTE(review): several branch conditions (e.g. the one selecting the inverse
// variant) are on lines not shown here.
399 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
405 const bool replaceSingleEntryRowWithZero,
406 const bool useAverageAbsDiagVal) {
410 const Scalar zero = TST::zero();
411 const Scalar one = TST::one();
412 const Scalar two = one + one;
// bA == null: the operator is not treated as a blocked matrix.
418 if (bA == Teuchos::null) {
422 if (rowMap->lib() == Xpetra::UnderlyingLib::UseTpetra) {
425 using local_vector_type =
typename Vector::dual_view_type::t_dev_um;
426 using local_matrix_type =
typename Matrix::local_matrix_type;
427 using execution_space =
typename local_vector_type::execution_space;
430 using values_type =
typename local_matrix_type::values_type;
431 using scalar_type =
typename values_type::non_const_value_type;
432 using mag_type =
typename Kokkos::ArithTraits<scalar_type>::mag_type;
433 using KAT_S =
typename Kokkos::ArithTraits<scalar_type>;
434 using KAT_M =
typename Kokkos::ArithTraits<mag_type>;
435 using size_type =
typename local_matrix_type::non_const_size_type;
437 local_vector_type diag_dev = diag->getDeviceLocalView(Xpetra::Access::OverwriteAll);
438 local_matrix_type local_mat_dev = rcpA->getLocalMatrixDevice();
439 Kokkos::RangePolicy<execution_space, int> my_policy(0, static_cast<int>(diag_dev.extent(0)));
440 scalar_type valReplacement_dev = valReplacement;
// Per-row scratch: nonzero count and plain (signed) row sum; plus two scalar
// accumulators updated atomically from the kernel.
443 Kokkos::View<int*, execution_space> nnzPerRow(
"nnz per rows", diag_dev.extent(0));
444 Kokkos::View<scalar_type*, execution_space> regSum(
"regSum", diag_dev.extent(0));
445 Kokkos::View<mag_type, execution_space> avgAbsDiagVal_dev(
"avgAbsDiagVal");
446 Kokkos::View<int, execution_space> numDiagsEqualToOne_dev(
"numDiagsEqualToOne");
// Kernel: accumulate |a_ij| into diag_dev and a_ij into regSum for each row.
450 Kokkos::parallel_for(
451 "GetLumpedMatrixDiagonal", my_policy,
452 KOKKOS_LAMBDA(
const int rowIdx) {
453 diag_dev(rowIdx, 0) = KAT_S::zero();
454 for (size_type entryIdx = local_mat_dev.graph.row_map(rowIdx);
455 entryIdx < local_mat_dev.graph.row_map(rowIdx + 1);
457 regSum(rowIdx) += local_mat_dev.values(entryIdx);
// Only entries with nonzero magnitude enter this branch (its body is partly hidden).
458 if (KAT_M::zero() < KAT_S::abs(local_mat_dev.values(entryIdx))) {
461 diag_dev(rowIdx, 0) += KAT_S::abs(local_mat_dev.values(entryIdx));
// The diagonal entry also contributes to the global sum of |a_ii|.
462 if (rowIdx == local_mat_dev.graph.entries(entryIdx)) {
463 Kokkos::atomic_add(&avgAbsDiagVal_dev(), KAT_S::abs(local_mat_dev.values(entryIdx)));
// Count rows that have a single nonzero whose lumped magnitude equals one.
467 if (nnzPerRow(rowIdx) == 1 && KAT_S::magnitude(diag_dev(rowIdx, 0)) == KAT_M::one()) {
468 Kokkos::atomic_add(&numDiagsEqualToOne_dev(), 1);
472 if (useAverageAbsDiagVal) {
474 typename Kokkos::View<mag_type, execution_space>::HostMirror avgAbsDiagVal = Kokkos::create_mirror_view(avgAbsDiagVal_dev);
475 Kokkos::deep_copy(avgAbsDiagVal, avgAbsDiagVal_dev);
// NOTE(review): the destination is a plain, uninitialized int filled by
// deep_copy from a rank-0 view — confirm this overload is the intended one.
476 int numDiagsEqualToOne;
477 Kokkos::deep_copy(numDiagsEqualToOne, numDiagsEqualToOne_dev);
// Kernel: turn the lumped values into (regularized) reciprocals.
484 Kokkos::parallel_for(
485 "ComputeLumpedDiagonalInverse", my_policy,
486 KOKKOS_LAMBDA(
const int rowIdx) {
487 if (replaceSingleEntryRowWithZero && nnzPerRow(rowIdx) <= 1) {
488 diag_dev(rowIdx, 0) = KAT_S::zero();
489 }
// Regularization: when the lumped value is nonzero but smaller in magnitude
// than twice the signed row sum, use 1 / |2 * rowsum| instead.
else if ((diag_dev(rowIdx, 0) != KAT_S::zero()) && (KAT_S::magnitude(diag_dev(rowIdx, 0)) < KAT_S::magnitude(2 * regSum(rowIdx)))) {
490 diag_dev(rowIdx, 0) = KAT_S::one() / KAT_S::magnitude(2 * regSum(rowIdx));
// Otherwise invert when above tol, else substitute the replacement value.
492 if (KAT_S::magnitude(diag_dev(rowIdx, 0)) > tol) {
493 diag_dev(rowIdx, 0) = KAT_S::one() / diag_dev(rowIdx, 0);
495 diag_dev(rowIdx, 0) = valReplacement_dev;
// Kernel: plain lumped diagonal (sum of magnitudes), no inversion.
503 Kokkos::parallel_for(
504 "GetLumpedMatrixDiagonal", my_policy,
505 KOKKOS_LAMBDA(
const int rowIdx) {
506 diag_dev(rowIdx, 0) = KAT_S::zero();
507 for (size_type entryIdx = local_mat_dev.graph.row_map(rowIdx);
508 entryIdx < local_mat_dev.graph.row_map(rowIdx + 1);
510 diag_dev(rowIdx, 0) += KAT_S::magnitude(local_mat_dev.values(entryIdx));
// Host path: same quantities as the device path, computed with row views.
522 std::vector<int> nnzPerRow(rowMap->getLocalNumElements());
527 const Magnitude zeroMagn = TST::magnitude(zero);
528 Magnitude avgAbsDiagVal = TST::magnitude(zero);
529 int numDiagsEqualToOne = 0;
530 for (
size_t i = 0; i < rowMap->getLocalNumElements(); ++i) {
532 rcpA->getLocalRowView(i, cols, vals);
535 regSum[i] += vals[j];
536 const Magnitude rowEntryMagn = TST::magnitude(vals[j]);
537 if (rowEntryMagn > zeroMagn)
539 diagVals[i] += rowEntryMagn;
540 if (static_cast<size_t>(cols[j]) == i)
541 avgAbsDiagVal += rowEntryMagn;
543 if (nnzPerRow[i] == 1 && TST::magnitude(diagVals[i]) == 1.)
544 numDiagsEqualToOne++;
546 if (useAverageAbsDiagVal)
// Host counterpart of the "ComputeLumpedDiagonalInverse" kernel above.
549 for (
size_t i = 0; i < rowMap->getLocalNumElements(); ++i) {
550 if (replaceSingleEntryRowWithZero && nnzPerRow[i] <= static_cast<int>(1))
552 else if ((diagVals[i] != zero) && (TST::magnitude(diagVals[i]) < TST::magnitude(two * regSum[i])))
553 diagVals[i] = one / TST::magnitude((two * regSum[i]));
555 if (TST::magnitude(diagVals[i]) > tol)
556 diagVals[i] = one / diagVals[i];
558 diagVals[i] = valReplacement;
// Blocked operators: the reciprocal variant is rejected with this message.
566 "UtilitiesBase::GetLumpedMatrixDiagonal(): extracting reciprocal of diagonal of a blocked matrix is not supported");
// For every non-null sub-block, add its contribution (dd, computed on a line not
// shown) into the slice of `diag` that belongs to block row `row`.
569 for (
size_t row = 0; row < bA->Rows(); ++row) {
570 for (
size_t col = 0; col < bA->Cols(); ++col) {
571 if (!bA->getMatrix(row, col).
is_null()) {
574 RCP<Vector> ddtemp = bA->getRangeMapExtractor()->ExtractVector(diag, row, bThyraMode);
// ddtemp = 1.0 * dd + 1.0 * ddtemp, then written back into diag.
576 ddtemp->update(Teuchos::as<Scalar>(1.0), *dd, Teuchos::as<Scalar>(1.0));
577 bA->getRangeMapExtractor()->InsertVector(ddtemp, row, diag, bThyraMode);
// For each local row of A, reduces over the off-diagonal entries with
// max(current, -|a_ij|) starting from zero, and stores the result in `diag`.
586 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
595 using local_vector_type =
typename Vector::dual_view_type::t_dev_um;
596 using local_matrix_type =
typename Matrix::local_matrix_type;
597 using execution_space =
typename local_vector_type::execution_space;
598 using values_type =
typename local_matrix_type::values_type;
599 using scalar_type =
typename values_type::non_const_value_type;
600 using mag_type =
typename Kokkos::ArithTraits<scalar_type>::mag_type;
601 using KAT_S =
typename Kokkos::ArithTraits<scalar_type>;
602 using KAT_M =
typename Kokkos::ArithTraits<mag_type>;
603 using size_type =
typename local_matrix_type::non_const_size_type;
605 auto diag_dev = diag->getDeviceLocalView(Xpetra::Access::OverwriteAll);
606 auto local_mat_dev = A.getLocalMatrixDevice();
607 Kokkos::RangePolicy<execution_space, int> my_policy(0, static_cast<int>(diag_dev.extent(0)));
609 Kokkos::parallel_for(
610 "GetMatrixMaxMinusOffDiagonal", my_policy,
612 auto mymax = KAT_M::zero();
613 auto row = local_mat_dev.row(rowIdx);
614 for (
LocalOrdinal entryIdx = 0; entryIdx < row.length; ++entryIdx) {
// Skip the diagonal (column index equal to the row index).
615 if (rowIdx != row.colidx(entryIdx)) {
// NOTE(review): mymax starts at zero and the candidate is a negated magnitude,
// so if magnitude() is non-negative this max can never move away from zero.
// Also, std::max is called inside a Kokkos kernel. Confirm both are intended.
616 mymax = std::max(mymax, -KAT_S::magnitude(row.value(entryIdx)));
619 diag_dev(rowIdx, 0) = mymax;
// Block-aware variant: same reduction as the plain version, but only over
// off-diagonal entries whose column has the same block number as the row.
// Throws std::runtime_error when BlockNumber's map differs from A's column map.
625 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
629 TEUCHOS_TEST_FOR_EXCEPTION(!A.getColMap()->isSameAs(*BlockNumber.
getMap()), std::runtime_error,
"GetMatrixMaxMinusOffDiagonal: BlockNumber must match's A's column map.");
636 using local_vector_type =
typename Vector::dual_view_type::t_dev_um;
637 using local_matrix_type =
typename Matrix::local_matrix_type;
638 using execution_space =
typename local_vector_type::execution_space;
639 using values_type =
typename local_matrix_type::values_type;
640 using scalar_type =
typename values_type::non_const_value_type;
641 using mag_type =
typename Kokkos::ArithTraits<scalar_type>::mag_type;
642 using KAT_S =
typename Kokkos::ArithTraits<scalar_type>;
643 using KAT_M =
typename Kokkos::ArithTraits<mag_type>;
644 using size_type =
typename local_matrix_type::non_const_size_type;
646 auto diag_dev = diag->getDeviceLocalView(Xpetra::Access::OverwriteAll);
647 auto local_mat_dev = A.getLocalMatrixDevice();
648 auto local_block_dev = BlockNumber.getDeviceLocalView(Xpetra::Access::ReadOnly);
649 Kokkos::RangePolicy<execution_space, int> my_policy(0, static_cast<int>(diag_dev.extent(0)));
651 Kokkos::parallel_for(
652 "GetMatrixMaxMinusOffDiagonal", my_policy,
654 auto mymax = KAT_M::zero();
655 auto row = local_mat_dev.row(rowIdx);
656 for (
LocalOrdinal entryIdx = 0; entryIdx < row.length; ++entryIdx) {
// The block number is read at index rowIdx for the row and at the column's
// local index for the entry; both index the column-map-based BlockNumber view.
657 if ((rowIdx != row.colidx(entryIdx)) && (local_block_dev(rowIdx, 0) == local_block_dev(row.colidx(entryIdx), 0))) {
// NOTE(review): as in the non-block variant, max(0, -magnitude) appears to stay
// at zero — confirm intent.
658 mymax = std::max(mymax, -KAT_S::magnitude(row.value(entryIdx)));
661 diag_dev(rowIdx, 0) = mymax;
// Element-wise processing of a vector with a replacement value; a blocked input
// is handled sub-vector by sub-vector, a plain vector entry by entry.
// NOTE(review): the actual per-entry formula and the condition under which
// `valReplacement` is used are not visible in this excerpt.
667 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// Non-null bv: the input was recognized as a blocked vector.
675 if (bv.is_null() ==
false) {
679 for (
size_t r = 0; r < bmap->getNumMaps(); ++r) {
// Store the processed sub-vector r back into the blocked result.
683 bret->setMultiVector(r, subvecinf, bmap->getThyraMode());
691 for (
size_t i = 0; i < v->getMap()->getLocalNumElements(); ++i) {
695 retVals[i] = valReplacement;
// Builds a vector on the column map (`colMap`) intended to hold diagonal values.
// If no importer is available, one from the row map to the column map is built.
// NOTE(review): the import step itself and the return are not visible here.
738 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
744 RCP<Vector> diagonal = VectorFactory::Build(colMap);
746 if (importer == Teuchos::null) {
747 importer = ImportFactory::Build(rowMap, colMap);
// Iterates over the local rows of A and visits every entry whose local column
// index differs from the row index (the off-diagonal entries).
// NOTE(review): what is accumulated for those entries is not visible here.
754 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// For a blocked row map, use the underlying (full) map instead.
763 if (!browMap.
is_null()) rowMap = browMap->getMap();
769 for (
LO row = 0; row < static_cast<LO>(A.getRowMap()->getLocalNumElements()); ++row) {
777 for (
LO colID = 0; colID < static_cast<LO>(nnz); colID++) {
778 if (indices[colID] != row) {
// Fallback when the matrix provides no importer (body not shown).
787 if (importer == Teuchos::null) {
// For each local row of A, sums the magnitudes of all entries whose local
// column index differs from the row index and stores the sum in localVals.
794 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// For a blocked row map, use the underlying (full) map instead.
806 if (!browMap.
is_null()) rowMap = browMap->getMap();
812 for (
LO rowIdx = 0; rowIdx < static_cast<LO>(A.getRowMap()->getLocalNumElements()); ++rowIdx) {
820 for (
LO colID = 0; colID < static_cast<LO>(nnz); ++colID) {
821 if (indices[colID] != rowIdx) {
822 si += STS::magnitude(vals[colID]);
825 localVals[rowIdx] = si;
// Fallback when the matrix provides no importer (body not shown).
830 if (importer == Teuchos::null) {
// Only the query of the number of columns of X is visible for this routine.
// NOTE(review): presumably one value per column of X is produced — confirm
// against the full definition.
837 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
842 const size_t numVecs = X.getNumVectors();
// Variant that fills the caller-supplied `Resid` by delegating to Residual().
// NOTE(review): how numVecs and Resid are used afterwards is not visible here.
849 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
854 const size_t numVecs = X.getNumVectors();
855 Residual(Op, X, RHS, Resid);
// Computes the residual for (X, RHS) by calling Op.residual into the
// multivector that RES points to.
// NOTE(review): the allocation of RES and the return are not visible here.
861 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
866 const size_t numVecs = X.getNumVectors();
869 Op.residual(X, RHS, *RES);
// In-place variant: forwards to Op.residual, writing into the caller's `Resid`.
873 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
878 Op.residual(X, RHS, Resid);
// Convenience overload: obtains the inverse diagonal of A where requested and
// forwards to the PowerMethod overload that takes `diagInvVec`.
// NOTE(review): the condition guarding the diagonal computation and the check
// that raises the message below are on lines not shown.
881 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
887 "Utils::PowerMethod: operator must have domain and range maps that are equivalent.");
892 diagInvVec = GetMatrixDiagonalInverse(A);
895 Scalar lambda = PowerMethod(A, diagInvVec, niters, tolerance, verbose, seed);
// Power iteration on A, optionally scaled element-wise by `diagInvVec`.
// Runs at most `niters` iterations and evaluates a relative residual every
// 100 iterations and on the last one.
// NOTE(review): the operator application, the update of lambda and the return
// statement are on lines not shown in this excerpt.
899 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
902 PowerMethod(
const Matrix& A,
const RCP<Vector>& diagInvVec,
905 "Utils::PowerMethod: operator must have domain and range maps that are equivalent.");
919 const Scalar zero = STS::zero(), one = STS::one();
922 Magnitude
residual = STS::magnitude(zero);
925 for (
int iter = 0;
iter < niters; ++
iter) {
// q = z / norms[0]: normalize the current iterate.
927 q->update(one / norms[0], *z, zero);
// Apply the diagonal scaling only when a scaling vector was supplied.
929 if (diagInvVec != Teuchos::null)
930 z->elementWiseMultiply(one, *diagInvVec, *z, zero);
933 if (iter % 100 == 0 || iter + 1 == niters) {
// r = z - lambda * q; the residual is then taken relative to lambda.
934 r->update(1.0, *z, -lambda, *q, zero);
936 residual = STS::magnitude(norms[0] / lambda);
938 std::cout <<
"Iter = " <<
iter
939 <<
" Lambda = " << lambda
940 <<
" Residual of A*q - lambda*q = " << residual
// Convergence test against the requested tolerance (action not shown).
944 if (residual < tolerance)
// Declaration fragment of a routine returning an RCP to a Teuchos::FancyOStream.
// NOTE(review): its name, parameters and body are not visible in this excerpt.
950 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
951 RCP<Teuchos::FancyOStream>
// Accumulates the squared difference between entries i0 and i1 over every
// array in `v` (one term per array).
958 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
962 const size_t numVectors = v.size();
965 for (
size_t j = 0; j < numVectors; j++) {
966 d += (v[j][i0] - v[j][i1]) * (v[j][i0] - v[j][i1]);
// Weighted variant: each squared difference between entries i0 and i1 is
// multiplied by weight[j] (converted to type MT) before being accumulated.
971 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
975 const size_t numVectors = v.size();
979 for (
size_t j = 0; j < numVectors; j++) {
980 d += Teuchos::as<MT>(weight[j]) * (v[j][i0] - v[j][i1]) * (v[j][i0] - v[j][i1]);
// Host routine that flags rows as boundary (Dirichlet) rows and returns the
// flag array. An off-diagonal entry counts as significant when its magnitude
// exceeds `tol`.
// NOTE(review): the initial value of each flag and the loop-exit statements are
// on lines not shown; the comments below describe only the visible logic.
985 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// Variant that tolerates one significant off-diagonal entry per row.
992 if (count_twos_as_dirichlet) {
1000 for (col = 0; col < nnz; col++)
1001 if ((indices[col] != row) && STS::magnitude(vals[col]) > tol) {
// If the flag was already cleared by an earlier significant entry, this is the
// second such entry (the action taken is not visible); otherwise clear it now.
1002 if (!boundaryNodes[row])
1004 boundaryNodes[row] =
false;
1007 boundaryNodes[row] =
true;
// Default variant: any significant off-diagonal entry clears the flag.
1017 for (
size_t col = 0; col < nnz; col++)
1018 if ((indices[col] != row) && STS::magnitude(vals[col]) > tol) {
1019 boundaryNodes[row] =
false;
1024 return boundaryNodes;
// Kokkos implementation of Dirichlet-row detection. The flags are computed in
// the node's device memory space and returned directly when `memory_space`
// matches it; otherwise they are deep-copied into a view in `memory_space`.
// A row is flagged when it has no off-diagonal entry with magnitude above `tol`
// (with count_twos_as_dirichlet, one such entry is tolerated).
1027 template <
class SC,
class LO,
class GO,
class NO,
class memory_space>
1028 Kokkos::View<bool*, memory_space>
1031 const bool count_twos_as_dirichlet) {
1032 using impl_scalar_type =
typename Kokkos::ArithTraits<SC>::val_type;
1033 using ATS = Kokkos::ArithTraits<impl_scalar_type>;
1034 using range_type = Kokkos::RangePolicy<LO, typename NO::execution_space>;
1037 Kokkos::View<bool*, typename NO::device_type::memory_space> boundaryNodes;
// Block-CRS matrices are handled through their graph and a flat values array.
1039 if (helpers::isTpetraBlockCrs(A)) {
1041 auto b_graph = Am.getCrsGraph().getLocalGraphDevice();
1042 auto b_rowptr = Am.getCrsGraph().getLocalRowPtrsDevice();
// Allocated without initialization; every row is assigned inside the kernel.
1047 boundaryNodes = Kokkos::View<bool*, typename NO::device_type::memory_space>(Kokkos::ViewAllocateWithoutInitializing(
"boundaryNodes"), numBlockRows);
// NOTE(review): what happens for count_twos_as_dirichlet on block matrices is
// on a line not shown.
1049 if (count_twos_as_dirichlet)
1052 Kokkos::parallel_for(
1053 "MueLu:Utils::DetectDirichletRowsBlockCrs", range_type(0, numBlockRows),
1054 KOKKOS_LAMBDA(
const LO row) {
1055 auto rowView = b_graph.rowConst(row);
1056 auto length = rowView.length;
// Offset of this block row's first value; `stride` is defined on a line not
// shown (presumably the number of values per block — confirm).
1057 LO valstart = b_rowptr[row] * stride;
1059 boundaryNodes(row) =
true;
1060 decltype(length) colID = 0;
1061 for (; colID < length; colID++) {
1062 if (rowView.colidx(colID) != row) {
1063 LO current = valstart + colID * stride;
// Any value above tol inside an off-diagonal block clears the flag.
1064 for (
LO k = 0; k < stride; k++) {
1065 if (ATS::magnitude(values[current + k]) > tol) {
1066 boundaryNodes(row) =
false;
1071 if (boundaryNodes(row) ==
false)
// Point (non-block) matrices.
1076 auto localMatrix = A.getLocalMatrixDevice();
1077 LO numRows = A.getLocalNumRows();
1078 boundaryNodes = Kokkos::View<bool*, typename NO::device_type::memory_space>(Kokkos::ViewAllocateWithoutInitializing(
"boundaryNodes"), numRows);
1080 if (count_twos_as_dirichlet)
1081 Kokkos::parallel_for(
1082 "MueLu:Utils::DetectDirichletRows_Twos_As_Dirichlet", range_type(0, numRows),
1083 KOKKOS_LAMBDA(
const LO row) {
1084 auto rowView = localMatrix.row(row);
1085 auto length = rowView.length;
1087 boundaryNodes(row) =
true;
1089 decltype(length) colID = 0;
1090 for (; colID < length; colID++)
1091 if ((rowView.colidx(colID) != row) &&
1092 (ATS::magnitude(rowView.value(colID)) > tol)) {
// First significant entry clears the flag; the action on a second one is on a
// line not shown.
1093 if (!boundaryNodes(row))
1095 boundaryNodes(row) =
false;
// Reaching the end of the row without an early exit re-flags the row.
1097 if (colID == length)
1098 boundaryNodes(row) =
true;
1102 Kokkos::parallel_for(
1103 "MueLu:Utils::DetectDirichletRows", range_type(0, numRows),
1104 KOKKOS_LAMBDA(
const LO row) {
1105 auto rowView = localMatrix.row(row);
1106 auto length = rowView.length;
1108 boundaryNodes(row) =
true;
1109 for (decltype(length) colID = 0; colID < length; colID++)
1110 if ((rowView.colidx(colID) != row) &&
1111 (ATS::magnitude(rowView.value(colID)) > tol)) {
1112 boundaryNodes(row) =
false;
// Same memory space: return the device view as is; otherwise copy it over.
1117 if constexpr (std::is_same<memory_space, typename NO::device_type::memory_space>::value)
1118 return boundaryNodes;
1120 Kokkos::View<bool*, memory_space> boundaryNodes2(Kokkos::ViewAllocateWithoutInitializing(
"boundaryNodes"), boundaryNodes.extent(0));
1121 Kokkos::deep_copy(boundaryNodes2, boundaryNodes);
1122 return boundaryNodes2;
// Zero-length placeholder view (its use is on a line not shown).
1125 Kokkos::View<bool*, memory_space> dummy(
"dummy", 0);
// Device-memory entry point: forwards to the generic implementation with the
// node's device memory space as the result space.
1129 template <
class SC,
class LO,
class GO,
class NO>
1130 Kokkos::View<bool*, typename NO::device_type::memory_space>
1134 const bool count_twos_as_dirichlet) {
1135 return MueLu::DetectDirichletRows_kokkos<SC, LO, GO, NO, typename NO::device_type::memory_space>(A, tol, count_twos_as_dirichlet);
// Host-memory entry point: forwards to the generic implementation with
// Kokkos::HostSpace as the result space.
1138 template <
class SC,
class LO,
class GO,
class NO>
1139 Kokkos::View<bool*, typename Kokkos::HostSpace>
1143 const bool count_twos_as_dirichlet) {
1144 return MueLu::DetectDirichletRows_kokkos<SC, LO, GO, NO, typename Kokkos::HostSpace>(A, tol, count_twos_as_dirichlet);
// Extended Dirichlet-row detection: off-diagonal entries are compared against
// `tol` after scaling by the square root of a product of diagonal magnitudes.
// Also reports through `bHasZeroDiagonal` whether some row lacks a diagonal.
// NOTE(review): where bHasDiag is set and the condition guarding the final
// flag assignment are on lines not shown.
1147 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1152 bHasZeroDiagonal =
false;
1166 bool bHasDiag =
false;
1167 for (decltype(indices.
size()) col = 0; col < indices.
size(); col++) {
1168 if (indices[col] != row) {
// NOTE(review): the second diagonal factor is read at `col`, the position
// within the row, not at indices[col], the column index — confirm intent.
1169 if (STS::magnitude(vals[col] / STS::magnitude(sqrt(STS::magnitude(diagVecData[row]) * STS::magnitude(diagVecData[col])))) > tol) {
1175 if (bHasDiag ==
false)
1176 bHasZeroDiagonal =
true;
1178 boundaryNodes[row] =
true;
1180 return boundaryNodes;
// Host routine that walks over every element of `vals`.
// NOTE(review): the per-element test and the output written are not visible in
// this excerpt.
1183 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1190 for (
size_t i = 0; i < static_cast<size_t>(vals.
size()); i++) {
// Kokkos routine: sets nonzeros(i) to true exactly when the magnitude of
// vals(i, 0) exceeds twice the machine epsilon of the scalar type.
1196 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1199 Kokkos::View<bool*, typename Node::device_type> nonzeros) {
1200 using ATS = Kokkos::ArithTraits<Scalar>;
1201 using impl_ATS = Kokkos::ArithTraits<typename ATS::val_type>;
1202 using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
1204 const typename ATS::magnitudeType eps = 2.0 * impl_ATS::eps();
// NOTE(review): the policy's index type is LocalOrdinal while the lambda takes
// size_t — confirm the implicit conversion is intended.
1206 Kokkos::parallel_for(
1207 "MueLu:Maxwell1::FindNonZeros", range_type(0, vals.extent(0)),
1208 KOKKOS_LAMBDA(
const size_t i) {
1209 nonzeros(i) = (impl_ATS::magnitude(vals(i, 0)) > eps);
// Host routine: marks every column touched by a Dirichlet row, combines the
// marks across processes where an importer exists, and converts the marks into
// the boolean arrays `dirichletCols` and `dirichletDomain`.
// The three input arrays must match the local sizes of the row, column and
// domain maps (checked with TEUCHOS_ASSERT).
1213 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1223 TEUCHOS_ASSERT(static_cast<size_t>(dirichletRows.
size()) == rowMap->getLocalNumElements());
1224 TEUCHOS_ASSERT(static_cast<size_t>(dirichletCols.
size()) == colMap->getLocalNumElements());
1225 TEUCHOS_ASSERT(static_cast<size_t>(dirichletDomain.
size()) == domMap->getLocalNumElements());
1228 for (
size_t i = 0; i < (size_t)dirichletRows.
size(); i++) {
1229 if (dirichletRows[i]) {
// Mark every column index present in this Dirichlet row with `one`.
1233 for (
size_t j = 0; j < static_cast<size_t>(indices.
size()); j++)
1234 myColsToZero->replaceLocalValue(indices[j], 0, one);
// Sum the marks into the global vector, then bring the combined marks back.
1243 globalColsToZero->doExport(*myColsToZero, *importer,
Xpetra::ADD);
1245 myColsToZero->doImport(*globalColsToZero, *importer,
Xpetra::INSERT);
// No-communication case: the local marks already are the global ones.
// (The branch condition is on a line not shown.)
1247 globalColsToZero = myColsToZero;
1249 FindNonZeros(globalColsToZero->getData(0), dirichletDomain);
1250 FindNonZeros(myColsToZero->getData(0), dirichletCols);
// Kokkos variant: marks columns touched by Dirichlet rows in a device kernel
// and combines the marks with an export (ADD) followed by an import (INSERT).
// The three views must match the local sizes of the row, column and domain maps.
// NOTE(review): the final conversion of the marks into dirichletCols and
// dirichletDomain is on lines not shown.
1253 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1256 const Kokkos::View<bool*, typename Node::device_type>& dirichletRows,
1257 Kokkos::View<bool*, typename Node::device_type> dirichletCols,
1258 Kokkos::View<bool*, typename Node::device_type> dirichletDomain) {
1259 using ATS = Kokkos::ArithTraits<Scalar>;
1260 using impl_ATS = Kokkos::ArithTraits<typename ATS::val_type>;
1261 using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
1265 TEUCHOS_ASSERT(dirichletRows.extent(0) == rowMap->getLocalNumElements());
1266 TEUCHOS_ASSERT(dirichletCols.extent(0) == colMap->getLocalNumElements());
1267 TEUCHOS_ASSERT(dirichletDomain.extent(0) == domMap->getLocalNumElements());
1270 auto myColsToZeroView = myColsToZero->getDeviceLocalView(Xpetra::Access::ReadWrite);
1271 auto localMatrix = A.getLocalMatrixDevice();
1272 Kokkos::parallel_for(
1273 "MueLu:Maxwell1::DetectDirichletCols", range_type(0, rowMap->getLocalNumElements()),
1275 if (dirichletRows(row)) {
1276 auto rowView = localMatrix.row(row);
1277 auto length = rowView.length;
// Several rows may write the same column slot; all of them store the same value.
1279 for (decltype(length) colID = 0; colID < length; colID++)
1280 myColsToZeroView(rowView.colidx(colID), 0) = impl_ATS::one();
1289 globalColsToZero->doExport(*myColsToZero, *importer,
Xpetra::ADD);
1291 myColsToZero->doImport(*globalColsToZero, *importer,
Xpetra::INSERT);
// No-communication case (branch condition not shown).
1293 globalColsToZero = myColsToZero;
// Marks a row as Dirichlet when its row sum is large relative to its diagonal:
// |rowsum| > |diag| * rowSumTol. The test is applied only when rowSumTol is
// below one. Existing flags are never cleared by the visible code.
1298 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1305 for (
LocalOrdinal row = 0; row < Teuchos::as<LocalOrdinal>(rowmap->getLocalNumElements()); ++row) {
1311 Scalar rowsum = STS::zero();
1312 Scalar diagval = STS::zero();
1314 for (
LocalOrdinal colID = 0; colID < Teuchos::as<LocalOrdinal>(nnz); colID++) {
// diagval is taken from the diagonal entry (the guarding test is not shown);
// rowsum accumulates the entries of the row.
1317 diagval = vals[colID];
1318 rowsum += vals[colID];
1321 if (rowSumTol < MTS::one() && STS::magnitude(rowsum) > STS::magnitude(diagval) * rowSumTol) {
1323 dirichletRows[row] =
true;
// Block-aware row-sum criterion: only entries whose column has the same block
// id as the row contribute to the row sum. Throws std::runtime_error when
// BlockNumber's map differs from A's column map.
1328 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1334 RCP<const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> rowmap = A.getRowMap();
1336 TEUCHOS_TEST_FOR_EXCEPTION(!A.getColMap()->isSameAs(*BlockNumber.
getMap()), std::runtime_error,
"ApplyRowSumCriterion: BlockNumber must match's A's column map.");
1339 for (
LocalOrdinal row = 0; row < Teuchos::as<LocalOrdinal>(rowmap->getLocalNumElements()); ++row) {
1341 ArrayView<const LocalOrdinal> indices;
1342 ArrayView<const Scalar> vals;
1345 Scalar rowsum = STS::zero();
1346 Scalar diagval = STS::zero();
1347 for (
LocalOrdinal colID = 0; colID < Teuchos::as<LocalOrdinal>(nnz); colID++) {
1350 diagval = vals[colID];
// `col` is defined on a line not shown (presumably indices[colID] — confirm).
1351 if (block_id[row] == block_id[col])
1352 rowsum += vals[colID];
// Same test as the non-block variant: active only for rowSumTol below one.
1356 if (rowSumTol < MTS::one() && STS::magnitude(rowsum) > STS::magnitude(diagval) * rowSumTol) {
1358 dirichletRows[row] =
true;
// Kokkos::View variant of the row-sum criterion. The flags are copied to a host
// mirror, updated in a host loop, and copied back to `dirichletRows`.
1364 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node,
class memory_space>
1367 Kokkos::View<bool*, memory_space>& dirichletRows) {
// Work on a host copy so the row loop below can run on the host.
1371 auto dirichletRowsHost = Kokkos::create_mirror_view(dirichletRows);
1372 Kokkos::deep_copy(dirichletRowsHost, dirichletRows);
1374 for (
LocalOrdinal row = 0; row < Teuchos::as<LocalOrdinal>(rowmap->getLocalNumElements()); ++row) {
1380 Scalar rowsum = STS::zero();
1381 Scalar diagval = STS::zero();
1382 for (
LocalOrdinal colID = 0; colID < Teuchos::as<LocalOrdinal>(nnz); colID++) {
1385 diagval = vals[colID];
1386 rowsum += vals[colID];
// NOTE(review): this test uses the real part of rowsum and has no
// `rowSumTol < one` guard, unlike the ArrayRCP variant — confirm the
// difference is intended.
1388 if (STS::real(rowsum) > STS::magnitude(diagval) * rowSumTol)
1389 dirichletRowsHost(row) =
true;
// Publish the updated flags back to the caller's view.
1392 Kokkos::deep_copy(dirichletRows, dirichletRowsHost);
// Device-memory entry point: forwards to the memory_space-templated
// implementation using the node's device memory space.
1395 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1399 Kokkos::View<bool*, typename Node::device_type::memory_space>& dirichletRows) {
1400 MueLu::ApplyRowSumCriterion<Scalar, LocalOrdinal, GlobalOrdinal, Node, typename Node::device_type::memory_space>(A, rowSumTol, dirichletRows);
// Host-memory entry point: forwards to the memory_space-templated
// implementation using Kokkos::HostSpace.
1403 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1407 Kokkos::View<bool*, Kokkos::HostSpace>& dirichletRows) {
1408 MueLu::ApplyRowSumCriterion<Scalar, LocalOrdinal, GlobalOrdinal, Node, Kokkos::HostSpace>(A, rowSumTol, dirichletRows);
// Block-aware Kokkos::View variant of the row-sum criterion: only entries whose
// column shares the row's block id are summed. Flags are updated on a host
// mirror and copied back. Throws std::runtime_error when BlockNumber's map
// differs from A's column map.
1412 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node,
class memory_space>
1416 Kokkos::View<bool*, memory_space>& dirichletRows) {
1420 TEUCHOS_TEST_FOR_EXCEPTION(!A.getColMap()->isSameAs(*BlockNumber.
getMap()), std::runtime_error,
"ApplyRowSumCriterion: BlockNumber must match's A's column map.");
1422 auto dirichletRowsHost = Kokkos::create_mirror_view(dirichletRows);
1423 Kokkos::deep_copy(dirichletRowsHost, dirichletRows);
1426 for (
LocalOrdinal row = 0; row < Teuchos::as<LocalOrdinal>(rowmap->getLocalNumElements()); ++row) {
1432 Scalar rowsum = STS::zero();
1433 Scalar diagval = STS::zero();
1434 for (
LocalOrdinal colID = 0; colID < Teuchos::as<LocalOrdinal>(nnz); colID++) {
1437 diagval = vals[colID];
// `col` is defined on a line not shown (presumably indices[colID] — confirm).
1438 if (block_id[row] == block_id[col])
1439 rowsum += vals[colID];
// NOTE(review): uses the real part of rowsum with no `rowSumTol < one` guard,
// unlike the ArrayRCP variants — confirm the difference is intended.
1441 if (STS::real(rowsum) > STS::magnitude(diagval) * rowSumTol)
1442 dirichletRowsHost(row) =
true;
1445 Kokkos::deep_copy(dirichletRows, dirichletRowsHost);
// Device-memory entry point for the block-aware criterion: forwards to the
// memory_space-templated implementation with the node's device memory space.
1448 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1453 Kokkos::View<bool*, typename Node::device_type::memory_space>& dirichletRows) {
1454 MueLu::ApplyRowSumCriterion<Scalar, LocalOrdinal, GlobalOrdinal, Node, typename Node::device_type::memory_space>(A, BlockNumber, rowSumTol, dirichletRows);
// Host-memory entry point for the block-aware criterion: forwards to the
// memory_space-templated implementation with Kokkos::HostSpace.
1457 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1462 Kokkos::View<bool*, Kokkos::HostSpace>& dirichletRows) {
1463 MueLu::ApplyRowSumCriterion<Scalar, LocalOrdinal, GlobalOrdinal, Node, Kokkos::HostSpace>(A, BlockNumber, rowSumTol, dirichletRows);
// Host routine returning the set of Dirichlet columns: every column that
// appears in a Dirichlet row is marked, the marks are combined through an
// export (ADD) and re-imported (INSERT), and the result is converted into the
// returned `dirichletCols` array.
// NOTE(review): the guard around the export/import and the final per-column
// test are on lines not shown.
1466 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// Start from an all-zero marker vector.
1476 myColsToZero->putScalar(zero);
1478 for (
size_t i = 0; i < (size_t)dirichletRows.
size(); i++) {
1479 if (dirichletRows[i]) {
// Mark each column index of this Dirichlet row with `one`.
1483 for (
size_t j = 0; j < static_cast<size_t>(indices.
size()); j++)
1484 myColsToZero->replaceLocalValue(indices[j], 0, one);
1489 globalColsToZero->putScalar(zero);
// Accumulate marks globally, then bring the combined marks back locally.
1492 globalColsToZero->doExport(*myColsToZero, *exporter,
Xpetra::ADD);
1494 myColsToZero->doImport(*globalColsToZero, *exporter,
Xpetra::INSERT);
1498 for (
size_t i = 0; i < colMap->getLocalNumElements(); i++) {
1501 return dirichletCols;
// Kokkos variant of the Dirichlet-column detection. Returns a device view with
// one bool per local column-map entry; an entry is true when the combined
// column mark has magnitude greater than 2 * eps.
// NOTE(review): the function name, the matrix parameter and the construction
// of colMap / myColsToZero / globalColsToZero / exporter are on lines missing
// from this listing.
1504 template <
class SC,
class LO,
class GO,
class NO>
1505 Kokkos::View<bool*, typename NO::device_type>
1508 const Kokkos::View<const bool*, typename NO::device_type>& dirichletRows) {
1509 using ATS = Kokkos::ArithTraits<SC>;
1510 using impl_ATS = Kokkos::ArithTraits<typename ATS::val_type>;
1511 using range_type = Kokkos::RangePolicy<LO, typename NO::execution_space>;
1513 SC zero = ATS::zero();
1515 auto localMatrix = A.getLocalMatrixDevice();
1516 LO numRows = A.getLocalNumRows();
1521 myColsToZero->putScalar(zero);
1522 auto myColsToZeroView = myColsToZero->getDeviceLocalView(Xpetra::Access::ReadWrite);
// Pass 1: every column index found in a flagged row is set to one. Several
// rows may write the same column, but they all store the same value.
1524 Kokkos::parallel_for(
1525 "MueLu:Utils::DetectDirichletCols1", range_type(0, numRows),
1526 KOKKOS_LAMBDA(
const LO row) {
1527 if (dirichletRows(row)) {
1528 auto rowView = localMatrix.row(row);
1529 auto length = rowView.length;
1531 for (decltype(length) colID = 0; colID < length; colID++)
1532 myColsToZeroView(rowView.colidx(colID), 0) = impl_ATS::one();
1537 globalColsToZero->putScalar(zero);
// Sum the marks across ranks (ADD), then pull the combined values back into
// the column-map vector (INSERT) using the same exporter.
1540 globalColsToZero->doExport(*myColsToZero, *exporter,
Xpetra::ADD);
1542 myColsToZero->doImport(*globalColsToZero, *exporter,
Xpetra::INSERT);
1544 auto myCols = myColsToZero->getDeviceLocalView(Xpetra::Access::ReadOnly);
1545 size_t numColEntries = colMap->getLocalNumElements();
// Left uninitialized on allocation: pass 2 assigns every entry.
1546 Kokkos::View<bool*, typename NO::device_type> dirichletCols(Kokkos::ViewAllocateWithoutInitializing(
"dirichletCols"), numColEntries);
1547 const typename ATS::magnitudeType eps = 2.0 * ATS::eps();
// Pass 2: threshold the combined marks.
// NOTE(review): the lambda index is size_t while range_type is indexed by LO.
1549 Kokkos::parallel_for(
1550 "MueLu:Utils::DetectDirichletCols2", range_type(0, numColEntries),
1551 KOKKOS_LAMBDA(
const size_t i) {
1552 dirichletCols(i) = impl_ATS::magnitude(myCols(i, 0)) > eps;
1554 return dirichletCols;
// Accumulates, row by row, the sum of products of matching entries of A and B
// (entries are matched through their global column ids).
// NOTE(review): the signature, the allocation of valBAll, the per-row view
// extraction and the final reduction/return are on lines missing from this
// listing.
1557 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1568 const Map& AColMap = *A.getColMap();
1569 const Map& BColMap = *B.getColMap();
1573 size_t nnzA = 0, nnzB = 0;
1589 size_t numRows = A.getLocalNumRows();
1590 for (
size_t i = 0; i < numRows; i++) {
// Scatter B's row into a dense work array indexed by B's local column id.
1597 for (
size_t j = 0; j < nnzB; j++)
1598 valBAll[indB[j]] = valB[j];
1600 for (
size_t j = 0; j < nnzA; j++) {
// Translate A's local column id into B's local column id via the global id.
1603 LocalOrdinal ind = BColMap.getLocalElement(AColMap.getGlobalElement(indA[j]));
1605 f += valBAll[ind] * valA[j];
// Reset only the entries touched for this row so the work array is clean for
// the next one.
1609 for (
size_t j = 0; j < nnzB; j++)
1610 valBAll[indB[j]] = zero;
// Derives a rank-dependent integer seed from the communicator's rank and size.
// NOTE(review): the signature, the definition of `one`, the throw that
// presumably consumes errStr, and the call that installs the seed are on lines
// missing from this listing.
1618 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1626 int maxint = INT_MAX;
// Scale (INT_MAX - 1) by a factor that depends on (rank + 1) / (size + one).
1627 int mySeed = Teuchos::as<int>((maxint - 1) * (one - (comm.
getRank() + 1) / (comm.
getSize() + one)));
// Reject seeds outside the range documented in the message below.
1628 if (mySeed < 1 || mySeed == maxint) {
1629 std::ostringstream errStr;
1630 errStr <<
"Error detected with random seed = " << mySeed <<
". It should be in the interval [1,2^31-2].";
// Collects into dirichletRows the local ids of rows whose counter `nnz` is 1,
// or 2 when count_twos_as_dirichlet is set. Any previous content of
// dirichletRows is discarded first.
// NOTE(review): the signature's leading part, the row-view extraction and the
// rule by which `nnz` is incremented inside the inner loop are on lines
// missing from this listing.
1642 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1645 std::vector<LocalOrdinal>& dirichletRows,
bool count_twos_as_dirichlet) {
1647 dirichletRows.resize(0);
1648 for (
size_t i = 0; i < A->getLocalNumRows(); i++) {
1653 for (
size_t j = 0; j < (size_t)indices.
size(); j++) {
1658 if (nnz == 1 || (count_twos_as_dirichlet && nnz == 2)) {
1659 dirichletRows.push_back(i);
// Visits each row listed in dirichletRows and locates the diagonal entry by
// comparing every column's global id with the row's global id.
// NOTE(review): the signature's leading part, the definition of Rmap/Cmap, the
// row-view extraction and the values written for diagonal / off-diagonal
// entries are on lines missing from this listing.
1664 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1667 const std::vector<LocalOrdinal>& dirichletRows) {
1673 for (
size_t i = 0; i < dirichletRows.size(); i++) {
1674 GlobalOrdinal row_gid = Rmap->getGlobalElement(dirichletRows[i]);
1681 for (
size_t j = 0; j < (size_t)indices.
size(); j++) {
// True exactly for the diagonal entry of this row.
1682 if (Cmap->getGlobalElement(indices[j]) == row_gid)
// Flag-array variant: dirichletRows holds one bool per local row (asserted
// below). For every flagged row the diagonal entry is located by comparing
// column global ids with the row's global id.
// NOTE(review): the signature, the definition of Rmap/Cmap/row_gid, the
// row-view extraction and the values written are on lines missing from this
// listing.
1690 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1699 TEUCHOS_ASSERT(static_cast<size_t>(dirichletRows.
size()) == Rmap->getLocalNumElements());
1703 for (
size_t i = 0; i < (size_t)dirichletRows.
size(); i++) {
1704 if (dirichletRows[i]) {
1712 for (
size_t j = 0; j < (size_t)indices.
size(); j++) {
// True exactly for the diagonal entry of this row.
1713 if (Cmap->getGlobalElement(indices[j]) == row_gid)
// Kokkos variant: for every flagged local row, rewrites the row's values in
// place on the device -- one on the diagonal entry, zero in the other
// assignment.
// NOTE(review): the signature's leading part, the definition of Rmap/Cmap, the
// KOKKOS_LAMBDA header and the `else` between the two assignments are on lines
// missing from this listing.
1724 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1727 const Kokkos::View<const bool*, typename Node::device_type>& dirichletRows) {
1729 using ATS = Kokkos::ArithTraits<Scalar>;
1730 using impl_ATS = Kokkos::ArithTraits<typename ATS::val_type>;
1731 using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
// One flag per local row is required.
1738 TEUCHOS_ASSERT(static_cast<size_t>(dirichletRows.size()) == Rmap->getLocalNumElements());
1740 auto localMatrix = A->getLocalMatrixDevice();
1741 auto localRmap = Rmap->getLocalMap();
1742 auto localCmap = Cmap->getLocalMap();
1744 Kokkos::parallel_for(
1745 "MueLu::Utils::ApplyOAZ", range_type(0, dirichletRows.extent(0)),
1747 if (dirichletRows(row)) {
1748 auto rowView = localMatrix.row(row);
1749 auto length = rowView.length;
// Column-map local id of this row's diagonal, obtained through the global id.
1750 auto row_gid = localRmap.getGlobalElement(row);
1751 auto row_lid = localCmap.getLocalElement(row_gid);
1753 for (decltype(length) colID = 0; colID < length; colID++)
1754 if (rowView.colidx(colID) == row_lid)
1755 rowView.value(colID) = impl_ATS::one();
1757 rowView.value(colID) = impl_ATS::zero();
// For each row listed in dirichletRows, overwrites every stored value of the
// row with replaceWith.
// NOTE(review): the remaining parameters, the row-view extraction and the
// origin of the writable alias valuesNC are on lines missing from this listing.
1762 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1765 const std::vector<LocalOrdinal>& dirichletRows,
1767 for (
size_t i = 0; i < dirichletRows.size(); i++) {
1773 for (
size_t j = 0; j < (size_t)indices.
size(); j++)
1774 valuesNC[j] = replaceWith;
// Flag-array variant for matrices: dirichletRows must hold one bool per local
// row of A (asserted below); every stored value of a flagged row is
// overwritten with replaceWith.
// NOTE(review): the signature, the row-view extraction and the origin of the
// writable alias valuesNC are on lines missing from this listing.
1778 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1783 TEUCHOS_ASSERT(static_cast<size_t>(dirichletRows.
size()) == A->getRowMap()->getLocalNumElements());
1784 for (
size_t i = 0; i < (size_t)dirichletRows.
size(); i++) {
1785 if (dirichletRows[i]) {
1791 for (
size_t j = 0; j < (size_t)indices.
size(); j++)
1792 valuesNC[j] = replaceWith;
// Flag-array variant for multivectors: dirichletRows must hold one bool per
// local entry of X's map (asserted below); for every flagged row, the entry in
// each vector of X is replaced with replaceWith.
// NOTE(review): the signature is on lines missing from this listing.
1797 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1802 TEUCHOS_ASSERT(static_cast<size_t>(dirichletRows.
size()) == X->getMap()->getLocalNumElements());
1803 for (
size_t i = 0; i < (size_t)dirichletRows.
size(); i++) {
1804 if (dirichletRows[i]) {
1805 for (
size_t j = 0; j < X->getNumVectors(); j++)
1806 X->replaceLocalValue(i, j, replaceWith);
// Kokkos variant for matrices: in a device parallel_for over the local rows,
// every stored value of a flagged row is overwritten with replaceWith.
// NOTE(review): the remaining parameters, the definition of numRows and the
// KOKKOS_LAMBDA header are on lines missing from this listing.
1811 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1814 const Kokkos::View<const bool*, typename Node::device_type>& dirichletRows,
1816 using ATS = Kokkos::ArithTraits<Scalar>;
1817 using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
// Convert once to the Kokkos-side value type so the kernel stores it directly.
1819 typename ATS::val_type impl_replaceWith = replaceWith;
1821 auto localMatrix = A->getLocalMatrixDevice();
1824 Kokkos::parallel_for(
1825 "MueLu:Utils::ZeroDirichletRows", range_type(0, numRows),
1827 if (dirichletRows(row)) {
1828 auto rowView = localMatrix.row(row);
1829 auto length = rowView.length;
1830 for (decltype(length) colID = 0; colID < length; colID++)
1831 rowView.value(colID) = impl_replaceWith;
// Kokkos variant for multivectors: for every flagged row i, entry (i, j) of
// X's device view is overwritten with replaceWith for all vectors j.
// NOTE(review): the remaining parameters are on lines missing from this listing.
1836 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1839 const Kokkos::View<const bool*, typename Node::device_type>& dirichletRows,
1841 using ATS = Kokkos::ArithTraits<Scalar>;
1842 using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
// Convert once to the Kokkos-side value type so the kernel stores it directly.
1844 typename ATS::val_type impl_replaceWith = replaceWith;
1846 auto myCols = X->getDeviceLocalView(Xpetra::Access::ReadWrite);
1847 size_t numVecs = X->getNumVectors();
// NOTE(review): the lambda index is size_t while range_type is indexed by
// LocalOrdinal.
1848 Kokkos::parallel_for(
1849 "MueLu:Utils::ZeroDirichletRows_MV", range_type(0, dirichletRows.size()),
1850 KOKKOS_LAMBDA(
const size_t i) {
1851 if (dirichletRows(i)) {
1852 for (
size_t j = 0; j < numVecs; j++)
1853 myCols(i, j) = impl_replaceWith;
// Walks every local row of A and overwrites with replaceWith each stored value
// whose local column index is flagged in dirichletCols. dirichletCols must
// hold one bool per local entry of A's column map (asserted below).
// NOTE(review): the signature and the origin of the writable alias valuesNC
// are on lines missing from this listing.
1858 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1863 TEUCHOS_ASSERT(static_cast<size_t>(dirichletCols.
size()) == A->getColMap()->getLocalNumElements());
1864 for (
size_t i = 0; i < A->getLocalNumRows(); i++) {
1867 A->getLocalRowView(i, indices, values);
1870 for (
size_t j = 0; j < static_cast<size_t>(indices.
size()); j++)
1871 if (dirichletCols[indices[j]])
1872 valuesNC[j] = replaceWith;
// Kokkos variant: in a device parallel_for over the local rows, each stored
// value whose local column index is flagged in dirichletCols is overwritten
// with replaceWith.
// NOTE(review): the remaining parameters, the definition of numRows and the
// KOKKOS_LAMBDA header are on lines missing from this listing.
1876 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1879 const Kokkos::View<const bool*, typename Node::device_type>& dirichletCols,
1881 using ATS = Kokkos::ArithTraits<Scalar>;
1882 using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
// Convert once to the Kokkos-side value type so the kernel stores it directly.
1884 typename ATS::val_type impl_replaceWith = replaceWith;
1886 auto localMatrix = A->getLocalMatrixDevice();
1889 Kokkos::parallel_for(
1890 "MueLu:Utils::ZeroDirichletCols", range_type(0, numRows),
1892 auto rowView = localMatrix.row(row);
1893 auto length = rowView.length;
1894 for (decltype(length) colID = 0; colID < length; colID++)
1895 if (dirichletCols(rowView.colidx(colID))) {
1896 rowView.value(colID) = impl_replaceWith;
// Finds the Dirichlet rows of A, records them as 1 in the row indicator and
// makes the information available on the column side: copied directly when
// there is no importer, otherwise communicated with an ADD-mode import.
// NOTE(review): the signature, the condition guarding the throw, the
// construction of importer / isDirichletRow / isDirichletCol and the dr/dc
// data handles are on lines missing from this listing.
1901 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1908 throw std::runtime_error(
"UtilitiesBase::FindDirichletRowsAndPropagateToCols row and domain maps must match.");
1911 bool has_import = !importer.
is_null();
1914 std::vector<LocalOrdinal> dirichletRows;
1915 FindDirichletRows(A, dirichletRows);
// NOTE(review): presumably debug output; any preprocessor guard around these
// printf calls is not visible in this listing.
1918 printf(
"[%d] DirichletRow Ids = ",A->getRowMap()->getComm()->getRank());
1919 for(
size_t i=0; i<(size_t) dirichletRows.size(); i++)
1920 printf(
"%d ",dirichletRows[i]);
1933 for (
size_t i = 0; i < (size_t)dirichletRows.size(); i++) {
1934 dr[dirichletRows[i]] = 1;
// Without an importer the column indicator is filled with the same local ids.
1935 if (!has_import) dc[dirichletRows[i]] = 1;
1940 isDirichletCol->doImport(*isDirichletRow, *importer, Xpetra::CombineMode::ADD);
// Builds a value array of the same length as the original matrix's local
// values in which every entry different from zero becomes ONE and every other
// entry becomes ZERO, then completes a new matrix on the original's domain and
// range maps.
// NOTE(review): the signature, the `else` between the two assignments, the
// construction of NewMatrix and the return are on lines missing from this
// listing.
1943 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1947 using ISC =
typename Kokkos::ArithTraits<Scalar>::val_type;
1948 using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
1949 using local_matrix_type =
typename CrsMatrix::local_matrix_type;
1950 using values_type =
typename local_matrix_type::values_type;
1952 const ISC ONE = Kokkos::ArithTraits<ISC>::one();
1953 const ISC
ZERO = Kokkos::ArithTraits<ISC>::zero();
1956 auto localMatrix = original->getLocalMatrixDevice();
1958 values_type new_values(
"values", localMatrix.nnz());
// NOTE(review): the range covers nnz() but the policy is indexed by
// LocalOrdinal, and the lambda index is size_t.
1960 Kokkos::parallel_for(
1961 "ReplaceNonZerosWithOnes", range_type(0, localMatrix.nnz()), KOKKOS_LAMBDA(
const size_t i) {
1962 if (localMatrix.values(i) != ZERO)
1963 new_values(i) = ONE;
1965 new_values(i) = ZERO;
1971 NewMatrix->fillComplete(original->getDomainMap(), original->getRangeMap());
// Builds a BlockedMap on the importer's target side: every entry of the full
// source map is tagged with the index of the sub-map that contains it, the
// tags are imported to the target distribution, and the target global ids are
// regrouped per sub-map.
// NOTE(review): the signature, the definitions of fullMap / stridedMap /
// targetMap / block_ids / new_block_ids / data / elementsInSubMap, the
// condition guarding the throw and the per-sub-map map construction are on
// lines missing from this listing.
1975 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// When a strided map is available, its underlying map is used as the full map.
1986 if (!stridedMap.
is_null()) fullMap = stridedMap->getMap();
1989 const size_t numSubMaps = sourceBlockedMap.
getNumMaps();
1991 throw std::runtime_error(
"GenerateBlockedTargetMap(): Map compatibility error");
// Tag each locally owned entry with the index of its sub-map.
1996 for (
size_t i = 0; i < numSubMaps; i++) {
1999 for (
size_t j = 0; j < map->getLocalNumElements(); j++) {
2000 LocalOrdinal jj = fullMap->getLocalElement(map->getGlobalElement(j));
2001 block_ids->replaceLocalValue(jj, (
int)i);
// Move the tags to the target distribution.
2008 new_block_ids->doImport(*block_ids, Importer, Xpetra::CombineMode::ADD);
// Bucket the target global ids by the imported sub-map tag.
2014 for (
size_t i = 0; i < targetMap->getLocalNumElements(); i++) {
2015 elementsInSubMap[data[i]].
push_back(targetMap->getGlobalElement(i));
2019 std::vector<RCP<const Map>> subMaps(numSubMaps);
2020 for (
size_t i = 0; i < numSubMaps; i++) {
2025 return rcp(
new BlockedMap(targetMap, subMaps));
// Compares the leading entries of the column element list against the row
// element list, position by position; a shorter column list is handled by the
// size check below.
// NOTE(review): the signature, the definitions of rowElements / colElements,
// the outcome of the size check, the body of the mismatch branch and the
// return are on lines missing from this listing.
2028 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2034 const size_t numElements = rowElements.
size();
2036 if (
size_t(colElements.
size()) < numElements)
2039 bool goodMap =
true;
2040 for (
size_t i = 0; i < numElements; i++)
2041 if (rowElements[i] != colElements[i]) {
// Computes an ordering of the local graph with KokkosGraph's graph_rcm and
// stores its inverse in the first column of `retval`: the entry at position
// rcmOrder(rowIdx) receives rowIdx.
// NOTE(review): the signature, the local_matrix_type alias, the construction
// of retval and the return are on lines missing from this listing.
2049 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2054 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
2055 using lno_nnz_view_t =
typename local_graph_type::entries_type::non_const_type;
2056 using device =
typename local_graph_type::device_type;
2057 using execution_space =
typename local_matrix_type::execution_space;
2058 using ordinal_type =
typename local_matrix_type::ordinal_type;
2060 local_graph_type localGraph = Op.getLocalMatrixDevice().graph;
2062 lno_nnz_view_t rcmOrder = KokkosGraph::Experimental::graph_rcm<device, typename local_graph_type::row_map_type, typename local_graph_type::entries_type, lno_nnz_view_t>(localGraph.row_map, localGraph.entries);
// Rank-1 view of the result vector's single column.
2068 auto view1D = Kokkos::subview(retval->getDeviceLocalView(Xpetra::Access::ReadWrite), Kokkos::ALL(), 0);
2069 Kokkos::parallel_for(
2070 "Utilities::ReverseCuthillMcKee",
2071 Kokkos::RangePolicy<ordinal_type, execution_space>(0, localGraph.numRows()),
2072 KOKKOS_LAMBDA(
const ordinal_type rowIdx) {
2073 view1D(rcmOrder(rowIdx)) = rowIdx;
// Same computation as the reverse variant, except that the ordering returned
// by graph_rcm is traversed back to front: the entry at position
// rcmOrder(numRows - 1 - rowIdx) receives rowIdx.
// NOTE(review): the signature, the local_matrix_type alias, the definition of
// numRows, the construction of retval and the return are on lines missing
// from this listing. The kernel label below repeats the reverse variant's name.
2078 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2083 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
2084 using lno_nnz_view_t =
typename local_graph_type::entries_type::non_const_type;
2085 using device =
typename local_graph_type::device_type;
2086 using execution_space =
typename local_matrix_type::execution_space;
2087 using ordinal_type =
typename local_matrix_type::ordinal_type;
2089 local_graph_type localGraph = Op.getLocalMatrixDevice().graph;
2092 lno_nnz_view_t rcmOrder = KokkosGraph::Experimental::graph_rcm<device, typename local_graph_type::row_map_type, typename local_graph_type::entries_type, lno_nnz_view_t>(localGraph.row_map, localGraph.entries);
// Rank-1 view of the result vector's single column.
2098 auto view1D = Kokkos::subview(retval->getDeviceLocalView(Xpetra::Access::ReadWrite), Kokkos::ALL(), 0);
2100 Kokkos::parallel_for(
2101 "Utilities::ReverseCuthillMcKee",
2102 Kokkos::RangePolicy<ordinal_type, execution_space>(0, numRows),
2103 KOKKOS_LAMBDA(
const ordinal_type rowIdx) {
2104 view1D(rcmOrder(numRows - 1 - rowIdx)) = rowIdx;
2111 #define MUELU_UTILITIESBASE_SHORT
2112 #endif // MUELU_UTILITIESBASE_DEF_HPP
static void FindNonZeros(const Teuchos::ArrayRCP< const Scalar > vals, Teuchos::ArrayRCP< bool > nonzeros)
Find non-zero values in an ArrayRCP Compares the value to 2 * machine epsilon.
static RCP< Export< LocalOrdinal, GlobalOrdinal, Node > > Build(const RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &source, const RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &target)
static RCP< Vector > GetMatrixDiagonal(const Matrix &A)
Extract Matrix Diagonal.
static void ApplyOAZToMatrixRows(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node >> &A, const std::vector< LocalOrdinal > &dirichletRows)
static void ZeroDirichletCols(Teuchos::RCP< Matrix > &A, const Teuchos::ArrayRCP< const bool > &dirichletCols, Scalar replaceWith=Teuchos::ScalarTraits< Scalar >::zero())
static void DetectDirichletColsAndDomains(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Teuchos::ArrayRCP< bool > &dirichletRows, Teuchos::ArrayRCP< bool > dirichletCols, Teuchos::ArrayRCP< bool > dirichletDomain)
Detects Dirichlet columns & domains from a list of Dirichlet rows.
#define MueLu_sumAll(rcpComm, in, out)
Array< T > & append(const T &x)
virtual int getSize() const =0
MueLu::DefaultLocalOrdinal LocalOrdinal
void ApplyRowSumCriterion(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const typename Teuchos::ScalarTraits< Scalar >::magnitudeType rowSumTol, Kokkos::View< bool *, memory_space > &dirichletRows)
virtual LO getBlockSize() const override
static bool MapsAreNested(const Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > &rowMap, const Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > &colMap)
static RCP< Xpetra::Vector< LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node > > ReverseCuthillMcKee(const Matrix &Op)
static void FindDirichletRowsAndPropagateToCols(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node >> &A, Teuchos::RCP< Xpetra::Vector< int, LocalOrdinal, GlobalOrdinal, Node >> &isDirichletRow, Teuchos::RCP< Xpetra::Vector< int, LocalOrdinal, GlobalOrdinal, Node >> &isDirichletCol)
virtual int getRank() const =0
static void ApplyRowSumCriterionHost(const Matrix &A, const typename Teuchos::ScalarTraits< Scalar >::magnitudeType rowSumTol, Kokkos::View< bool *, Kokkos::HostSpace > &dirichletRows)
static Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Build(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node >> &map, size_t NumVectors, bool zeroOut=true)
static magnitudeType eps()
Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > removeSmallEntries(Teuchos::RCP< Xpetra::CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node >> &A, const typename Teuchos::ScalarTraits< Scalar >::magnitudeType threshold, const bool keepDiagonal)
static Scalar PowerMethod(const Matrix &A, bool scaleByDiag=true, LocalOrdinal niters=10, Magnitude tolerance=1e-2, bool verbose=false, unsigned int seed=123)
Power method.
static void ApplyRowSumCriterion(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Magnitude rowSumTol, Teuchos::ArrayRCP< bool > &dirichletRows)
Apply Rowsum Criterion.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
static RCP< Vector > GetMatrixOverlappedDeletedRowsum(const Matrix &A)
Extract Overlapped Matrix Deleted Rowsum.
virtual const RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRowMap() const =0
static RCP< Xpetra::CrsGraph< LocalOrdinal, GlobalOrdinal, Node > > GetThresholdedGraph(const RCP< Matrix > &A, const Magnitude threshold, const GlobalOrdinal expectedNNZperRow=-1)
Threshold a graph.
virtual void resumeFill(const RCP< ParameterList > &params=null)=0
static void ZeroDirichletRows(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node >> &A, const std::vector< LocalOrdinal > &dirichletRows, Scalar replaceWith=Teuchos::ScalarTraits< Scalar >::zero())
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > ReplaceNonZerosWithOnes(const RCP< Matrix > &original)
Creates a copy of a matrix where the non-zero entries are replaced by ones.
size_t getLocalNumRows() const override
Exception thrown to report incompatible objects (like maps).
static void SetRandomSeed(const Teuchos::Comm< int > &comm)
Set seed for random number generator.
static RCP< Xpetra::Vector< LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node > > CuthillMcKee(const Matrix &Op)
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getTargetMap() const =0
static RCP< Time > getNewTimer(const std::string &name)
void resize(const size_type n, const T &val=T())
virtual void fillComplete(const RCP< const Map > &domainMap, const RCP< const Map > &rangeMap, const RCP< ParameterList > &params=null)=0
virtual void getLocalDiagCopy(Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &diag) const =0
static RCP< MultiVector > Residual(const Xpetra::Operator< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, const MultiVector &X, const MultiVector &RHS)
static Teuchos::ArrayRCP< const bool > DetectDirichletCols(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Teuchos::ArrayRCP< const bool > &dirichletRows)
Detect Dirichlet columns based on Dirichlet rows.
virtual void getLocalRowView(LocalOrdinal LocalRow, ArrayView< const LocalOrdinal > &indices, ArrayView< const Scalar > &values) const =0
static void FindDirichletRows(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node >> &A, std::vector< LocalOrdinal > &dirichletRows, bool count_twos_as_dirichlet=false)
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
MueLu::DefaultScalar Scalar
Kokkos::View< bool *, memory_space > DetectDirichletRows_kokkos(const Xpetra::Matrix< SC, LO, GO, NO > &A, const typename Teuchos::ScalarTraits< SC >::magnitudeType &tol, const bool count_twos_as_dirichlet)
MueLu::DefaultGlobalOrdinal GlobalOrdinal
static RCP< CrsMatrixWrap > GetThresholdedMatrix(const RCP< Matrix > &Ain, const Magnitude threshold, const bool keepDiagonal=true, const GlobalOrdinal expectedNNZperRow=-1)
Threshold a matrix.
static Teuchos::RCP< Map< LocalOrdinal, GlobalOrdinal, Node > > Build(UnderlyingLib lib, global_size_t numGlobalElements, GlobalOrdinal indexBase, const Teuchos::RCP< const Teuchos::Comm< int >> &comm, LocalGlobal lg=Xpetra::GloballyDistributed)
static Teuchos::ArrayRCP< const bool > DetectDirichletRows(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Magnitude &tol=Teuchos::ScalarTraits< Magnitude >::zero(), bool count_twos_as_dirichlet=false)
Detect Dirichlet rows.
virtual bool isFillComplete() const =0
static RCP< Xpetra::Vector< Magnitude, LocalOrdinal, GlobalOrdinal, Node > > GetMatrixOverlappedAbsDeletedRowsum(const Matrix &A)
size_t getNumMaps() const
virtual void replaceLocalValues(LocalOrdinal localRow, const ArrayView< const LocalOrdinal > &cols, const ArrayView< const Scalar > &vals)=0
static Teuchos::ArrayRCP< Scalar > GetMatrixDiagonal_arcp(const Matrix &A)
Extract Matrix Diagonal.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getMap() const =0
static RCP< const Xpetra::BlockedMap< LocalOrdinal, GlobalOrdinal, Node > > GeneratedBlockedTargetMap(const Xpetra::BlockedMap< LocalOrdinal, GlobalOrdinal, Node > &sourceBlockedMap, const Xpetra::Import< LocalOrdinal, GlobalOrdinal, Node > &Importer)
static RCP< Vector > GetMatrixOverlappedDiagonal(const Matrix &A)
Extract Overlapped Matrix Diagonal.
Teuchos::ScalarTraits< Scalar >::magnitudeType Magnitude
virtual size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const =0
static Teuchos::RCP< Xpetra::Vector< Magnitude, LocalOrdinal, GlobalOrdinal, Node > > GetMatrixMaxMinusOffDiagonal(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
Return vector containing: max_{k \neq i}(-a_{ik}), for each row i in the matrix.
static RCP< Vector > GetMatrixDiagonalInverse(const Matrix &A, Magnitude tol=Teuchos::ScalarTraits< Scalar >::eps()*100, Scalar valReplacement=Teuchos::ScalarTraits< Scalar >::zero(), const bool doLumped=false)
Extract Matrix Diagonal.
static void seedrandom(unsigned int s)
static Teuchos::ArrayRCP< const bool > DetectDirichletRowsExt(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, bool &bHasZeroDiagonal, const Magnitude &tol=Teuchos::ScalarTraits< Scalar >::zero())
Detect Dirichlet rows (extended version)
static RCP< Vector > Build(const Teuchos::RCP< const Map > &map, bool zeroOut=true)
const RCP< const Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > getMap(size_t i, bool bThyraMode=false) const
virtual UnderlyingLib lib() const
static magnitudeType magnitude(T a)
void push_back(const value_type &x)
virtual RCP< const CrsGraph > getCrsGraph() const =0
static Teuchos::Array< Magnitude > ResidualNorm(const Xpetra::Operator< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, const MultiVector &X, const MultiVector &RHS)
impl_scalar_type_dualview::t_dev::const_type getValuesDevice(const LO &lclRow) const
virtual Teuchos::RCP< const Map > getRangeMap() const =0
static RCP< Teuchos::FancyOStream > MakeFancy(std::ostream &os)
static RCP< Matrix > Build(const RCP< const Map > &rowMap, size_t maxNumEntriesPerRow, Xpetra::ProfileType pftype=Xpetra::DynamicProfile)
void residual(const Operator< SC, LO, GO, NO > &Aop, const MultiVector< SC, LO, GO, NO > &X_in, const MultiVector< SC, LO, GO, NO > &B_in, MultiVector< SC, LO, GO, NO > &R_in)
Exception thrown to report errors in the internal logic of the program.
static Scalar Frobenius(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B)
Frobenius inner product of two matrices.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getSourceMap() const =0
#define TEUCHOS_ASSERT(assertion_test)
static Teuchos::RCP< Vector > GetInverse(Teuchos::RCP< const Vector > v, Magnitude tol=Teuchos::ScalarTraits< Scalar >::eps()*100, Scalar valReplacement=Teuchos::ScalarTraits< Scalar >::zero())
Return vector containing inverse of input vector.
static Kokkos::View< bool *, typename Kokkos::HostSpace > DetectDirichletRows_kokkos_host(const Matrix &A, const Magnitude &tol=Teuchos::ScalarTraits< typename Teuchos::ScalarTraits< SC >::magnitudeType >::zero(), const bool count_twos_as_dirichlet=false)
static Teuchos::ScalarTraits< Scalar >::magnitudeType Distance2(const Teuchos::Array< Teuchos::ArrayRCP< const Scalar >> &v, LocalOrdinal i0, LocalOrdinal i1)
Squared distance between two rows in a multivector.
static Teuchos::RCP< Vector > GetLumpedMatrixDiagonal(Matrix const &A, const bool doReciprocal=false, Magnitude tol=Teuchos::ScalarTraits< Scalar >::magnitude(Teuchos::ScalarTraits< Scalar >::zero()), Scalar valReplacement=Teuchos::ScalarTraits< Scalar >::zero(), const bool replaceSingleEntryRowWithZero=false, const bool useAverageAbsDiagVal=false)
Extract Matrix Diagonal of lumped matrix.
static Kokkos::View< bool *, typename NO::device_type::memory_space > DetectDirichletRows_kokkos(const Matrix &A, const Magnitude &tol=Teuchos::ScalarTraits< typename Teuchos::ScalarTraits< SC >::magnitudeType >::zero(), const bool count_twos_as_dirichlet=false)
Detect Dirichlet rows.
static RCP< Matrix > Crs2Op(RCP< CrsMatrix > Op)
virtual Teuchos::ArrayRCP< const Scalar > getData(size_t j) const =0
virtual Teuchos::RCP< const Map > getDomainMap() const =0
static RCP< Import< LocalOrdinal, GlobalOrdinal, Node > > Build(const RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &source, const RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &target, const Teuchos::RCP< Teuchos::ParameterList > &plist=Teuchos::null)