10 #ifndef MUELU_COALESCEDROPFACTORY_DEF_HPP
11 #define MUELU_COALESCEDROPFACTORY_DEF_HPP
17 #include <Xpetra_MapFactory.hpp>
18 #include <Xpetra_Map.hpp>
20 #include <Xpetra_MultiVectorFactory.hpp>
22 #include <Xpetra_StridedMap.hpp>
28 #include <Kokkos_NestedSort.hpp>
29 #include <Kokkos_StdAlgorithms.hpp>
32 #include "MueLu_AmalgamationFactory.hpp"
33 #include "MueLu_AmalgamationInfo.hpp"
35 #include "MueLu_LWGraph.hpp"
38 #include "MueLu_LWGraph.hpp"
41 #include "MueLu_PreDropFunctionConstVal.hpp"
42 #include "MueLu_Utilities.hpp"
44 #ifdef HAVE_XPETRA_TPETRA
45 #include "Tpetra_CrsGraphTransposer.hpp"
59 template <
class real_type,
class LO>
68 DropTol(real_type val_, real_type diag_,
LO col_,
bool drop_)
89 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
93 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
101 SET_VALID_ENTRY(
"aggregation: distance laplacian directional weights");
106 validParamList->
getEntry(
"aggregation: drop scheme").
setValidator(
rcp(
new Teuchos::StringValidator(Teuchos::tuple<std::string>(
"signed classical sa",
"classical",
"distance laplacian",
"signed classical",
"block diagonal",
"block diagonal classical",
"block diagonal distance laplacian",
"block diagonal signed classical",
"block diagonal colored signed classical"))));
111 #undef SET_VALID_ENTRY
112 validParamList->
set<
bool>(
"lightweight wrap",
true,
"Experimental option for lightweight graph access");
115 validParamList->
set<
RCP<const FactoryBase>>(
"UnAmalgamationInfo", Teuchos::null,
"Generating factory for UnAmalgamationInfo");
119 return validParamList;
122 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
124 : predrop_(Teuchos::null) {}
126 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
128 Input(currentLevel,
"A");
129 Input(currentLevel,
"UnAmalgamationInfo");
132 if (pL.
get<
bool>(
"lightweight wrap") ==
true) {
133 std::string algo = pL.
get<std::string>(
"aggregation: drop scheme");
134 if (algo ==
"distance laplacian" || algo ==
"block diagonal distance laplacian") {
135 Input(currentLevel,
"Coordinates");
137 if (algo ==
"signed classical sa")
139 else if (algo.find(
"block diagonal") != std::string::npos || algo.find(
"signed classical") != std::string::npos) {
140 Input(currentLevel,
"BlockNumber");
145 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
150 typedef typename STS::magnitudeType real_type;
154 if (predrop_ != Teuchos::null)
155 GetOStream(
Parameters0) << predrop_->description();
157 RCP<Matrix> realA = Get<RCP<Matrix>>(currentLevel,
"A");
160 bool doExperimentalWrap = pL.get<
bool>(
"lightweight wrap");
162 GetOStream(
Parameters0) <<
"lightweight wrap = " << doExperimentalWrap << std::endl;
163 std::string algo = pL.get<std::string>(
"aggregation: drop scheme");
164 const bool aggregationMayCreateDirichlet = pL.get<
bool>(
"aggregation: dropping may create Dirichlet");
169 bool use_block_algorithm =
false;
170 LO interleaved_blocksize = as<LO>(pL.get<
int>(
"aggregation: block diagonal: interleaved blocksize"));
171 bool useSignedClassicalRS =
false;
172 bool useSignedClassicalSA =
false;
173 bool generateColoringGraph =
false;
177 typename STS::magnitudeType rowSumTol = as<typename STS::magnitudeType>(pL.get<
double>(
"aggregation: row sum drop tol"));
181 if (algo ==
"distance laplacian") {
183 Coords = Get<RCP<RealValuedMultiVector>>(currentLevel,
"Coordinates");
185 }
else if (algo ==
"signed classical sa") {
186 useSignedClassicalSA =
true;
189 }
else if (algo ==
"signed classical" || algo ==
"block diagonal colored signed classical" || algo ==
"block diagonal signed classical") {
190 useSignedClassicalRS =
true;
195 if (!importer.is_null()) {
198 ghostedBlockNumber->doImport(*BlockNumber, *importer,
Xpetra::INSERT);
200 ghostedBlockNumber = BlockNumber;
203 if (algo ==
"block diagonal colored signed classical")
204 generateColoringGraph =
true;
208 }
else if (algo ==
"block diagonal") {
210 BlockDiagonalize(currentLevel, realA,
false);
212 }
else if (algo ==
"block diagonal classical" || algo ==
"block diagonal distance laplacian") {
214 use_block_algorithm =
true;
215 RCP<Matrix> filteredMatrix = BlockDiagonalize(currentLevel, realA,
true);
216 if (algo ==
"block diagonal distance laplacian") {
219 if (OldCoords->getLocalLength() != realA->getLocalNumRows()) {
220 LO dim = (
LO)OldCoords->getNumVectors();
221 Coords = RealValuedMultiVectorFactory::Build(realA->getRowMap(), dim);
222 for (
LO k = 0; k < dim; k++) {
225 for (
LO i = 0; i < (
LO)OldCoords->getLocalLength(); i++) {
226 LO new_base = i * dim;
227 for (
LO j = 0; j < interleaved_blocksize; j++)
228 new_vec[new_base + j] = old_vec[i];
234 algo =
"distance laplacian";
235 }
else if (algo ==
"block diagonal classical") {
247 enum { NO_WEIGHTS = 0,
250 int use_dlap_weights = NO_WEIGHTS;
251 if (algo ==
"distance laplacian") {
252 LO dim = (
LO)Coords->getNumVectors();
254 bool non_unity =
false;
255 for (
LO i = 0; !non_unity && i < (
LO)dlap_weights.
size(); i++) {
256 if (dlap_weights[i] != 1.0) {
261 LO blocksize = use_block_algorithm ? as<LO>(pL.get<
int>(
"aggregation: block diagonal: interleaved blocksize")) : 1;
262 if ((
LO)dlap_weights.
size() == dim)
263 use_dlap_weights = SINGLE_WEIGHTS;
264 else if ((
LO)dlap_weights.
size() == blocksize * dim)
265 use_dlap_weights = BLOCK_WEIGHTS;
268 "length of 'aggregation: distance laplacian directional weights' must equal the coordinate dimension OR the coordinate dimension times the blocksize");
271 GetOStream(
Statistics1) <<
"Using distance laplacian weights: " << dlap_weights << std::endl;
286 if (doExperimentalWrap) {
292 if (pL.get<
bool>(
"aggregation: use ml scaling of drop tol"))
293 threshold = pL.get<
double>(
"aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID());
295 threshold = as<SC>(pL.get<
double>(
"aggregation: drop tol"));
297 std::string distanceLaplacianAlgoStr = pL.get<std::string>(
"aggregation: distance laplacian algo");
298 std::string classicalAlgoStr = pL.get<std::string>(
"aggregation: classical algo");
299 real_type realThreshold = STS::magnitude(threshold);
303 #ifdef HAVE_MUELU_DEBUG
304 int distanceLaplacianCutVerbose = 0;
306 #ifdef DJS_READ_ENV_VARIABLES
307 if (getenv(
"MUELU_DROP_TOLERANCE_MODE")) {
308 distanceLaplacianAlgoStr = std::string(getenv(
"MUELU_DROP_TOLERANCE_MODE"));
311 if (getenv(
"MUELU_DROP_TOLERANCE_THRESHOLD")) {
312 auto tmp = atoi(getenv(
"MUELU_DROP_TOLERANCE_THRESHOLD"));
313 realThreshold = 1e-4 * tmp;
316 #ifdef HAVE_MUELU_DEBUG
317 if (getenv(
"MUELU_DROP_TOLERANCE_VERBOSE")) {
318 distanceLaplacianCutVerbose = atoi(getenv(
"MUELU_DROP_TOLERANCE_VERBOSE"));
326 if (algo ==
"distance laplacian") {
327 if (distanceLaplacianAlgoStr ==
"default")
329 else if (distanceLaplacianAlgoStr ==
"unscaled cut")
331 else if (distanceLaplacianAlgoStr ==
"scaled cut")
333 else if (distanceLaplacianAlgoStr ==
"scaled cut symmetric")
337 GetOStream(
Runtime0) <<
"algorithm = \"" << algo <<
"\" distance laplacian algorithm = \"" << distanceLaplacianAlgoStr <<
"\": threshold = " << threshold <<
", blocksize = " << A->GetFixedBlockSize() << std::endl;
338 }
else if (algo ==
"classical") {
339 if (classicalAlgoStr ==
"default")
341 else if (classicalAlgoStr ==
"unscaled cut")
343 else if (classicalAlgoStr ==
"scaled cut")
347 GetOStream(
Runtime0) <<
"algorithm = \"" << algo <<
"\" classical algorithm = \"" << classicalAlgoStr <<
"\": threshold = " << threshold <<
", blocksize = " << A->GetFixedBlockSize() << std::endl;
350 GetOStream(
Runtime0) <<
"algorithm = \"" << algo <<
"\": threshold = " << threshold <<
", blocksize = " << A->GetFixedBlockSize() << std::endl;
352 if (((algo ==
"classical") && (classicalAlgoStr.find(
"scaled") != std::string::npos)) || ((algo ==
"distance laplacian") && (distanceLaplacianAlgoStr.find(
"scaled") != std::string::npos)))
355 Set<bool>(currentLevel,
"Filtering", (threshold != STS::zero()));
357 const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<
double>(
"aggregation: Dirichlet threshold")));
363 GO numDropped = 0, numTotal = 0;
364 std::string graphType =
"unamalgamated";
383 const LO BlockSize = A->GetFixedBlockSize() / A->GetStorageBlockSize();
386 if (algo ==
"classical") {
387 if (predrop_ == null) {
392 if (predrop_ != null) {
395 "MueLu::CoalesceFactory::Build: cast to PreDropFunctionConstVal failed.");
398 if (newt != threshold) {
399 GetOStream(
Warnings0) <<
"switching threshold parameter from " << threshold <<
" (list) to " << newt <<
" (user function" << std::endl;
406 if (BlockSize == 1 && threshold == STS::zero() && !useSignedClassicalRS && !useSignedClassicalSA && A->hasCrsGraph()) {
415 numTotal = A->getLocalNumEntries();
418 GO numLocalBoundaryNodes = 0;
419 GO numGlobalBoundaryNodes = 0;
420 for (
size_t i = 0; i < boundaryNodes.size(); ++i)
421 if (boundaryNodes[i])
422 numLocalBoundaryNodes++;
424 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
425 GetOStream(
Statistics1) <<
"Detected " << numGlobalBoundaryNodes <<
" Dirichlet nodes" << std::endl;
428 Set(currentLevel,
"DofsPerNode", 1);
429 Set(currentLevel,
"Graph", graph);
431 }
else if ((BlockSize == 1 && threshold != STS::zero()) ||
432 (BlockSize == 1 && threshold == STS::zero() && !A->hasCrsGraph()) ||
433 (BlockSize == 1 && useSignedClassicalRS) ||
434 (BlockSize == 1 && useSignedClassicalSA)) {
440 typename LWGraph::row_type::non_const_type
rows(
"rows", A->getLocalNumRows() + 1);
441 typename LWGraph::entries_type::non_const_type columns(
"columns", A->getLocalNumEntries());
443 using MT =
typename STS::magnitudeType;
448 if (useSignedClassicalRS) {
449 if (ghostedBlockNumber.
is_null()) {
451 negMaxOffDiagonal = negMaxOffDiagonalVec->getData(0);
453 GetOStream(
Statistics1) <<
"Calculated max point off-diagonal" << std::endl;
456 negMaxOffDiagonal = negMaxOffDiagonalVec->getData(0);
458 GetOStream(
Statistics1) <<
"Calculating max block off-diagonal" << std::endl;
463 ghostedDiagVals = ghostedDiag->getData(0);
467 if (rowSumTol > 0.) {
468 if (ghostedBlockNumber.
is_null()) {
470 GetOStream(
Statistics1) <<
"Applying point row sum criterion." << std::endl;
474 GetOStream(
Statistics1) <<
"Applying block row sum criterion." << std::endl;
480 if (!ghostedBlockNumber.
is_null())
481 g_block_id = ghostedBlockNumber->getData(0);
487 for (
LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
488 size_t nnz = A->getNumEntriesInLocalRow(row);
489 bool rowIsDirichlet = boundaryNodes[row];
492 A->getLocalRowView(row, indices, vals);
500 if (useSignedClassicalRS) {
502 for (
LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
503 LO col = indices[colID];
504 MT max_neg_aik = realThreshold * STS::real(negMaxOffDiagonal[row]);
505 MT neg_aij = -STS::real(vals[colID]);
510 if ((!rowIsDirichlet && (g_block_id.
is_null() || g_block_id[row] == g_block_id[col]) && neg_aij > max_neg_aik) || row == col) {
511 columns[realnnz++] = col;
516 rows(row + 1) = realnnz;
517 }
else if (useSignedClassicalSA) {
519 for (
LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
520 LO col = indices[colID];
522 bool is_nonpositive = STS::real(vals[colID]) <= 0;
523 MT aiiajj = STS::magnitude(threshold * threshold * ghostedDiagVals[col] * ghostedDiagVals[row]);
524 MT aij = is_nonpositive ? STS::magnitude(vals[colID] * vals[colID]) : (-STS::magnitude(vals[colID] * vals[colID]));
530 if ((!rowIsDirichlet && aij > aiiajj) || row == col) {
531 columns(realnnz++) = col;
536 rows[row + 1] = realnnz;
539 for (
LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
540 LO col = indices[colID];
541 MT aiiajj = STS::magnitude(threshold * threshold * ghostedDiagVals[col] * ghostedDiagVals[row]);
542 MT aij = STS::magnitude(vals[colID] * vals[colID]);
544 if ((!rowIsDirichlet && aij > aiiajj) || row == col) {
545 columns(realnnz++) = col;
550 rows(row + 1) = realnnz;
556 using ExecSpace =
typename Node::execution_space;
557 using TeamPol = Kokkos::TeamPolicy<ExecSpace>;
558 using TeamMem =
typename TeamPol::member_type;
559 #if KOKKOS_VERSION >= 40799
560 using ATS = KokkosKernels::ArithTraits<Scalar>;
562 using ATS = Kokkos::ArithTraits<Scalar>;
564 using impl_scalar_type =
typename ATS::val_type;
565 #if KOKKOS_VERSION >= 40799
566 using implATS = KokkosKernels::ArithTraits<impl_scalar_type>;
568 using implATS = Kokkos::ArithTraits<impl_scalar_type>;
572 auto ghostedDiagValsView = Kokkos::subview(ghostedDiag->getLocalViewDevice(Xpetra::Access::ReadOnly), Kokkos::ALL(), 0);
573 auto thresholdKokkos =
static_cast<impl_scalar_type
>(threshold);
574 auto realThresholdKokkos = implATS::magnitude(thresholdKokkos);
575 auto columnsDevice = Kokkos::create_mirror_view(ExecSpace(), columns);
577 auto A_device = A->getLocalMatrixDevice();
583 boundaryNodesVector->getDataNonConst(0)[i] = boundaryNodes[i];
587 boundaryColumnVector->doImport(*boundaryNodesVector, *importer,
Xpetra::INSERT);
589 boundaryColumnVector = boundaryNodesVector;
591 auto boundaryColumn = boundaryColumnVector->getLocalViewDevice(Xpetra::Access::ReadOnly);
592 auto boundary = Kokkos::subview(boundaryColumn, Kokkos::ALL(), 0);
594 Kokkos::View<LO*, ExecSpace> rownnzView(
"rownnzView", A_device.numRows());
595 auto drop_views = Kokkos::View<bool*, ExecSpace>(
"drop_views", A_device.nnz());
596 auto index_views = Kokkos::View<size_t*, ExecSpace>(
"index_views", A_device.nnz());
598 Kokkos::parallel_reduce(
599 "classical_cut", TeamPol(A_device.numRows(), Kokkos::AUTO), KOKKOS_LAMBDA(
const TeamMem& teamMember,
LO& globalnnz,
GO& totalDropped) {
600 LO row = teamMember.league_rank();
601 auto rowView = A_device.rowConst(row);
602 size_t nnz = rowView.length;
604 auto drop_view = Kokkos::subview(drop_views, Kokkos::make_pair(A_device.graph.row_map(row), A_device.graph.row_map(row + 1)));
605 auto index_view = Kokkos::subview(index_views, Kokkos::make_pair(A_device.graph.row_map(row), A_device.graph.row_map(row + 1)));
608 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, (
LO)nnz), [&](
const LO colID) {
609 index_view(colID) = colID;
610 LO col = rowView.colidx(colID);
613 if (row == col || boundary(col)) {
614 drop_view(colID) =
true;
616 drop_view(colID) =
false;
620 size_t dropStart = nnz;
623 Kokkos::Experimental::sort_team(teamMember, index_view, [=](
size_t& x,
size_t& y) ->
bool {
624 if (drop_view(x) || drop_view(y)) {
625 return drop_view(x) < drop_view(y);
627 auto x_aij = implATS::magnitude(rowView.value(x) * rowView.value(x));
628 auto y_aij = implATS::magnitude(rowView.value(y) * rowView.value(y));
629 return x_aij > y_aij;
634 Kokkos::parallel_reduce(
635 Kokkos::TeamThreadRange(teamMember, 1, nnz), [=](
size_t i,
size_t& min) {
636 auto const& x = index_view(i - 1);
637 auto const& y = index_view(i);
638 typename implATS::magnitudeType x_aij = 0;
639 typename implATS::magnitudeType y_aij = 0;
641 x_aij = implATS::magnitude(rowView.value(x) * rowView.value(x));
644 y_aij = implATS::magnitude(rowView.value(y) * rowView.value(y));
647 if (realThresholdKokkos * realThresholdKokkos * x_aij > y_aij) {
653 Kokkos::Min<size_t>(dropStart));
656 Kokkos::Experimental::sort_team(teamMember, index_view, [=](
size_t& x,
size_t& y) ->
bool {
657 if (drop_view(x) || drop_view(y)) {
658 return drop_view(x) < drop_view(y);
660 auto x_aij = implATS::magnitude(rowView.value(x) * rowView.value(x));
661 auto y_aij = implATS::magnitude(rowView.value(y) * rowView.value(y));
662 auto x_aiiajj = implATS::magnitude(ghostedDiagValsView(rowView.colidx(x)) * ghostedDiagValsView(row));
663 auto y_aiiajj = implATS::magnitude(ghostedDiagValsView(rowView.colidx(y)) * ghostedDiagValsView(row));
664 return (x_aij / x_aiiajj) > (y_aij / y_aiiajj);
669 Kokkos::parallel_reduce(
670 Kokkos::TeamThreadRange(teamMember, 1, nnz), [=](
size_t i,
size_t& min) {
671 auto const& x = index_view(i - 1);
672 auto const& y = index_view(i);
673 typename implATS::magnitudeType x_val = 0;
674 typename implATS::magnitudeType y_val = 0;
676 typename implATS::magnitudeType x_aij = implATS::magnitude(rowView.value(x) * rowView.value(x));
677 typename implATS::magnitudeType x_aiiajj = implATS::magnitude(ghostedDiagValsView(rowView.colidx(x)) * ghostedDiagValsView(row));
678 x_val = x_aij / x_aiiajj;
681 typename implATS::magnitudeType y_aij = implATS::magnitude(rowView.value(y) * rowView.value(y));
682 typename implATS::magnitudeType y_aiiajj = implATS::magnitude(ghostedDiagValsView(rowView.colidx(y)) * ghostedDiagValsView(row));
683 y_val = y_aij / y_aiiajj;
686 if (realThresholdKokkos * realThresholdKokkos * x_val > y_val) {
692 Kokkos::Min<size_t>(dropStart));
696 if (dropStart < nnz) {
697 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, dropStart, nnz), [=](
size_t i) {
698 drop_view(index_view(i)) =
true;
704 Kokkos::parallel_reduce(
705 Kokkos::TeamThreadRange(teamMember, nnz), [=](
const size_t idxID,
LO& keep,
GO& drop) {
706 LO col = rowView.colidx(idxID);
708 if (row == col || !drop_view(idxID)) {
709 columnsDevice(A_device.graph.row_map(row) + idxID) = col;
712 columnsDevice(A_device.graph.row_map(row) + idxID) = -1;
719 totalDropped += rowDropped;
720 rownnzView(row) = rownnz;
722 realnnz, numDropped);
725 Kokkos::Experimental::remove(ExecSpace(), columnsDevice, -1);
726 Kokkos::deep_copy(columns, columnsDevice);
729 auto rowsDevice = Kokkos::create_mirror_view(ExecSpace(),
rows);
730 Kokkos::parallel_scan(
731 Kokkos::RangePolicy<ExecSpace>(0, A_device.numRows()), KOKKOS_LAMBDA(
const int i,
LO& partial_sum,
bool is_final) {
732 partial_sum += rownnzView(i);
733 if (is_final) rowsDevice(i + 1) = partial_sum;
735 Kokkos::deep_copy(
rows, rowsDevice);
738 numTotal = A->getLocalNumEntries();
740 if (aggregationMayCreateDirichlet) {
742 for (
LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
744 boundaryNodes[row] =
true;
748 RCP<LWGraph> graph =
rcp(
new LWGraph(
rows, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), A->getRowMap(), A->getColMap(),
"thresholded graph of A"));
751 GO numLocalBoundaryNodes = 0;
752 GO numGlobalBoundaryNodes = 0;
753 for (
size_t i = 0; i < boundaryNodes.size(); ++i)
754 if (boundaryNodes(i))
755 numLocalBoundaryNodes++;
757 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
758 GetOStream(
Statistics1) <<
"Detected " << numGlobalBoundaryNodes <<
" Dirichlet nodes" << std::endl;
760 Set(currentLevel,
"Graph", graph);
761 Set(currentLevel,
"DofsPerNode", 1);
764 if (generateColoringGraph) {
767 BlockDiagonalizeGraph(graph, ghostedBlockNumber, colorGraph, importer);
768 Set(currentLevel,
"Coloring Graph", colorGraph);
788 }
else if (BlockSize > 1 && threshold == STS::zero()) {
793 graphType =
"amalgamated";
801 Array<LO> rowTranslation = *(amalInfo->getRowTranslation());
802 Array<LO> colTranslation = *(amalInfo->getColTranslation());
805 LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
808 typename LWGraph::row_type::non_const_type
rows(
"rows", numRows + 1);
809 typename LWGraph::entries_type::non_const_type columns(
"columns", A->getLocalNumEntries());
812 Kokkos::deep_copy(amalgBoundaryNodes,
false);
824 LO blkSize = A->GetFixedBlockSize();
826 LO blkPartSize = A->GetFixedBlockSize();
827 if (A->IsView(
"stridedMaps") ==
true) {
831 blkSize = Teuchos::as<const LO>(strMap->getFixedBlockSize());
832 blkId = strMap->getStridedBlockId();
834 blkPartSize = Teuchos::as<LO>(strMap->getStridingData()[blkId]);
841 for (
LO row = 0; row < numRows; row++) {
851 bool isBoundary =
false;
852 if (pL.get<
bool>(
"aggregation: greedy Dirichlet") ==
true) {
853 for (
LO j = 0; j < blkPartSize; j++) {
854 if (pointBoundaryNodes[row * blkPartSize + j]) {
861 for (
LO j = 0; j < blkPartSize; j++) {
862 if (!pointBoundaryNodes[row * blkPartSize + j]) {
872 MergeRows(*A, row, indicesExtra, colTranslation);
875 indices = indicesExtra;
876 numTotal += indices.
size();
880 LO nnz = indices.
size(), rownnz = 0;
881 for (
LO colID = 0; colID < nnz; colID++) {
882 LO col = indices[colID];
883 columns(realnnz++) = col;
894 amalgBoundaryNodes[row] =
true;
896 rows(row + 1) = realnnz;
899 RCP<LWGraph> graph =
rcp(
new LWGraph(rows, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), uniqueMap, nonUniqueMap,
"amalgamated graph of A"));
903 GO numLocalBoundaryNodes = 0;
904 GO numGlobalBoundaryNodes = 0;
906 for (
size_t i = 0; i < amalgBoundaryNodes.size(); ++i)
907 if (amalgBoundaryNodes(i))
908 numLocalBoundaryNodes++;
911 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
912 GetOStream(
Statistics1) <<
"Detected " << numGlobalBoundaryNodes
913 <<
" agglomerated Dirichlet nodes" << std::endl;
916 Set(currentLevel,
"Graph", graph);
917 Set(currentLevel,
"DofsPerNode", blkSize);
919 }
else if (BlockSize > 1 && threshold != STS::zero()) {
923 graphType =
"amalgamated";
931 Array<LO> rowTranslation = *(amalInfo->getRowTranslation());
932 Array<LO> colTranslation = *(amalInfo->getColTranslation());
935 LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
938 typename LWGraph::row_type::non_const_type
rows(
"rows", numRows + 1);
939 typename LWGraph::entries_type::non_const_type columns(
"columns", A->getLocalNumEntries());
942 Kokkos::deep_copy(amalgBoundaryNodes,
false);
953 LO blkSize = A->GetFixedBlockSize();
955 LO blkPartSize = A->GetFixedBlockSize();
956 if (A->IsView(
"stridedMaps") ==
true) {
960 blkSize = Teuchos::as<const LO>(strMap->getFixedBlockSize());
961 blkId = strMap->getStridedBlockId();
963 blkPartSize = Teuchos::as<LO>(strMap->getStridingData()[blkId]);
974 for (
LO row = 0; row < numRows; row++) {
984 bool isBoundary =
false;
985 if (pL.get<
bool>(
"aggregation: greedy Dirichlet") ==
true) {
986 for (
LO j = 0; j < blkPartSize; j++) {
987 if (pointBoundaryNodes[row * blkPartSize + j]) {
994 for (
LO j = 0; j < blkPartSize; j++) {
995 if (!pointBoundaryNodes[row * blkPartSize + j]) {
1005 MergeRowsWithDropping(*A, row, ghostedDiagVals, threshold, indicesExtra, colTranslation);
1008 indices = indicesExtra;
1009 numTotal += indices.
size();
1013 LO nnz = indices.
size(), rownnz = 0;
1014 for (
LO colID = 0; colID < nnz; colID++) {
1015 LO col = indices[colID];
1016 columns[realnnz++] = col;
1027 amalgBoundaryNodes[row] =
true;
1029 rows[row + 1] = realnnz;
1033 RCP<LWGraph> graph =
rcp(
new LWGraph(rows, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), uniqueMap, nonUniqueMap,
"amalgamated graph of A"));
1037 GO numLocalBoundaryNodes = 0;
1038 GO numGlobalBoundaryNodes = 0;
1040 for (
size_t i = 0; i < amalgBoundaryNodes.size(); ++i)
1041 if (amalgBoundaryNodes(i))
1042 numLocalBoundaryNodes++;
1045 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1046 GetOStream(
Statistics1) <<
"Detected " << numGlobalBoundaryNodes
1047 <<
" agglomerated Dirichlet nodes" << std::endl;
1050 Set(currentLevel,
"Graph", graph);
1051 Set(currentLevel,
"DofsPerNode", blkSize);
1054 }
else if (algo ==
"distance laplacian") {
1055 LO blkSize = A->GetFixedBlockSize();
1056 GO indexBase = A->getRowMap()->getIndexBase();
1071 if ((blkSize == 1) && (threshold == STS::zero())) {
1075 graphType =
"unamalgamated";
1076 numTotal = A->getLocalNumEntries();
1079 GO numLocalBoundaryNodes = 0;
1080 GO numGlobalBoundaryNodes = 0;
1081 for (
size_t i = 0; i < pointBoundaryNodes.size(); ++i)
1082 if (pointBoundaryNodes(i))
1083 numLocalBoundaryNodes++;
1085 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1086 GetOStream(
Statistics1) <<
"Detected " << numGlobalBoundaryNodes <<
" Dirichlet nodes" << std::endl;
1089 Set(currentLevel,
"DofsPerNode", blkSize);
1090 Set(currentLevel,
"Graph", graph);
1106 "Coordinate vector length (" << Coords->getLocalLength() <<
") is incompatible with number of rows in A (" << A->getRowMap()->getLocalNumElements() <<
") by modulo block size (" << blkSize <<
").");
1112 uniqueMap = A->getRowMap();
1113 nonUniqueMap = A->getColMap();
1114 graphType =
"unamalgamated";
1117 uniqueMap = Coords->getMap();
1119 "Different index bases for matrix and coordinates");
1123 graphType =
"amalgamated";
1125 LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
1130 if (threshold != STS::zero()) {
1135 if (blkSize == 1 && realA->getCrsGraph()->getImporter() != Teuchos::null) {
1136 GetOStream(
Warnings1) <<
"Using existing importer from matrix graph" << std::endl;
1137 importer = realA->getCrsGraph()->getImporter();
1139 GetOStream(
Warnings0) <<
"Constructing new importer instance" << std::endl;
1140 importer = ImportFactory::Build(uniqueMap, nonUniqueMap);
1150 RCP<Vector> localLaplDiag = VectorFactory::Build(uniqueMap);
1153 if (threshold != STS::zero()) {
1154 const size_t numVectors = ghostedCoords->getNumVectors();
1155 coordData.
reserve(numVectors);
1156 for (
size_t j = 0; j < numVectors; j++) {
1163 ArrayRCP<SC> localLaplDiagData = localLaplDiag->getDataNonConst(0);
1164 for (
LO row = 0; row < numRows; row++) {
1169 A->getLocalRowView(row, indices, vals);
1174 MergeRows(*A, row, indicesExtra, colTranslation);
1175 indices = indicesExtra;
1179 bool haveAddedToDiag =
false;
1180 for (
LO colID = 0; colID < nnz; colID++) {
1181 const LO col = indices[colID];
1184 if (use_dlap_weights == SINGLE_WEIGHTS) {
1189 }
else if (use_dlap_weights == BLOCK_WEIGHTS) {
1190 int block_id = row % interleaved_blocksize;
1191 int block_start = block_id * interleaved_blocksize;
1197 haveAddedToDiag =
true;
1202 if (!haveAddedToDiag)
1203 localLaplDiagData[row] = STS::squareroot(STS::rmax());
1208 ghostedLaplDiag = VectorFactory::Build(nonUniqueMap);
1209 ghostedLaplDiag->doImport(*localLaplDiag, *importer,
Xpetra::INSERT);
1210 ghostedLaplDiagData = ghostedLaplDiag->getDataNonConst(0);
1214 GetOStream(
Runtime0) <<
"Skipping distance laplacian construction due to 0 threshold" << std::endl;
1220 typename LWGraph::row_type::non_const_type
rows(
"rows", numRows + 1);
1221 typename LWGraph::entries_type::non_const_type columns(
"columns", A->getLocalNumEntries());
1223 #ifdef HAVE_MUELU_DEBUG
1225 for (
LO i = 0; i < (
LO)columns.size(); i++) columns[i] = -666;
1230 bool use_stop_array = threshold != STS::zero() && distanceLaplacianAlgo ==
scaled_cut_symmetric;
1233 rows_stop.
resize(numRows);
1236 Kokkos::deep_copy(amalgBoundaryNodes,
false);
1245 if (threshold != STS::zero()) {
1246 const size_t numVectors = ghostedCoords->getNumVectors();
1247 coordData.
reserve(numVectors);
1248 for (
size_t j = 0; j < numVectors; j++) {
1255 for (
LO row = 0; row < numRows; row++) {
1258 bool isBoundary =
false;
1262 A->getLocalRowView(row, indices, vals);
1263 isBoundary = pointBoundaryNodes[row];
1267 for (
LO j = 0; j < blkSize; j++) {
1268 if (!pointBoundaryNodes[row * blkSize + j]) {
1276 MergeRows(*A, row, indicesExtra, colTranslation);
1279 indices = indicesExtra;
1281 numTotal += indices.
size();
1283 LO nnz = indices.
size(), rownnz = 0;
1285 if (use_stop_array) {
1287 realnnz =
rows(row);
1290 if (threshold != STS::zero()) {
1294 for (
LO colID = 0; colID < nnz; colID++) {
1295 LO col = indices[colID];
1298 columns(realnnz++) = col;
1304 if (isBoundary)
continue;
1307 if (use_dlap_weights == SINGLE_WEIGHTS) {
1309 }
else if (use_dlap_weights == BLOCK_WEIGHTS) {
1310 int block_id = row % interleaved_blocksize;
1311 int block_start = block_id * interleaved_blocksize;
1316 real_type aiiajj = STS::magnitude(realThreshold * realThreshold * ghostedLaplDiagData[row] * ghostedLaplDiagData[col]);
1317 real_type aij = STS::magnitude(laplVal * laplVal);
1320 columns(realnnz++) = col;
1329 std::vector<DropTol> drop_vec;
1330 drop_vec.reserve(nnz);
1335 for (
LO colID = 0; colID < nnz; colID++) {
1336 LO col = indices[colID];
1339 drop_vec.emplace_back(zero, one, colID,
false);
1343 if (isBoundary)
continue;
1346 if (use_dlap_weights == SINGLE_WEIGHTS) {
1348 }
else if (use_dlap_weights == BLOCK_WEIGHTS) {
1349 int block_id = row % interleaved_blocksize;
1350 int block_start = block_id * interleaved_blocksize;
1356 real_type aiiajj = STS::magnitude(ghostedLaplDiagData[row] * ghostedLaplDiagData[col]);
1357 real_type aij = STS::magnitude(laplVal * laplVal);
1359 drop_vec.emplace_back(aij, aiiajj, colID,
false);
1362 const size_t n = drop_vec.size();
1365 std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol
const& a, DropTol
const& b) {
1366 return a.val > b.val;
1370 for (
size_t i = 1; i < n; ++i) {
1372 auto const& x = drop_vec[i - 1];
1373 auto const& y = drop_vec[i];
1376 if (realThreshold * realThreshold * a > b) {
1378 #ifdef HAVE_MUELU_DEBUG
1379 if (distanceLaplacianCutVerbose) {
1380 std::cout <<
"DJS: KEEP, N, ROW: " << i + 1 <<
", " << n <<
", " << row << std::endl;
1385 drop_vec[i].drop = drop;
1388 std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol
const& a, DropTol
const& b) {
1389 return a.val / a.diag > b.val / b.diag;
1393 for (
size_t i = 1; i < n; ++i) {
1395 auto const& x = drop_vec[i - 1];
1396 auto const& y = drop_vec[i];
1397 auto a = x.val / x.diag;
1398 auto b = y.val / y.diag;
1399 if (realThreshold * realThreshold * a > b) {
1401 #ifdef HAVE_MUELU_DEBUG
1402 if (distanceLaplacianCutVerbose) {
1403 std::cout <<
"DJS: KEEP, N, ROW: " << i + 1 <<
", " << n <<
", " << row << std::endl;
1408 drop_vec[i].drop = drop;
1412 std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol
const& a, DropTol
const& b) {
1413 return a.col < b.col;
1416 for (
LO idxID = 0; idxID < (
LO)drop_vec.size(); idxID++) {
1417 LO col = indices[drop_vec[idxID].col];
1421 columns(realnnz++) = col;
1427 if (!drop_vec[idxID].drop) {
1428 columns(realnnz++) = col;
1439 for (
LO colID = 0; colID < nnz; colID++) {
1440 LO col = indices[colID];
1441 columns(realnnz++) = col;
1453 amalgBoundaryNodes[row] =
true;
1457 rows_stop[row] = rownnz + rows[row];
1459 rows[row + 1] = realnnz;
1464 if (use_stop_array) {
1467 for (
LO row = 0; row < numRows; row++) {
1468 for (
LO colidx = rows[row]; colidx < rows_stop[row]; colidx++) {
1469 LO col = columns[colidx];
1470 if (col >= numRows)
continue;
1473 for (
LO t_col =
rows(col); !found && t_col < rows_stop[col]; t_col++) {
1474 if (columns[t_col] == row)
1479 if (!found && !pointBoundaryNodes[col] && Teuchos::as<typename LWGraph::row_type::value_type>(rows_stop[col]) < rows[col + 1]) {
1480 LO new_idx = rows_stop[col];
1482 columns[new_idx] = row;
1490 LO current_start = 0;
1491 for (
LO row = 0; row < numRows; row++) {
1492 LO old_start = current_start;
1493 for (
LO col =
rows(row); col < rows_stop[row]; col++) {
1494 if (current_start != col) {
1495 columns(current_start) = columns(col);
1499 rows[row] = old_start;
1501 rows(numRows) = realnnz = current_start;
1507 graph =
rcp(
new LWGraph(rows, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), uniqueMap, nonUniqueMap,
"amalgamated graph of A"));
1512 GO numLocalBoundaryNodes = 0;
1513 GO numGlobalBoundaryNodes = 0;
1515 for (
size_t i = 0; i < amalgBoundaryNodes.size(); ++i)
1516 if (amalgBoundaryNodes(i))
1517 numLocalBoundaryNodes++;
1520 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1521 GetOStream(
Statistics1) <<
"Detected " << numGlobalBoundaryNodes <<
" agglomerated Dirichlet nodes"
1522 <<
" using threshold " << dirichletThreshold << std::endl;
1525 Set(currentLevel,
"Graph", graph);
1526 Set(currentLevel,
"DofsPerNode", blkSize);
1532 GO numGlobalTotal, numGlobalDropped;
1535 GetOStream(
Statistics1) <<
"Number of dropped entries in " << graphType <<
" matrix graph: " << numGlobalDropped <<
"/" << numGlobalTotal;
1536 if (numGlobalTotal != 0)
1537 GetOStream(
Statistics1) <<
" (" << 100 * Teuchos::as<double>(numGlobalDropped) / Teuchos::as<double>(numGlobalTotal) <<
"%)";
1544 SC threshold = as<SC>(pL.get<
double>(
"aggregation: drop tol"));
1546 GetOStream(
Runtime0) <<
"algorithm = \""
1548 <<
"\": threshold = " << threshold <<
", blocksize = " << A->GetFixedBlockSize() << std::endl;
1549 Set<bool>(currentLevel,
"Filtering", (threshold != STS::zero()));
1555 GO indexBase = rowMap->getIndexBase();
1559 if (A->IsView(
"stridedMaps") &&
1560 Teuchos::rcp_dynamic_cast<
const StridedMap>(A->getRowMap(
"stridedMaps")) != Teuchos::null) {
1564 blockdim = strMap->getFixedBlockSize();
1565 offset = strMap->getOffset();
1566 oldView = A->SwitchToView(oldView);
1567 GetOStream(
Statistics1) <<
"CoalesceDropFactory::Build():"
1568 <<
" found blockdim=" << blockdim <<
" from strided maps. offset=" << offset << std::endl;
1570 GetOStream(
Statistics1) <<
"CoalesceDropFactory::Build(): no striding information available. Use blockdim=1 with offset=0" << std::endl;
1575 GetOStream(
Statistics1) <<
"CoalesceDropFactory: nodeMap " << nodeMap->getLocalNumElements() <<
"/" << nodeMap->getGlobalNumElements() <<
" elements" << std::endl;
1578 RCP<CrsGraph> crsGraph = CrsGraphFactory::Build(nodeMap, A->getLocalMaxNumRowEntries() * blockdim);
1580 LO numRows = A->getRowMap()->getLocalNumElements();
1581 LO numNodes = nodeMap->getLocalNumElements();
1583 Kokkos::deep_copy(amalgBoundaryNodes,
false);
1584 const ArrayRCP<int> numberDirichletRowsPerNode(numNodes, 0);
1585 bool bIsDiagonalEntry =
false;
1590 for (
LO row = 0; row < numRows; row++) {
1592 GO grid = rowMap->getGlobalElement(row);
1595 bIsDiagonalEntry =
false;
1600 size_t nnz = A->getNumEntriesInLocalRow(row);
1603 A->getLocalRowView(row, indices, vals);
1607 for (
LO col = 0; col < Teuchos::as<LO>(nnz); col++) {
1608 GO gcid = colMap->getGlobalElement(indices[col]);
1610 if (vals[col] != STS::zero()) {
1612 cnodeIds->push_back(cnodeId);
1614 if (grid == gcid) bIsDiagonalEntry =
true;
1618 if (realnnz == 1 && bIsDiagonalEntry ==
true) {
1619 LO lNodeId = nodeMap->getLocalElement(nodeId);
1620 numberDirichletRowsPerNode[lNodeId] += 1;
1621 if (numberDirichletRowsPerNode[lNodeId] == blockdim)
1622 amalgBoundaryNodes[lNodeId] =
true;
1627 if (arr_cnodeIds.
size() > 0)
1628 crsGraph->insertGlobalIndices(nodeId, arr_cnodeIds());
1631 crsGraph->fillComplete(nodeMap, nodeMap);
1640 GO numLocalBoundaryNodes = 0;
1641 GO numGlobalBoundaryNodes = 0;
1642 for (
size_t i = 0; i < amalgBoundaryNodes.size(); ++i)
1643 if (amalgBoundaryNodes(i))
1644 numLocalBoundaryNodes++;
1646 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1647 GetOStream(
Statistics1) <<
"Detected " << numGlobalBoundaryNodes <<
" Dirichlet nodes" << std::endl;
1652 Set(currentLevel,
"DofsPerNode", blockdim);
1653 Set(currentLevel,
"Graph", graph);
1659 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1664 LO blkSize = A.GetFixedBlockSize();
1665 if (A.IsView(
"stridedMaps") ==
true) {
1669 if (strMap->getStridedBlockId() > -1)
1670 blkSize = Teuchos::as<LO>(strMap->getStridingData()[strMap->getStridedBlockId()]);
1674 size_t nnz = 0, pos = 0;
1675 for (
LO j = 0; j < blkSize; j++)
1676 nnz += A.getNumEntriesInLocalRow(row * blkSize + j);
1688 for (
LO j = 0; j < blkSize; j++) {
1689 A.getLocalRowView(row * blkSize + j, inds, vals);
1690 size_type numIndices = inds.
size();
1692 if (numIndices == 0)
1696 cols[pos++] = translation[inds[0]];
1697 for (size_type k = 1; k < numIndices; k++) {
1698 LO nodeID = translation[inds[k]];
1702 if (nodeID != cols[pos - 1])
1703 cols[pos++] = nodeID;
1710 std::sort(cols.
begin(), cols.
end());
1712 for (
size_t j = 1; j < nnz; j++)
1713 if (cols[j] != cols[pos])
1714 cols[++pos] = cols[j];
1718 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1724 LO blkSize = A.GetFixedBlockSize();
1725 if (A.IsView(
"stridedMaps") ==
true) {
1729 if (strMap->getStridedBlockId() > -1)
1730 blkSize = Teuchos::as<LO>(strMap->getStridingData()[strMap->getStridedBlockId()]);
1734 size_t nnz = 0, pos = 0;
1735 for (
LO j = 0; j < blkSize; j++)
1736 nnz += A.getNumEntriesInLocalRow(row * blkSize + j);
1748 for (
LO j = 0; j < blkSize; j++) {
1749 A.getLocalRowView(row * blkSize + j, inds, vals);
1750 size_type numIndices = inds.
size();
1752 if (numIndices == 0)
1757 for (size_type k = 0; k < numIndices; k++) {
1759 LO nodeID = translation[inds[k]];
1762 typename STS::magnitudeType aiiajj = STS::magnitude(threshold * threshold * ghostedDiagVals[dofID] * ghostedDiagVals[row * blkSize + j]);
1763 typename STS::magnitudeType aij = STS::magnitude(vals[k] * vals[k]);
1766 if (aij > aiiajj || (row * blkSize + j == dofID)) {
1772 if (nodeID != prevNodeID) {
1773 cols[pos++] = nodeID;
1774 prevNodeID = nodeID;
1783 std::sort(cols.
begin(), cols.
end());
1785 for (
size_t j = 1; j < nnz; j++)
1786 if (cols[j] != cols[pos])
1787 cols[++pos] = cols[j];
1793 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1798 const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.
get<
double>(
"aggregation: Dirichlet threshold")));
1799 const typename STS::magnitudeType rowSumTol = as<typename STS::magnitudeType>(pL.
get<
double>(
"aggregation: row sum drop tol"));
1803 GetOStream(
Statistics1) <<
"Using BlockDiagonal Graph before dropping (with provided blocking)" << std::endl;
1810 ghostedBlockNumber->doImport(*BlockNumber, *importer,
Xpetra::INSERT);
1812 ghostedBlockNumber = BlockNumber;
1820 typename CrsMatrix::local_matrix_type::row_map_type::host_mirror_type::non_const_type rows_mat;
1821 typename LWGraph::row_type::non_const_type rows_graph;
1822 typename LWGraph::entries_type::non_const_type columns;
1823 typename CrsMatrix::local_matrix_type::values_type::host_mirror_type::non_const_type values;
1826 if (generate_matrix) {
1827 crs_matrix_wrap =
rcp(
new CrsMatrixWrap(A->getRowMap(), A->getColMap(), 0));
1828 rows_mat =
typename CrsMatrix::local_matrix_type::row_map_type::host_mirror_type::non_const_type(
"rows_mat", A->getLocalNumRows() + 1);
1830 rows_graph =
typename LWGraph::row_type::non_const_type(
"rows_graph", A->getLocalNumRows() + 1);
1832 columns =
typename LWGraph::entries_type::non_const_type(
"columns", A->getLocalNumEntries());
1833 values =
typename CrsMatrix::local_matrix_type::values_type::host_mirror_type::non_const_type(
"values", A->getLocalNumEntries());
1836 GO numDropped = 0, numTotal = 0;
1837 for (
LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
1838 LO row_block = row_block_number[row];
1839 size_t nnz = A->getNumEntriesInLocalRow(row);
1842 A->getLocalRowView(row, indices, vals);
1845 for (
LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
1846 LO col = indices[colID];
1847 LO col_block = col_block_number[col];
1849 if (row_block == col_block) {
1850 if (generate_matrix) values[realnnz] = vals[colID];
1851 columns[realnnz++] = col;
1856 if (generate_matrix)
1857 rows_mat[row + 1] = realnnz;
1859 rows_graph[row + 1] = realnnz;
1866 numTotal = A->getLocalNumEntries();
1869 GO numLocalBoundaryNodes = 0;
1870 GO numGlobalBoundaryNodes = 0;
1871 for (
size_t i = 0; i < boundaryNodes.size(); ++i)
1872 if (boundaryNodes(i))
1873 numLocalBoundaryNodes++;
1875 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1876 GetOStream(
Statistics1) <<
"Detected " << numGlobalBoundaryNodes <<
" Dirichlet nodes" << std::endl;
1878 GO numGlobalTotal, numGlobalDropped;
1881 GetOStream(
Statistics1) <<
"Number of dropped entries in block-diagonalized matrix graph: " << numGlobalDropped <<
"/" << numGlobalTotal;
1882 if (numGlobalTotal != 0)
1883 GetOStream(
Statistics1) <<
" (" << 100 * Teuchos::as<double>(numGlobalDropped) / Teuchos::as<double>(numGlobalTotal) <<
"%)";
1887 Set(currentLevel,
"Filtering",
true);
1889 if (generate_matrix) {
1894 typename CrsMatrix::local_matrix_type::row_map_type>::value) {
1895 crs_matrix_wrap->getCrsMatrix()->setAllValues(rows_mat, columns, values);
1897 auto rows_mat2 =
typename CrsMatrix::local_matrix_type::row_map_type::non_const_type(
"rows_mat2", rows_mat.extent(0));
1898 Kokkos::deep_copy(rows_mat2, rows_mat);
1899 auto columns2 =
typename CrsMatrix::local_graph_type::entries_type::non_const_type(
"columns2", columns.extent(0));
1900 Kokkos::deep_copy(columns2, columns);
1901 auto values2 =
typename CrsMatrix::local_matrix_type::values_type::non_const_type(
"values2", values.extent(0));
1902 Kokkos::deep_copy(values2, values);
1903 crs_matrix_wrap->getCrsMatrix()->setAllValues(rows_mat2, columns2, values2);
1905 crs_matrix_wrap->getCrsMatrix()->expertStaticFillComplete(A->getColMap(), A->getRowMap());
1907 RCP<LWGraph> graph =
rcp(
new LWGraph(rows_graph, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), A->getRowMap(), A->getColMap(),
"block-diagonalized graph of A"));
1909 Set(currentLevel,
"Graph", graph);
1912 Set(currentLevel,
"DofsPerNode", 1);
1913 return crs_matrix_wrap;
1916 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1921 const bool localizeColoringGraph = pL.get<
bool>(
"aggregation: coloring: localize color graph");
1923 GetOStream(
Statistics1) <<
"Using BlockDiagonal Graph after Dropping (with provided blocking)";
1924 if (localizeColoringGraph)
1925 GetOStream(
Statistics1) <<
", with localization" << std::endl;
1927 GetOStream(
Statistics1) <<
", without localization" << std::endl;
1935 typename LWGraph::row_type::non_const_type rows_graph(
"rows_graph", inputGraph->
GetNodeNumVertices() + 1);
1936 typename LWGraph::entries_type::non_const_type columns(
"columns", inputGraph->
GetNodeNumEdges());
1939 GO numDropped = 0, numTotal = 0;
1940 const LO numRows = Teuchos::as<LO>(inputGraph->
GetDomainMap()->getLocalNumElements());
1941 if (localizeColoringGraph) {
1942 for (
LO row = 0; row < numRows; ++row) {
1943 LO row_block = row_block_number[row];
1947 for (
LO colID = 0; colID < Teuchos::as<LO>(indices.length); colID++) {
1948 LO col = indices(colID);
1949 LO col_block = col_block_number[col];
1951 if ((row_block == col_block) && (col < numRows)) {
1952 columns(realnnz++) = col;
1957 rows_graph(row + 1) = realnnz;
1964 boundaryNodesVector->getDataNonConst(0)[i] = boundaryNodes[i];
1967 boundaryColumnVector->doImport(*boundaryNodesVector, *importer,
Xpetra::INSERT);
1968 auto boundaryColumn = boundaryColumnVector->getData(0);
1970 for (
LO row = 0; row < numRows; ++row) {
1971 LO row_block = row_block_number[row];
1975 for (
LO colID = 0; colID < Teuchos::as<LO>(indices.length); colID++) {
1976 LO col = indices(colID);
1977 LO col_block = col_block_number[col];
1979 if ((row_block == col_block) && ((row == col) || (boundaryColumn[col] == 0))) {
1980 columns(realnnz++) = col;
1985 rows_graph(row + 1) = realnnz;
1993 GO numGlobalTotal, numGlobalDropped;
1996 GetOStream(
Statistics1) <<
"Number of dropped entries in block-diagonalized matrix graph: " << numGlobalDropped <<
"/" << numGlobalTotal;
1997 if (numGlobalTotal != 0)
1998 GetOStream(
Statistics1) <<
" (" << 100 * Teuchos::as<double>(numGlobalDropped) / Teuchos::as<double>(numGlobalTotal) <<
"%)";
2002 if (localizeColoringGraph) {
2003 outputGraph =
rcp(
new LWGraph(rows_graph, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), inputGraph->
GetDomainMap(), inputGraph->
GetImportMap(),
"block-diagonalized graph of A"));
2007 #ifdef HAVE_XPETRA_TPETRA
2008 auto outputGraph2 =
rcp(
new LWGraph(rows_graph, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), inputGraph->
GetDomainMap(), inputGraph->
GetImportMap(),
"block-diagonalized graph of A"));
2010 auto tpGraph =
Xpetra::toTpetra(rcp_const_cast<const CrsGraph>(outputGraph2->GetCrsGraph()));
2012 auto tpGraphSym = sym->symmetrize();
2013 auto lclGraphSym = tpGraphSym->getLocalGraphHost();
2014 auto colIndsSym = lclGraphSym.entries;
2016 auto rowsSym = tpGraphSym->getLocalRowPtrsHost();
2017 typename LWGraph::row_type::non_const_type rows_graphSym(
"rows_graphSym", rowsSym.size());
2018 for (
size_t row = 0; row < rowsSym.size(); row++)
2019 rows_graphSym(row) = rowsSym(row);
2028 #endif // MUELU_COALESCEDROPFACTORY_DEF_HPP
Important warning messages (one line)
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
typename local_graph_type::row_map_type row_type
Exception indicating invalid cast attempted.
void reserve(size_type n)
void BlockDiagonalizeGraph(const RCP< LWGraph > &inputGraph, const RCP< LocalOrdinalVector > &ghostedBlockNumber, RCP< LWGraph > &outputGraph, RCP< const Import > &importer) const
static Teuchos::RCP< Vector > GetMatrixMaxMinusOffDiagonal(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
Return vector containing: max_{i=k}(-a_ik), for each for i in the matrix.
#define MueLu_sumAll(rcpComm, in, out)
static void ApplyRowSumCriterionHost(const Matrix &A, const typename Teuchos::ScalarTraits< Scalar >::magnitudeType rowSumTol, Kokkos::View< bool *, Kokkos::HostSpace > &dirichletRows)
KOKKOS_INLINE_FUNCTION void SetBoundaryNodeMap(const boundary_nodes_type bndry)
Set boolean array indicating which rows correspond to Dirichlet boundaries.
static Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Build(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node >> &map, size_t NumVectors, bool zeroOut=true)
void setValidator(RCP< const ParameterEntryValidator > const &validator)
T & get(const std::string &name, T def_value)
static void ApplyRowSumCriterion(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Magnitude rowSumTol, Teuchos::ArrayRCP< bool > &dirichletRows)
Apply Rowsum Criterion.
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
static void Write(const std::string &fileName, const Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > &M)
DropTol & operator=(DropTol const &)=default
Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > BlockDiagonalize(Level ¤tLevel, const RCP< Matrix > &A, bool generate_matrix) const
One-liner description of what is happening.
ParameterList & set(std::string const &name, T &&value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const
Return number of graph vertices.
KOKKOS_INLINE_FUNCTION size_type GetNodeNumEdges() const
Return number of graph edges.
KOKKOS_INLINE_FUNCTION const boundary_nodes_type GetBoundaryNodeMap() const
Returns map with global ids of boundary nodes.
Exception throws to report incompatible objects (like maps).
static const GlobalOrdinal DOFGid2NodeId(GlobalOrdinal gid, LocalOrdinal blockSize, const GlobalOrdinal offset, const GlobalOrdinal indexBase)
Translate global (row/column) id to global amalgamation block id.
Scalar GetThreshold() const
Return threshold value.
void resize(const size_type n, const T &val=T())
CoalesceDropFactory()
Constructor.
Kokkos::View< bool *, memory_space > boundary_nodes_type
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Class that holds all level-specific information.
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
static Teuchos::ArrayRCP< const bool > DetectDirichletRows(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Magnitude &tol=Teuchos::ScalarTraits< Magnitude >::zero(), bool count_twos_as_dirichlet=false)
Detect Dirichlet rows.
#define SET_VALID_ENTRY(name)
static void AmalgamateMap(const Map &sourceMap, const Matrix &A, RCP< const Map > &amalgamatedMap, Array< LO > &translation)
Method to create merged map for systems of PDEs.
static RCP< Vector > GetMatrixOverlappedDiagonal(const Matrix &A)
Extract Overlapped Matrix Diagonal.
void resize(size_type new_size, const value_type &x=value_type())
void Build(Level ¤tLevel) const
Build an object with this factory.
static RCP< Vector > Build(const Teuchos::RCP< const Map > &map, bool zeroOut=true)
void DeclareInput(Level ¤tLevel) const
Input.
KOKKOS_INLINE_FUNCTION neighbor_vertices_type getNeighborVertices(LO i) const
Return the list of vertices adjacent to the vertex 'v'.
const RCP< const Map > GetImportMap() const
Return overlapping import map (nodes).
RCP< const Tpetra::CrsGraph< LocalOrdinal, GlobalOrdinal, Node > > toTpetra(const RCP< const CrsGraph< LocalOrdinal, GlobalOrdinal, Node > > &graph)
void push_back(const value_type &x)
void MergeRows(const Matrix &A, const LO row, Array< LO > &cols, const Array< LO > &translation) const
Method to merge rows of matrix for systems of PDEs.
RCP< const CrsGraph< int, GlobalOrdinal, Node > > toXpetra(const Epetra_CrsGraph &g)
const RCP< const Map > GetDomainMap() const
Lightweight MueLu representation of a compressed row storage graph.
DropTol(real_type val_, real_type diag_, LO col_, bool drop_)
Exception throws to report errors in the internal logical of the program.
#define TEUCHOS_ASSERT(assertion_test)
static Kokkos::View< bool *, typename Kokkos::HostSpace > DetectDirichletRows_kokkos_host(const Matrix &A, const Magnitude &tol=Teuchos::ScalarTraits< typename Teuchos::ScalarTraits< SC >::magnitudeType >::zero(), const bool count_twos_as_dirichlet=false)
static Teuchos::ScalarTraits< Scalar >::magnitudeType Distance2(const Teuchos::Array< Teuchos::ArrayRCP< const Scalar >> &v, LocalOrdinal i0, LocalOrdinal i1)
Squared distance between two rows in a multivector.
ParameterEntry & getEntry(const std::string &name)
void MergeRowsWithDropping(const Matrix &A, const LO row, const ArrayRCP< const SC > &ghostedDiagVals, SC threshold, Array< LO > &cols, const Array< LO > &translation) const