10 #ifndef MUELU_COALESCEDROPFACTORY_DEF_HPP
11 #define MUELU_COALESCEDROPFACTORY_DEF_HPP
17 #include <Xpetra_MapFactory.hpp>
18 #include <Xpetra_Map.hpp>
20 #include <Xpetra_MultiVectorFactory.hpp>
22 #include <Xpetra_StridedMap.hpp>
28 #include <Kokkos_NestedSort.hpp>
29 #include <Kokkos_StdAlgorithms.hpp>
32 #include "MueLu_AmalgamationFactory.hpp"
33 #include "MueLu_AmalgamationInfo.hpp"
35 #include "MueLu_LWGraph.hpp"
38 #include "MueLu_LWGraph.hpp"
41 #include "MueLu_PreDropFunctionConstVal.hpp"
42 #include "MueLu_Utilities.hpp"
44 #ifdef HAVE_XPETRA_TPETRA
45 #include "Tpetra_CrsGraphTransposer.hpp"
59 template <
class real_type,
class LO>
68 DropTol(real_type val_, real_type diag_,
LO col_,
bool drop_)
89 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
93 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
101 SET_VALID_ENTRY(
"aggregation: distance laplacian directional weights");
106 validParamList->
getEntry(
"aggregation: drop scheme").
setValidator(
rcp(
new Teuchos::StringValidator(Teuchos::tuple<std::string>(
"signed classical sa",
"classical",
"distance laplacian",
"signed classical",
"block diagonal",
"block diagonal classical",
"block diagonal distance laplacian",
"block diagonal signed classical",
"block diagonal colored signed classical"))));
111 #undef SET_VALID_ENTRY
112 validParamList->
set<
bool>(
"lightweight wrap",
true,
"Experimental option for lightweight graph access");
115 validParamList->
set<
RCP<const FactoryBase>>(
"UnAmalgamationInfo", Teuchos::null,
"Generating factory for UnAmalgamationInfo");
119 return validParamList;
122 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
124 : predrop_(Teuchos::null) {}
126 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
128 Input(currentLevel,
"A");
129 Input(currentLevel,
"UnAmalgamationInfo");
132 if (pL.
get<
bool>(
"lightweight wrap") ==
true) {
133 std::string algo = pL.
get<std::string>(
"aggregation: drop scheme");
134 if (algo ==
"distance laplacian" || algo ==
"block diagonal distance laplacian") {
135 Input(currentLevel,
"Coordinates");
137 if (algo ==
"signed classical sa")
139 else if (algo.find(
"block diagonal") != std::string::npos || algo.find(
"signed classical") != std::string::npos) {
140 Input(currentLevel,
"BlockNumber");
145 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
150 typedef typename STS::magnitudeType real_type;
154 if (predrop_ != Teuchos::null)
155 GetOStream(
Parameters0) << predrop_->description();
157 RCP<Matrix> realA = Get<RCP<Matrix>>(currentLevel,
"A");
160 bool doExperimentalWrap = pL.get<
bool>(
"lightweight wrap");
162 GetOStream(
Parameters0) <<
"lightweight wrap = " << doExperimentalWrap << std::endl;
163 std::string algo = pL.get<std::string>(
"aggregation: drop scheme");
164 const bool aggregationMayCreateDirichlet = pL.get<
bool>(
"aggregation: dropping may create Dirichlet");
169 bool use_block_algorithm =
false;
170 LO interleaved_blocksize = as<LO>(pL.get<
int>(
"aggregation: block diagonal: interleaved blocksize"));
171 bool useSignedClassicalRS =
false;
172 bool useSignedClassicalSA =
false;
173 bool generateColoringGraph =
false;
177 typename STS::magnitudeType rowSumTol = as<typename STS::magnitudeType>(pL.get<
double>(
"aggregation: row sum drop tol"));
181 if (algo ==
"distance laplacian") {
183 Coords = Get<RCP<RealValuedMultiVector>>(currentLevel,
"Coordinates");
185 }
else if (algo ==
"signed classical sa") {
186 useSignedClassicalSA =
true;
189 }
else if (algo ==
"signed classical" || algo ==
"block diagonal colored signed classical" || algo ==
"block diagonal signed classical") {
190 useSignedClassicalRS =
true;
195 if (!importer.is_null()) {
198 ghostedBlockNumber->doImport(*BlockNumber, *importer,
Xpetra::INSERT);
200 ghostedBlockNumber = BlockNumber;
203 if (algo ==
"block diagonal colored signed classical")
204 generateColoringGraph =
true;
208 }
else if (algo ==
"block diagonal") {
210 BlockDiagonalize(currentLevel, realA,
false);
212 }
else if (algo ==
"block diagonal classical" || algo ==
"block diagonal distance laplacian") {
214 use_block_algorithm =
true;
215 RCP<Matrix> filteredMatrix = BlockDiagonalize(currentLevel, realA,
true);
216 if (algo ==
"block diagonal distance laplacian") {
219 if (OldCoords->getLocalLength() != realA->getLocalNumRows()) {
220 LO dim = (
LO)OldCoords->getNumVectors();
221 Coords = RealValuedMultiVectorFactory::Build(realA->getRowMap(), dim);
222 for (
LO k = 0; k < dim; k++) {
225 for (
LO i = 0; i < (
LO)OldCoords->getLocalLength(); i++) {
226 LO new_base = i * dim;
227 for (
LO j = 0; j < interleaved_blocksize; j++)
228 new_vec[new_base + j] = old_vec[i];
234 algo =
"distance laplacian";
235 }
else if (algo ==
"block diagonal classical") {
247 enum { NO_WEIGHTS = 0,
250 int use_dlap_weights = NO_WEIGHTS;
251 if (algo ==
"distance laplacian") {
252 LO dim = (
LO)Coords->getNumVectors();
254 bool non_unity =
false;
255 for (
LO i = 0; !non_unity && i < (
LO)dlap_weights.
size(); i++) {
256 if (dlap_weights[i] != 1.0) {
261 LO blocksize = use_block_algorithm ? as<LO>(pL.get<
int>(
"aggregation: block diagonal: interleaved blocksize")) : 1;
262 if ((
LO)dlap_weights.
size() == dim)
263 use_dlap_weights = SINGLE_WEIGHTS;
264 else if ((
LO)dlap_weights.
size() == blocksize * dim)
265 use_dlap_weights = BLOCK_WEIGHTS;
268 "length of 'aggregation: distance laplacian directional weights' must equal the coordinate dimension OR the coordinate dimension times the blocksize");
271 GetOStream(
Statistics1) <<
"Using distance laplacian weights: " << dlap_weights << std::endl;
286 if (doExperimentalWrap) {
292 if (pL.get<
bool>(
"aggregation: use ml scaling of drop tol"))
293 threshold = pL.get<
double>(
"aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID());
295 threshold = as<SC>(pL.get<
double>(
"aggregation: drop tol"));
297 std::string distanceLaplacianAlgoStr = pL.get<std::string>(
"aggregation: distance laplacian algo");
298 std::string classicalAlgoStr = pL.get<std::string>(
"aggregation: classical algo");
299 real_type realThreshold = STS::magnitude(threshold);
303 #ifdef HAVE_MUELU_DEBUG
304 int distanceLaplacianCutVerbose = 0;
306 #ifdef DJS_READ_ENV_VARIABLES
307 if (getenv(
"MUELU_DROP_TOLERANCE_MODE")) {
308 distanceLaplacianAlgoStr = std::string(getenv(
"MUELU_DROP_TOLERANCE_MODE"));
311 if (getenv(
"MUELU_DROP_TOLERANCE_THRESHOLD")) {
312 auto tmp = atoi(getenv(
"MUELU_DROP_TOLERANCE_THRESHOLD"));
313 realThreshold = 1e-4 * tmp;
316 #ifdef HAVE_MUELU_DEBUG
317 if (getenv(
"MUELU_DROP_TOLERANCE_VERBOSE")) {
318 distanceLaplacianCutVerbose = atoi(getenv(
"MUELU_DROP_TOLERANCE_VERBOSE"));
326 if (algo ==
"distance laplacian") {
327 if (distanceLaplacianAlgoStr ==
"default")
329 else if (distanceLaplacianAlgoStr ==
"unscaled cut")
331 else if (distanceLaplacianAlgoStr ==
"scaled cut")
333 else if (distanceLaplacianAlgoStr ==
"scaled cut symmetric")
337 GetOStream(
Runtime0) <<
"algorithm = \"" << algo <<
"\" distance laplacian algorithm = \"" << distanceLaplacianAlgoStr <<
"\": threshold = " << threshold <<
", blocksize = " << A->GetFixedBlockSize() << std::endl;
338 }
else if (algo ==
"classical") {
339 if (classicalAlgoStr ==
"default")
341 else if (classicalAlgoStr ==
"unscaled cut")
343 else if (classicalAlgoStr ==
"scaled cut")
347 GetOStream(
Runtime0) <<
"algorithm = \"" << algo <<
"\" classical algorithm = \"" << classicalAlgoStr <<
"\": threshold = " << threshold <<
", blocksize = " << A->GetFixedBlockSize() << std::endl;
350 GetOStream(
Runtime0) <<
"algorithm = \"" << algo <<
"\": threshold = " << threshold <<
", blocksize = " << A->GetFixedBlockSize() << std::endl;
351 Set<bool>(currentLevel,
"Filtering", (threshold != STS::zero()));
353 const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<
double>(
"aggregation: Dirichlet threshold")));
359 GO numDropped = 0, numTotal = 0;
360 std::string graphType =
"unamalgamated";
379 const LO BlockSize = A->GetFixedBlockSize() / A->GetStorageBlockSize();
382 if (algo ==
"classical") {
383 if (predrop_ == null) {
388 if (predrop_ != null) {
391 "MueLu::CoalesceFactory::Build: cast to PreDropFunctionConstVal failed.");
394 if (newt != threshold) {
395 GetOStream(
Warnings0) <<
"switching threshold parameter from " << threshold <<
" (list) to " << newt <<
" (user function" << std::endl;
402 if (BlockSize == 1 && threshold == STS::zero() && !useSignedClassicalRS && !useSignedClassicalSA && A->hasCrsGraph()) {
411 numTotal = A->getLocalNumEntries();
414 GO numLocalBoundaryNodes = 0;
415 GO numGlobalBoundaryNodes = 0;
416 for (
size_t i = 0; i < boundaryNodes.size(); ++i)
417 if (boundaryNodes[i])
418 numLocalBoundaryNodes++;
420 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
421 GetOStream(
Statistics1) <<
"Detected " << numGlobalBoundaryNodes <<
" Dirichlet nodes" << std::endl;
424 Set(currentLevel,
"DofsPerNode", 1);
425 Set(currentLevel,
"Graph", graph);
427 }
else if ((BlockSize == 1 && threshold != STS::zero()) ||
428 (BlockSize == 1 && threshold == STS::zero() && !A->hasCrsGraph()) ||
429 (BlockSize == 1 && useSignedClassicalRS) ||
430 (BlockSize == 1 && useSignedClassicalSA)) {
436 typename LWGraph::row_type::non_const_type
rows(
"rows", A->getLocalNumRows() + 1);
437 typename LWGraph::entries_type::non_const_type columns(
"columns", A->getLocalNumEntries());
439 using MT =
typename STS::magnitudeType;
444 if (useSignedClassicalRS) {
445 if (ghostedBlockNumber.
is_null()) {
447 negMaxOffDiagonal = negMaxOffDiagonalVec->getData(0);
449 GetOStream(
Statistics1) <<
"Calculated max point off-diagonal" << std::endl;
452 negMaxOffDiagonal = negMaxOffDiagonalVec->getData(0);
454 GetOStream(
Statistics1) <<
"Calculating max block off-diagonal" << std::endl;
459 ghostedDiagVals = ghostedDiag->getData(0);
463 if (rowSumTol > 0.) {
464 if (ghostedBlockNumber.
is_null()) {
466 GetOStream(
Statistics1) <<
"Applying point row sum criterion." << std::endl;
470 GetOStream(
Statistics1) <<
"Applying block row sum criterion." << std::endl;
476 if (!ghostedBlockNumber.
is_null())
477 g_block_id = ghostedBlockNumber->getData(0);
483 for (
LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
484 size_t nnz = A->getNumEntriesInLocalRow(row);
485 bool rowIsDirichlet = boundaryNodes[row];
488 A->getLocalRowView(row, indices, vals);
496 if (useSignedClassicalRS) {
498 for (
LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
499 LO col = indices[colID];
500 MT max_neg_aik = realThreshold * STS::real(negMaxOffDiagonal[row]);
501 MT neg_aij = -STS::real(vals[colID]);
506 if ((!rowIsDirichlet && (g_block_id.
is_null() || g_block_id[row] == g_block_id[col]) && neg_aij > max_neg_aik) || row == col) {
507 columns[realnnz++] = col;
512 rows(row + 1) = realnnz;
513 }
else if (useSignedClassicalSA) {
515 for (
LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
516 LO col = indices[colID];
518 bool is_nonpositive = STS::real(vals[colID]) <= 0;
519 MT aiiajj = STS::magnitude(threshold * threshold * ghostedDiagVals[col] * ghostedDiagVals[row]);
520 MT aij = is_nonpositive ? STS::magnitude(vals[colID] * vals[colID]) : (-STS::magnitude(vals[colID] * vals[colID]));
526 if ((!rowIsDirichlet && aij > aiiajj) || row == col) {
527 columns(realnnz++) = col;
532 rows[row + 1] = realnnz;
535 for (
LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
536 LO col = indices[colID];
537 MT aiiajj = STS::magnitude(threshold * threshold * ghostedDiagVals[col] * ghostedDiagVals[row]);
538 MT aij = STS::magnitude(vals[colID] * vals[colID]);
540 if ((!rowIsDirichlet && aij > aiiajj) || row == col) {
541 columns(realnnz++) = col;
546 rows(row + 1) = realnnz;
552 using ExecSpace =
typename Node::execution_space;
553 using TeamPol = Kokkos::TeamPolicy<ExecSpace>;
554 using TeamMem =
typename TeamPol::member_type;
555 using ATS = Kokkos::ArithTraits<Scalar>;
556 using impl_scalar_type =
typename ATS::val_type;
557 using implATS = Kokkos::ArithTraits<impl_scalar_type>;
560 auto ghostedDiagValsView = Kokkos::subview(ghostedDiag->getDeviceLocalView(Xpetra::Access::ReadOnly), Kokkos::ALL(), 0);
561 auto thresholdKokkos =
static_cast<impl_scalar_type
>(threshold);
562 auto realThresholdKokkos = implATS::magnitude(thresholdKokkos);
563 auto columnsDevice = Kokkos::create_mirror_view(ExecSpace(), columns);
565 auto A_device = A->getLocalMatrixDevice();
571 boundaryNodesVector->getDataNonConst(0)[i] = boundaryNodes[i];
575 boundaryColumnVector->doImport(*boundaryNodesVector, *importer,
Xpetra::INSERT);
577 boundaryColumnVector = boundaryNodesVector;
579 auto boundaryColumn = boundaryColumnVector->getDeviceLocalView(Xpetra::Access::ReadOnly);
580 auto boundary = Kokkos::subview(boundaryColumn, Kokkos::ALL(), 0);
582 Kokkos::View<LO*, ExecSpace> rownnzView(
"rownnzView", A_device.numRows());
583 auto drop_views = Kokkos::View<bool*, ExecSpace>(
"drop_views", A_device.nnz());
584 auto index_views = Kokkos::View<size_t*, ExecSpace>(
"index_views", A_device.nnz());
586 Kokkos::parallel_reduce(
587 "classical_cut", TeamPol(A_device.numRows(), Kokkos::AUTO), KOKKOS_LAMBDA(
const TeamMem& teamMember,
LO& globalnnz,
GO& totalDropped) {
588 LO row = teamMember.league_rank();
589 auto rowView = A_device.rowConst(row);
590 size_t nnz = rowView.length;
592 auto drop_view = Kokkos::subview(drop_views, Kokkos::make_pair(A_device.graph.row_map(row), A_device.graph.row_map(row + 1)));
593 auto index_view = Kokkos::subview(index_views, Kokkos::make_pair(A_device.graph.row_map(row), A_device.graph.row_map(row + 1)));
596 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, (
LO)nnz), [&](
const LO colID) {
597 index_view(colID) = colID;
598 LO col = rowView.colidx(colID);
601 if (row == col || boundary(col)) {
602 drop_view(colID) =
true;
604 drop_view(colID) =
false;
608 size_t dropStart = nnz;
611 Kokkos::Experimental::sort_team(teamMember, index_view, [=](
size_t& x,
size_t& y) ->
bool {
612 if (drop_view(x) || drop_view(y)) {
613 return drop_view(x) < drop_view(y);
615 auto x_aij = implATS::magnitude(rowView.value(x) * rowView.value(x));
616 auto y_aij = implATS::magnitude(rowView.value(y) * rowView.value(y));
617 return x_aij > y_aij;
622 Kokkos::parallel_reduce(
623 Kokkos::TeamThreadRange(teamMember, 1, nnz), [=](
size_t i,
size_t& min) {
624 auto const& x = index_view(i - 1);
625 auto const& y = index_view(i);
626 typename implATS::magnitudeType x_aij = 0;
627 typename implATS::magnitudeType y_aij = 0;
629 x_aij = implATS::magnitude(rowView.value(x) * rowView.value(x));
632 y_aij = implATS::magnitude(rowView.value(y) * rowView.value(y));
635 if (realThresholdKokkos * realThresholdKokkos * x_aij > y_aij) {
641 Kokkos::Min<size_t>(dropStart));
644 Kokkos::Experimental::sort_team(teamMember, index_view, [=](
size_t& x,
size_t& y) ->
bool {
645 if (drop_view(x) || drop_view(y)) {
646 return drop_view(x) < drop_view(y);
648 auto x_aij = implATS::magnitude(rowView.value(x) * rowView.value(x));
649 auto y_aij = implATS::magnitude(rowView.value(y) * rowView.value(y));
650 auto x_aiiajj = implATS::magnitude(ghostedDiagValsView(rowView.colidx(x)) * ghostedDiagValsView(row));
651 auto y_aiiajj = implATS::magnitude(ghostedDiagValsView(rowView.colidx(y)) * ghostedDiagValsView(row));
652 return (x_aij / x_aiiajj) > (y_aij / y_aiiajj);
657 Kokkos::parallel_reduce(
658 Kokkos::TeamThreadRange(teamMember, 1, nnz), [=](
size_t i,
size_t& min) {
659 auto const& x = index_view(i - 1);
660 auto const& y = index_view(i);
661 typename implATS::magnitudeType x_val = 0;
662 typename implATS::magnitudeType y_val = 0;
664 typename implATS::magnitudeType x_aij = implATS::magnitude(rowView.value(x) * rowView.value(x));
665 typename implATS::magnitudeType x_aiiajj = implATS::magnitude(ghostedDiagValsView(rowView.colidx(x)) * ghostedDiagValsView(row));
666 x_val = x_aij / x_aiiajj;
669 typename implATS::magnitudeType y_aij = implATS::magnitude(rowView.value(y) * rowView.value(y));
670 typename implATS::magnitudeType y_aiiajj = implATS::magnitude(ghostedDiagValsView(rowView.colidx(y)) * ghostedDiagValsView(row));
671 y_val = y_aij / y_aiiajj;
674 if (realThresholdKokkos * realThresholdKokkos * x_val > y_val) {
680 Kokkos::Min<size_t>(dropStart));
684 if (dropStart < nnz) {
685 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, dropStart, nnz), [=](
size_t i) {
686 drop_view(index_view(i)) =
true;
692 Kokkos::parallel_reduce(
693 Kokkos::TeamThreadRange(teamMember, nnz), [=](
const size_t idxID,
LO& keep,
GO& drop) {
694 LO col = rowView.colidx(idxID);
696 if (row == col || !drop_view(idxID)) {
697 columnsDevice(A_device.graph.row_map(row) + idxID) = col;
700 columnsDevice(A_device.graph.row_map(row) + idxID) = -1;
707 totalDropped += rowDropped;
708 rownnzView(row) = rownnz;
710 realnnz, numDropped);
713 Kokkos::Experimental::remove(ExecSpace(), columnsDevice, -1);
714 Kokkos::deep_copy(columns, columnsDevice);
717 auto rowsDevice = Kokkos::create_mirror_view(ExecSpace(),
rows);
718 Kokkos::parallel_scan(
719 Kokkos::RangePolicy<ExecSpace>(0, A_device.numRows()), KOKKOS_LAMBDA(
const int i,
LO& partial_sum,
bool is_final) {
720 partial_sum += rownnzView(i);
721 if (is_final) rowsDevice(i + 1) = partial_sum;
723 Kokkos::deep_copy(
rows, rowsDevice);
726 numTotal = A->getLocalNumEntries();
728 if (aggregationMayCreateDirichlet) {
730 for (
LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
732 boundaryNodes[row] =
true;
736 RCP<LWGraph> graph =
rcp(
new LWGraph(
rows, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), A->getRowMap(), A->getColMap(),
"thresholded graph of A"));
739 GO numLocalBoundaryNodes = 0;
740 GO numGlobalBoundaryNodes = 0;
741 for (
size_t i = 0; i < boundaryNodes.size(); ++i)
742 if (boundaryNodes(i))
743 numLocalBoundaryNodes++;
745 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
746 GetOStream(
Statistics1) <<
"Detected " << numGlobalBoundaryNodes <<
" Dirichlet nodes" << std::endl;
748 Set(currentLevel,
"Graph", graph);
749 Set(currentLevel,
"DofsPerNode", 1);
752 if (generateColoringGraph) {
755 BlockDiagonalizeGraph(graph, ghostedBlockNumber, colorGraph, importer);
756 Set(currentLevel,
"Coloring Graph", colorGraph);
776 }
else if (BlockSize > 1 && threshold == STS::zero()) {
781 graphType =
"amalgamated";
789 Array<LO> rowTranslation = *(amalInfo->getRowTranslation());
790 Array<LO> colTranslation = *(amalInfo->getColTranslation());
793 LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
796 typename LWGraph::row_type::non_const_type
rows(
"rows", numRows + 1);
797 typename LWGraph::entries_type::non_const_type columns(
"columns", A->getLocalNumEntries());
800 Kokkos::deep_copy(amalgBoundaryNodes,
false);
812 LO blkSize = A->GetFixedBlockSize();
814 LO blkPartSize = A->GetFixedBlockSize();
815 if (A->IsView(
"stridedMaps") ==
true) {
819 blkSize = Teuchos::as<const LO>(strMap->getFixedBlockSize());
820 blkId = strMap->getStridedBlockId();
822 blkPartSize = Teuchos::as<LO>(strMap->getStridingData()[blkId]);
829 for (
LO row = 0; row < numRows; row++) {
839 bool isBoundary =
false;
840 if (pL.get<
bool>(
"aggregation: greedy Dirichlet") ==
true) {
841 for (
LO j = 0; j < blkPartSize; j++) {
842 if (pointBoundaryNodes[row * blkPartSize + j]) {
849 for (
LO j = 0; j < blkPartSize; j++) {
850 if (!pointBoundaryNodes[row * blkPartSize + j]) {
860 MergeRows(*A, row, indicesExtra, colTranslation);
863 indices = indicesExtra;
864 numTotal += indices.
size();
868 LO nnz = indices.
size(), rownnz = 0;
869 for (
LO colID = 0; colID < nnz; colID++) {
870 LO col = indices[colID];
871 columns(realnnz++) = col;
882 amalgBoundaryNodes[row] =
true;
884 rows(row + 1) = realnnz;
887 RCP<LWGraph> graph =
rcp(
new LWGraph(rows, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), uniqueMap, nonUniqueMap,
"amalgamated graph of A"));
891 GO numLocalBoundaryNodes = 0;
892 GO numGlobalBoundaryNodes = 0;
894 for (
size_t i = 0; i < amalgBoundaryNodes.size(); ++i)
895 if (amalgBoundaryNodes(i))
896 numLocalBoundaryNodes++;
899 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
900 GetOStream(
Statistics1) <<
"Detected " << numGlobalBoundaryNodes
901 <<
" agglomerated Dirichlet nodes" << std::endl;
904 Set(currentLevel,
"Graph", graph);
905 Set(currentLevel,
"DofsPerNode", blkSize);
907 }
else if (BlockSize > 1 && threshold != STS::zero()) {
911 graphType =
"amalgamated";
919 Array<LO> rowTranslation = *(amalInfo->getRowTranslation());
920 Array<LO> colTranslation = *(amalInfo->getColTranslation());
923 LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
926 typename LWGraph::row_type::non_const_type
rows(
"rows", numRows + 1);
927 typename LWGraph::entries_type::non_const_type columns(
"columns", A->getLocalNumEntries());
930 Kokkos::deep_copy(amalgBoundaryNodes,
false);
941 LO blkSize = A->GetFixedBlockSize();
943 LO blkPartSize = A->GetFixedBlockSize();
944 if (A->IsView(
"stridedMaps") ==
true) {
948 blkSize = Teuchos::as<const LO>(strMap->getFixedBlockSize());
949 blkId = strMap->getStridedBlockId();
951 blkPartSize = Teuchos::as<LO>(strMap->getStridingData()[blkId]);
962 for (
LO row = 0; row < numRows; row++) {
972 bool isBoundary =
false;
973 if (pL.get<
bool>(
"aggregation: greedy Dirichlet") ==
true) {
974 for (
LO j = 0; j < blkPartSize; j++) {
975 if (pointBoundaryNodes[row * blkPartSize + j]) {
982 for (
LO j = 0; j < blkPartSize; j++) {
983 if (!pointBoundaryNodes[row * blkPartSize + j]) {
993 MergeRowsWithDropping(*A, row, ghostedDiagVals, threshold, indicesExtra, colTranslation);
996 indices = indicesExtra;
997 numTotal += indices.
size();
1001 LO nnz = indices.
size(), rownnz = 0;
1002 for (
LO colID = 0; colID < nnz; colID++) {
1003 LO col = indices[colID];
1004 columns[realnnz++] = col;
1015 amalgBoundaryNodes[row] =
true;
1017 rows[row + 1] = realnnz;
1021 RCP<LWGraph> graph =
rcp(
new LWGraph(rows, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), uniqueMap, nonUniqueMap,
"amalgamated graph of A"));
1025 GO numLocalBoundaryNodes = 0;
1026 GO numGlobalBoundaryNodes = 0;
1028 for (
size_t i = 0; i < amalgBoundaryNodes.size(); ++i)
1029 if (amalgBoundaryNodes(i))
1030 numLocalBoundaryNodes++;
1033 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1034 GetOStream(
Statistics1) <<
"Detected " << numGlobalBoundaryNodes
1035 <<
" agglomerated Dirichlet nodes" << std::endl;
1038 Set(currentLevel,
"Graph", graph);
1039 Set(currentLevel,
"DofsPerNode", blkSize);
1042 }
else if (algo ==
"distance laplacian") {
1043 LO blkSize = A->GetFixedBlockSize();
1044 GO indexBase = A->getRowMap()->getIndexBase();
1059 if ((blkSize == 1) && (threshold == STS::zero())) {
1063 graphType =
"unamalgamated";
1064 numTotal = A->getLocalNumEntries();
1067 GO numLocalBoundaryNodes = 0;
1068 GO numGlobalBoundaryNodes = 0;
1069 for (
size_t i = 0; i < pointBoundaryNodes.size(); ++i)
1070 if (pointBoundaryNodes(i))
1071 numLocalBoundaryNodes++;
1073 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1074 GetOStream(
Statistics1) <<
"Detected " << numGlobalBoundaryNodes <<
" Dirichlet nodes" << std::endl;
1077 Set(currentLevel,
"DofsPerNode", blkSize);
1078 Set(currentLevel,
"Graph", graph);
1094 "Coordinate vector length (" << Coords->getLocalLength() <<
") is incompatible with number of rows in A (" << A->getRowMap()->getLocalNumElements() <<
") by modulo block size (" << blkSize <<
").");
1100 uniqueMap = A->getRowMap();
1101 nonUniqueMap = A->getColMap();
1102 graphType =
"unamalgamated";
1105 uniqueMap = Coords->getMap();
1107 "Different index bases for matrix and coordinates");
1111 graphType =
"amalgamated";
1113 LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
1118 if (threshold != STS::zero()) {
1123 if (blkSize == 1 && realA->getCrsGraph()->getImporter() != Teuchos::null) {
1124 GetOStream(
Warnings1) <<
"Using existing importer from matrix graph" << std::endl;
1125 importer = realA->getCrsGraph()->getImporter();
1127 GetOStream(
Warnings0) <<
"Constructing new importer instance" << std::endl;
1128 importer = ImportFactory::Build(uniqueMap, nonUniqueMap);
1138 RCP<Vector> localLaplDiag = VectorFactory::Build(uniqueMap);
1141 if (threshold != STS::zero()) {
1142 const size_t numVectors = ghostedCoords->getNumVectors();
1143 coordData.
reserve(numVectors);
1144 for (
size_t j = 0; j < numVectors; j++) {
1151 ArrayRCP<SC> localLaplDiagData = localLaplDiag->getDataNonConst(0);
1152 for (
LO row = 0; row < numRows; row++) {
1157 A->getLocalRowView(row, indices, vals);
1162 MergeRows(*A, row, indicesExtra, colTranslation);
1163 indices = indicesExtra;
1167 bool haveAddedToDiag =
false;
1168 for (
LO colID = 0; colID < nnz; colID++) {
1169 const LO col = indices[colID];
1172 if (use_dlap_weights == SINGLE_WEIGHTS) {
1177 }
else if (use_dlap_weights == BLOCK_WEIGHTS) {
1178 int block_id = row % interleaved_blocksize;
1179 int block_start = block_id * interleaved_blocksize;
1185 haveAddedToDiag =
true;
1190 if (!haveAddedToDiag)
1191 localLaplDiagData[row] = STS::rmax();
1196 ghostedLaplDiag = VectorFactory::Build(nonUniqueMap);
1197 ghostedLaplDiag->doImport(*localLaplDiag, *importer,
Xpetra::INSERT);
1198 ghostedLaplDiagData = ghostedLaplDiag->getDataNonConst(0);
1202 GetOStream(
Runtime0) <<
"Skipping distance laplacian construction due to 0 threshold" << std::endl;
1208 typename LWGraph::row_type::non_const_type
rows(
"rows", numRows + 1);
1209 typename LWGraph::entries_type::non_const_type columns(
"columns", A->getLocalNumEntries());
1211 #ifdef HAVE_MUELU_DEBUG
1213 for (
LO i = 0; i < (
LO)columns.size(); i++) columns[i] = -666;
1218 bool use_stop_array = threshold != STS::zero() && distanceLaplacianAlgo ==
scaled_cut_symmetric;
1221 rows_stop.
resize(numRows);
1224 Kokkos::deep_copy(amalgBoundaryNodes,
false);
1233 if (threshold != STS::zero()) {
1234 const size_t numVectors = ghostedCoords->getNumVectors();
1235 coordData.
reserve(numVectors);
1236 for (
size_t j = 0; j < numVectors; j++) {
1243 for (
LO row = 0; row < numRows; row++) {
1246 bool isBoundary =
false;
1250 A->getLocalRowView(row, indices, vals);
1251 isBoundary = pointBoundaryNodes[row];
1254 for (
LO j = 0; j < blkSize; j++) {
1255 if (!pointBoundaryNodes[row * blkSize + j]) {
1263 MergeRows(*A, row, indicesExtra, colTranslation);
1266 indices = indicesExtra;
1268 numTotal += indices.
size();
1270 LO nnz = indices.
size(), rownnz = 0;
1272 if (use_stop_array) {
1274 realnnz =
rows(row);
1277 if (threshold != STS::zero()) {
1281 for (
LO colID = 0; colID < nnz; colID++) {
1282 LO col = indices[colID];
1285 columns(realnnz++) = col;
1291 if (isBoundary)
continue;
1294 if (use_dlap_weights == SINGLE_WEIGHTS) {
1296 }
else if (use_dlap_weights == BLOCK_WEIGHTS) {
1297 int block_id = row % interleaved_blocksize;
1298 int block_start = block_id * interleaved_blocksize;
1303 real_type aiiajj = STS::magnitude(realThreshold * realThreshold * ghostedLaplDiagData[row] * ghostedLaplDiagData[col]);
1304 real_type aij = STS::magnitude(laplVal * laplVal);
1307 columns(realnnz++) = col;
1316 std::vector<DropTol> drop_vec;
1317 drop_vec.reserve(nnz);
1322 for (
LO colID = 0; colID < nnz; colID++) {
1323 LO col = indices[colID];
1326 drop_vec.emplace_back(zero, one, colID,
false);
1330 if (isBoundary)
continue;
1333 if (use_dlap_weights == SINGLE_WEIGHTS) {
1335 }
else if (use_dlap_weights == BLOCK_WEIGHTS) {
1336 int block_id = row % interleaved_blocksize;
1337 int block_start = block_id * interleaved_blocksize;
1343 real_type aiiajj = STS::magnitude(ghostedLaplDiagData[row] * ghostedLaplDiagData[col]);
1344 real_type aij = STS::magnitude(laplVal * laplVal);
1346 drop_vec.emplace_back(aij, aiiajj, colID,
false);
1349 const size_t n = drop_vec.size();
1352 std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol
const& a, DropTol
const& b) {
1353 return a.val > b.val;
1357 for (
size_t i = 1; i < n; ++i) {
1359 auto const& x = drop_vec[i - 1];
1360 auto const& y = drop_vec[i];
1363 if (realThreshold * realThreshold * a > b) {
1365 #ifdef HAVE_MUELU_DEBUG
1366 if (distanceLaplacianCutVerbose) {
1367 std::cout <<
"DJS: KEEP, N, ROW: " << i + 1 <<
", " << n <<
", " << row << std::endl;
1372 drop_vec[i].drop = drop;
1375 std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol
const& a, DropTol
const& b) {
1376 return a.val / a.diag > b.val / b.diag;
1380 for (
size_t i = 1; i < n; ++i) {
1382 auto const& x = drop_vec[i - 1];
1383 auto const& y = drop_vec[i];
1384 auto a = x.val / x.diag;
1385 auto b = y.val / y.diag;
1386 if (realThreshold * realThreshold * a > b) {
1388 #ifdef HAVE_MUELU_DEBUG
1389 if (distanceLaplacianCutVerbose) {
1390 std::cout <<
"DJS: KEEP, N, ROW: " << i + 1 <<
", " << n <<
", " << row << std::endl;
1395 drop_vec[i].drop = drop;
1399 std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol
const& a, DropTol
const& b) {
1400 return a.col < b.col;
1403 for (
LO idxID = 0; idxID < (
LO)drop_vec.size(); idxID++) {
1404 LO col = indices[drop_vec[idxID].col];
1408 columns(realnnz++) = col;
1414 if (!drop_vec[idxID].drop) {
1415 columns(realnnz++) = col;
1426 for (
LO colID = 0; colID < nnz; colID++) {
1427 LO col = indices[colID];
1428 columns(realnnz++) = col;
1440 amalgBoundaryNodes[row] =
true;
1444 rows_stop[row] = rownnz + rows[row];
1446 rows[row + 1] = realnnz;
1451 if (use_stop_array) {
1454 for (
LO row = 0; row < numRows; row++) {
1455 for (
LO colidx = rows[row]; colidx < rows_stop[row]; colidx++) {
1456 LO col = columns[colidx];
1457 if (col >= numRows)
continue;
1460 for (
LO t_col =
rows(col); !found && t_col < rows_stop[col]; t_col++) {
1461 if (columns[t_col] == row)
1466 if (!found && !pointBoundaryNodes[col] && Teuchos::as<typename LWGraph::row_type::value_type>(rows_stop[col]) < rows[col + 1]) {
1467 LO new_idx = rows_stop[col];
1469 columns[new_idx] = row;
1477 LO current_start = 0;
1478 for (
LO row = 0; row < numRows; row++) {
1479 LO old_start = current_start;
1480 for (
LO col =
rows(row); col < rows_stop[row]; col++) {
1481 if (current_start != col) {
1482 columns(current_start) = columns(col);
1486 rows[row] = old_start;
1488 rows(numRows) = realnnz = current_start;
1494 graph =
rcp(
new LWGraph(rows, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), uniqueMap, nonUniqueMap,
"amalgamated graph of A"));
1499 GO numLocalBoundaryNodes = 0;
1500 GO numGlobalBoundaryNodes = 0;
1502 for (
size_t i = 0; i < amalgBoundaryNodes.size(); ++i)
1503 if (amalgBoundaryNodes(i))
1504 numLocalBoundaryNodes++;
1507 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1508 GetOStream(
Statistics1) <<
"Detected " << numGlobalBoundaryNodes <<
" agglomerated Dirichlet nodes"
1509 <<
" using threshold " << dirichletThreshold << std::endl;
1512 Set(currentLevel,
"Graph", graph);
1513 Set(currentLevel,
"DofsPerNode", blkSize);
1517 if ((GetVerbLevel() &
Statistics1) && !(A->GetFixedBlockSize() > 1 && threshold != STS::zero())) {
1519 GO numGlobalTotal, numGlobalDropped;
1522 GetOStream(
Statistics1) <<
"Number of dropped entries in " << graphType <<
" matrix graph: " << numGlobalDropped <<
"/" << numGlobalTotal;
1523 if (numGlobalTotal != 0)
1524 GetOStream(
Statistics1) <<
" (" << 100 * Teuchos::as<double>(numGlobalDropped) / Teuchos::as<double>(numGlobalTotal) <<
"%)";
1531 SC threshold = as<SC>(pL.get<
double>(
"aggregation: drop tol"));
1533 GetOStream(
Runtime0) <<
"algorithm = \""
1535 <<
"\": threshold = " << threshold <<
", blocksize = " << A->GetFixedBlockSize() << std::endl;
1536 Set<bool>(currentLevel,
"Filtering", (threshold != STS::zero()));
1542 GO indexBase = rowMap->getIndexBase();
1546 if (A->IsView(
"stridedMaps") &&
1547 Teuchos::rcp_dynamic_cast<
const StridedMap>(A->getRowMap(
"stridedMaps")) != Teuchos::null) {
1551 blockdim = strMap->getFixedBlockSize();
1552 offset = strMap->getOffset();
1553 oldView = A->SwitchToView(oldView);
1554 GetOStream(
Statistics1) <<
"CoalesceDropFactory::Build():"
1555 <<
" found blockdim=" << blockdim <<
" from strided maps. offset=" << offset << std::endl;
1557 GetOStream(
Statistics1) <<
"CoalesceDropFactory::Build(): no striding information available. Use blockdim=1 with offset=0" << std::endl;
1562 GetOStream(
Statistics1) <<
"CoalesceDropFactory: nodeMap " << nodeMap->getLocalNumElements() <<
"/" << nodeMap->getGlobalNumElements() <<
" elements" << std::endl;
1565 RCP<CrsGraph> crsGraph = CrsGraphFactory::Build(nodeMap, A->getLocalMaxNumRowEntries() * blockdim);
1567 LO numRows = A->getRowMap()->getLocalNumElements();
1568 LO numNodes = nodeMap->getLocalNumElements();
1570 Kokkos::deep_copy(amalgBoundaryNodes,
false);
1571 const ArrayRCP<int> numberDirichletRowsPerNode(numNodes, 0);
1572 bool bIsDiagonalEntry =
false;
1577 for (
LO row = 0; row < numRows; row++) {
1579 GO grid = rowMap->getGlobalElement(row);
1582 bIsDiagonalEntry =
false;
1587 size_t nnz = A->getNumEntriesInLocalRow(row);
1590 A->getLocalRowView(row, indices, vals);
1594 for (
LO col = 0; col < Teuchos::as<LO>(nnz); col++) {
1595 GO gcid = colMap->getGlobalElement(indices[col]);
1597 if (vals[col] != STS::zero()) {
1599 cnodeIds->push_back(cnodeId);
1601 if (grid == gcid) bIsDiagonalEntry =
true;
1605 if (realnnz == 1 && bIsDiagonalEntry ==
true) {
1606 LO lNodeId = nodeMap->getLocalElement(nodeId);
1607 numberDirichletRowsPerNode[lNodeId] += 1;
1608 if (numberDirichletRowsPerNode[lNodeId] == blockdim)
1609 amalgBoundaryNodes[lNodeId] =
true;
1614 if (arr_cnodeIds.
size() > 0)
1615 crsGraph->insertGlobalIndices(nodeId, arr_cnodeIds());
1618 crsGraph->fillComplete(nodeMap, nodeMap);
1627 GO numLocalBoundaryNodes = 0;
1628 GO numGlobalBoundaryNodes = 0;
1629 for (
size_t i = 0; i < amalgBoundaryNodes.size(); ++i)
1630 if (amalgBoundaryNodes(i))
1631 numLocalBoundaryNodes++;
1633 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1634 GetOStream(
Statistics1) <<
"Detected " << numGlobalBoundaryNodes <<
" Dirichlet nodes" << std::endl;
1639 Set(currentLevel,
"DofsPerNode", blockdim);
1640 Set(currentLevel,
"Graph", graph);
1646 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1651 LO blkSize = A.GetFixedBlockSize();
1652 if (A.IsView(
"stridedMaps") ==
true) {
1656 if (strMap->getStridedBlockId() > -1)
1657 blkSize = Teuchos::as<LO>(strMap->getStridingData()[strMap->getStridedBlockId()]);
1661 size_t nnz = 0, pos = 0;
1662 for (
LO j = 0; j < blkSize; j++)
1663 nnz += A.getNumEntriesInLocalRow(row * blkSize + j);
1675 for (
LO j = 0; j < blkSize; j++) {
1676 A.getLocalRowView(row * blkSize + j, inds, vals);
1677 size_type numIndices = inds.
size();
1679 if (numIndices == 0)
1683 cols[pos++] = translation[inds[0]];
1684 for (size_type k = 1; k < numIndices; k++) {
1685 LO nodeID = translation[inds[k]];
1689 if (nodeID != cols[pos - 1])
1690 cols[pos++] = nodeID;
1697 std::sort(cols.
begin(), cols.
end());
1699 for (
size_t j = 1; j < nnz; j++)
1700 if (cols[j] != cols[pos])
1701 cols[++pos] = cols[j];
1705 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1711 LO blkSize = A.GetFixedBlockSize();
1712 if (A.IsView(
"stridedMaps") ==
true) {
1716 if (strMap->getStridedBlockId() > -1)
1717 blkSize = Teuchos::as<LO>(strMap->getStridingData()[strMap->getStridedBlockId()]);
1721 size_t nnz = 0, pos = 0;
1722 for (
LO j = 0; j < blkSize; j++)
1723 nnz += A.getNumEntriesInLocalRow(row * blkSize + j);
1735 for (
LO j = 0; j < blkSize; j++) {
1736 A.getLocalRowView(row * blkSize + j, inds, vals);
1737 size_type numIndices = inds.
size();
1739 if (numIndices == 0)
1744 for (size_type k = 0; k < numIndices; k++) {
1746 LO nodeID = translation[inds[k]];
1749 typename STS::magnitudeType aiiajj = STS::magnitude(threshold * threshold * ghostedDiagVals[dofID] * ghostedDiagVals[row * blkSize + j]);
1750 typename STS::magnitudeType aij = STS::magnitude(vals[k] * vals[k]);
1753 if (aij > aiiajj || (row * blkSize + j == dofID)) {
1759 if (nodeID != prevNodeID) {
1760 cols[pos++] = nodeID;
1761 prevNodeID = nodeID;
1770 std::sort(cols.
begin(), cols.
end());
1772 for (
size_t j = 1; j < nnz; j++)
1773 if (cols[j] != cols[pos])
1774 cols[++pos] = cols[j];
1780 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1785 const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.
get<
double>(
"aggregation: Dirichlet threshold")));
1786 const typename STS::magnitudeType rowSumTol = as<typename STS::magnitudeType>(pL.
get<
double>(
"aggregation: row sum drop tol"));
1790 GetOStream(
Statistics1) <<
"Using BlockDiagonal Graph before dropping (with provided blocking)" << std::endl;
1797 ghostedBlockNumber->doImport(*BlockNumber, *importer,
Xpetra::INSERT);
1799 ghostedBlockNumber = BlockNumber;
1807 typename CrsMatrix::local_matrix_type::row_map_type::HostMirror::non_const_type rows_mat;
1808 typename LWGraph::row_type::non_const_type rows_graph;
1809 typename LWGraph::entries_type::non_const_type columns;
1810 typename CrsMatrix::local_matrix_type::values_type::HostMirror::non_const_type values;
1813 if (generate_matrix) {
1814 crs_matrix_wrap =
rcp(
new CrsMatrixWrap(A->getRowMap(), A->getColMap(), 0));
1815 rows_mat =
typename CrsMatrix::local_matrix_type::row_map_type::HostMirror::non_const_type(
"rows_mat", A->getLocalNumRows() + 1);
1817 rows_graph =
typename LWGraph::row_type::non_const_type(
"rows_graph", A->getLocalNumRows() + 1);
1819 columns =
typename LWGraph::entries_type::non_const_type(
"columns", A->getLocalNumEntries());
1820 values =
typename CrsMatrix::local_matrix_type::values_type::HostMirror::non_const_type(
"values", A->getLocalNumEntries());
1823 GO numDropped = 0, numTotal = 0;
1824 for (
LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
1825 LO row_block = row_block_number[row];
1826 size_t nnz = A->getNumEntriesInLocalRow(row);
1829 A->getLocalRowView(row, indices, vals);
1832 for (
LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
1833 LO col = indices[colID];
1834 LO col_block = col_block_number[col];
1836 if (row_block == col_block) {
1837 if (generate_matrix) values[realnnz] = vals[colID];
1838 columns[realnnz++] = col;
1843 if (generate_matrix)
1844 rows_mat[row + 1] = realnnz;
1846 rows_graph[row + 1] = realnnz;
1853 numTotal = A->getLocalNumEntries();
1856 GO numLocalBoundaryNodes = 0;
1857 GO numGlobalBoundaryNodes = 0;
1858 for (
size_t i = 0; i < boundaryNodes.size(); ++i)
1859 if (boundaryNodes(i))
1860 numLocalBoundaryNodes++;
1862 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1863 GetOStream(
Statistics1) <<
"Detected " << numGlobalBoundaryNodes <<
" Dirichlet nodes" << std::endl;
1865 GO numGlobalTotal, numGlobalDropped;
1868 GetOStream(
Statistics1) <<
"Number of dropped entries in block-diagonalized matrix graph: " << numGlobalDropped <<
"/" << numGlobalTotal;
1869 if (numGlobalTotal != 0)
1870 GetOStream(
Statistics1) <<
" (" << 100 * Teuchos::as<double>(numGlobalDropped) / Teuchos::as<double>(numGlobalTotal) <<
"%)";
1874 Set(currentLevel,
"Filtering",
true);
1876 if (generate_matrix) {
1881 typename CrsMatrix::local_matrix_type::row_map_type>::value) {
1882 crs_matrix_wrap->getCrsMatrix()->setAllValues(rows_mat, columns, values);
1884 auto rows_mat2 =
typename CrsMatrix::local_matrix_type::row_map_type::non_const_type(
"rows_mat2", rows_mat.extent(0));
1885 Kokkos::deep_copy(rows_mat2, rows_mat);
1886 auto columns2 =
typename CrsMatrix::local_graph_type::entries_type::non_const_type(
"columns2", columns.extent(0));
1887 Kokkos::deep_copy(columns2, columns);
1888 auto values2 =
typename CrsMatrix::local_matrix_type::values_type::non_const_type(
"values2", values.extent(0));
1889 Kokkos::deep_copy(values2, values);
1890 crs_matrix_wrap->getCrsMatrix()->setAllValues(rows_mat2, columns2, values2);
1892 crs_matrix_wrap->getCrsMatrix()->expertStaticFillComplete(A->getColMap(), A->getRowMap());
1894 RCP<LWGraph> graph =
rcp(
new LWGraph(rows_graph, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), A->getRowMap(), A->getColMap(),
"block-diagonalized graph of A"));
1896 Set(currentLevel,
"Graph", graph);
1899 Set(currentLevel,
"DofsPerNode", 1);
1900 return crs_matrix_wrap;
1903 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1908 const bool localizeColoringGraph = pL.get<
bool>(
"aggregation: coloring: localize color graph");
1910 GetOStream(
Statistics1) <<
"Using BlockDiagonal Graph after Dropping (with provided blocking)";
1911 if (localizeColoringGraph)
1912 GetOStream(
Statistics1) <<
", with localization" << std::endl;
1914 GetOStream(
Statistics1) <<
", without localization" << std::endl;
1922 typename LWGraph::row_type::non_const_type rows_graph(
"rows_graph", inputGraph->
GetNodeNumVertices() + 1);
1923 typename LWGraph::entries_type::non_const_type columns(
"columns", inputGraph->
GetNodeNumEdges());
1926 GO numDropped = 0, numTotal = 0;
1927 const LO numRows = Teuchos::as<LO>(inputGraph->
GetDomainMap()->getLocalNumElements());
1928 if (localizeColoringGraph) {
1929 for (
LO row = 0; row < numRows; ++row) {
1930 LO row_block = row_block_number[row];
1934 for (
LO colID = 0; colID < Teuchos::as<LO>(indices.length); colID++) {
1935 LO col = indices(colID);
1936 LO col_block = col_block_number[col];
1938 if ((row_block == col_block) && (col < numRows)) {
1939 columns(realnnz++) = col;
1944 rows_graph(row + 1) = realnnz;
1951 boundaryNodesVector->getDataNonConst(0)[i] = boundaryNodes[i];
1954 boundaryColumnVector->doImport(*boundaryNodesVector, *importer,
Xpetra::INSERT);
1955 auto boundaryColumn = boundaryColumnVector->getData(0);
1957 for (
LO row = 0; row < numRows; ++row) {
1958 LO row_block = row_block_number[row];
1962 for (
LO colID = 0; colID < Teuchos::as<LO>(indices.length); colID++) {
1963 LO col = indices(colID);
1964 LO col_block = col_block_number[col];
1966 if ((row_block == col_block) && ((row == col) || (boundaryColumn[col] == 0))) {
1967 columns(realnnz++) = col;
1972 rows_graph(row + 1) = realnnz;
1980 GO numGlobalTotal, numGlobalDropped;
1983 GetOStream(
Statistics1) <<
"Number of dropped entries in block-diagonalized matrix graph: " << numGlobalDropped <<
"/" << numGlobalTotal;
1984 if (numGlobalTotal != 0)
1985 GetOStream(
Statistics1) <<
" (" << 100 * Teuchos::as<double>(numGlobalDropped) / Teuchos::as<double>(numGlobalTotal) <<
"%)";
1989 if (localizeColoringGraph) {
1990 outputGraph =
rcp(
new LWGraph(rows_graph, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), inputGraph->
GetDomainMap(), inputGraph->
GetImportMap(),
"block-diagonalized graph of A"));
1994 #ifdef HAVE_XPETRA_TPETRA
1995 auto outputGraph2 =
rcp(
new LWGraph(rows_graph, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), inputGraph->
GetDomainMap(), inputGraph->
GetImportMap(),
"block-diagonalized graph of A"));
1997 auto tpGraph =
Xpetra::toTpetra(rcp_const_cast<const CrsGraph>(outputGraph2->GetCrsGraph()));
1999 auto tpGraphSym = sym->symmetrize();
2000 auto lclGraphSym = tpGraphSym->getLocalGraphHost();
2001 auto colIndsSym = lclGraphSym.entries;
2003 auto rowsSym = tpGraphSym->getLocalRowPtrsHost();
2004 typename LWGraph::row_type::non_const_type rows_graphSym(
"rows_graphSym", rowsSym.size());
2005 for (
size_t row = 0; row < rowsSym.size(); row++)
2006 rows_graphSym(row) = rowsSym(row);
2015 #endif // MUELU_COALESCEDROPFACTORY_DEF_HPP
Important warning messages (one line)
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
typename local_graph_type::row_map_type row_type
Exception indicating invalid cast attempted.
void reserve(size_type n)
void BlockDiagonalizeGraph(const RCP< LWGraph > &inputGraph, const RCP< LocalOrdinalVector > &ghostedBlockNumber, RCP< LWGraph > &outputGraph, RCP< const Import > &importer) const
#define MueLu_sumAll(rcpComm, in, out)
static void ApplyRowSumCriterionHost(const Matrix &A, const typename Teuchos::ScalarTraits< Scalar >::magnitudeType rowSumTol, Kokkos::View< bool *, Kokkos::HostSpace > &dirichletRows)
KOKKOS_INLINE_FUNCTION void SetBoundaryNodeMap(const boundary_nodes_type bndry)
Set boolean array indicating which rows correspond to Dirichlet boundaries.
static Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Build(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node >> &map, size_t NumVectors, bool zeroOut=true)
void setValidator(RCP< const ParameterEntryValidator > const &validator)
T & get(const std::string &name, T def_value)
static void ApplyRowSumCriterion(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Magnitude rowSumTol, Teuchos::ArrayRCP< bool > &dirichletRows)
Apply Rowsum Criterion.
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
static void Write(const std::string &fileName, const Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > &M)
DropTol & operator=(DropTol const &)=default
Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > BlockDiagonalize(Level ¤tLevel, const RCP< Matrix > &A, bool generate_matrix) const
One-liner description of what is happening.
ParameterList & set(std::string const &name, T &&value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const
Return number of graph vertices.
KOKKOS_INLINE_FUNCTION size_type GetNodeNumEdges() const
Return number of graph edges.
KOKKOS_INLINE_FUNCTION const boundary_nodes_type GetBoundaryNodeMap() const
Returns map with global ids of boundary nodes.
Exception throws to report incompatible objects (like maps).
static const GlobalOrdinal DOFGid2NodeId(GlobalOrdinal gid, LocalOrdinal blockSize, const GlobalOrdinal offset, const GlobalOrdinal indexBase)
Translate global (row/column) id to global amalgamation block id.
Scalar GetThreshold() const
Return threshold value.
void resize(const size_type n, const T &val=T())
CoalesceDropFactory()
Constructor.
Kokkos::View< bool *, memory_space > boundary_nodes_type
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Class that holds all level-specific information.
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
static Teuchos::ArrayRCP< const bool > DetectDirichletRows(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Magnitude &tol=Teuchos::ScalarTraits< Magnitude >::zero(), bool count_twos_as_dirichlet=false)
Detect Dirichlet rows.
#define SET_VALID_ENTRY(name)
static void AmalgamateMap(const Map &sourceMap, const Matrix &A, RCP< const Map > &amalgamatedMap, Array< LO > &translation)
Method to create merged map for systems of PDEs.
static RCP< Vector > GetMatrixOverlappedDiagonal(const Matrix &A)
Extract Overlapped Matrix Diagonal.
void resize(size_type new_size, const value_type &x=value_type())
static Teuchos::RCP< Xpetra::Vector< Magnitude, LocalOrdinal, GlobalOrdinal, Node > > GetMatrixMaxMinusOffDiagonal(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
Return vector containing: max_{i=k}(-a_ik), for each for i in the matrix.
void Build(Level ¤tLevel) const
Build an object with this factory.
static RCP< Vector > Build(const Teuchos::RCP< const Map > &map, bool zeroOut=true)
void DeclareInput(Level ¤tLevel) const
Input.
KOKKOS_INLINE_FUNCTION neighbor_vertices_type getNeighborVertices(LO i) const
Return the list of vertices adjacent to the vertex 'v'.
const RCP< const Map > GetImportMap() const
Return overlapping import map (nodes).
RCP< const Tpetra::CrsGraph< LocalOrdinal, GlobalOrdinal, Node > > toTpetra(const RCP< const CrsGraph< LocalOrdinal, GlobalOrdinal, Node > > &graph)
void push_back(const value_type &x)
void MergeRows(const Matrix &A, const LO row, Array< LO > &cols, const Array< LO > &translation) const
Method to merge rows of matrix for systems of PDEs.
RCP< const CrsGraph< int, GlobalOrdinal, Node > > toXpetra(const Epetra_CrsGraph &g)
const RCP< const Map > GetDomainMap() const
Lightweight MueLu representation of a compressed row storage graph.
DropTol(real_type val_, real_type diag_, LO col_, bool drop_)
Exception throws to report errors in the internal logical of the program.
#define TEUCHOS_ASSERT(assertion_test)
static Kokkos::View< bool *, typename Kokkos::HostSpace > DetectDirichletRows_kokkos_host(const Matrix &A, const Magnitude &tol=Teuchos::ScalarTraits< typename Teuchos::ScalarTraits< SC >::magnitudeType >::zero(), const bool count_twos_as_dirichlet=false)
static Teuchos::ScalarTraits< Scalar >::magnitudeType Distance2(const Teuchos::Array< Teuchos::ArrayRCP< const Scalar >> &v, LocalOrdinal i0, LocalOrdinal i1)
Squared distance between two rows in a multivector.
ParameterEntry & getEntry(const std::string &name)
void MergeRowsWithDropping(const Matrix &A, const LO row, const ArrayRCP< const SC > &ghostedDiagVals, SC threshold, Array< LO > &cols, const Array< LO > &translation) const