10 #ifndef MUELU_COALESCEDROPFACTORY_DEF_HPP
11 #define MUELU_COALESCEDROPFACTORY_DEF_HPP
17 #include <Xpetra_MapFactory.hpp>
18 #include <Xpetra_Map.hpp>
20 #include <Xpetra_MultiVectorFactory.hpp>
22 #include <Xpetra_StridedMap.hpp>
28 #include <Kokkos_NestedSort.hpp>
29 #include <Kokkos_StdAlgorithms.hpp>
32 #include "MueLu_AmalgamationFactory.hpp"
33 #include "MueLu_AmalgamationInfo.hpp"
35 #include "MueLu_LWGraph.hpp"
38 #include "MueLu_LWGraph.hpp"
41 #include "MueLu_PreDropFunctionConstVal.hpp"
42 #include "MueLu_Utilities.hpp"
44 #ifdef HAVE_XPETRA_TPETRA
45 #include "Tpetra_CrsGraphTransposer.hpp"
59 template <
class real_type,
class LO>
68 DropTol(real_type val_, real_type diag_,
LO col_,
bool drop_)
89 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
93 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
101 SET_VALID_ENTRY(
"aggregation: distance laplacian directional weights");
106 validParamList->
getEntry(
"aggregation: drop scheme").
setValidator(
rcp(
new Teuchos::StringValidator(Teuchos::tuple<std::string>(
"signed classical sa",
"classical",
"distance laplacian",
"signed classical",
"block diagonal",
"block diagonal classical",
"block diagonal distance laplacian",
"block diagonal signed classical",
"block diagonal colored signed classical"))));
111 #undef SET_VALID_ENTRY
112 validParamList->
set<
bool>(
"lightweight wrap",
true,
"Experimental option for lightweight graph access");
115 validParamList->
set<
RCP<const FactoryBase>>(
"UnAmalgamationInfo", Teuchos::null,
"Generating factory for UnAmalgamationInfo");
119 return validParamList;
122 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
124 : predrop_(Teuchos::null) {}
126 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
128 Input(currentLevel,
"A");
129 Input(currentLevel,
"UnAmalgamationInfo");
132 if (pL.
get<
bool>(
"lightweight wrap") ==
true) {
133 std::string algo = pL.
get<std::string>(
"aggregation: drop scheme");
134 if (algo ==
"distance laplacian" || algo ==
"block diagonal distance laplacian") {
135 Input(currentLevel,
"Coordinates");
137 if (algo ==
"signed classical sa")
139 else if (algo.find(
"block diagonal") != std::string::npos || algo.find(
"signed classical") != std::string::npos) {
140 Input(currentLevel,
"BlockNumber");
145 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
150 typedef typename STS::magnitudeType real_type;
154 if (predrop_ != Teuchos::null)
155 GetOStream(
Parameters0) << predrop_->description();
157 RCP<Matrix> realA = Get<RCP<Matrix>>(currentLevel,
"A");
160 bool doExperimentalWrap = pL.get<
bool>(
"lightweight wrap");
162 GetOStream(
Parameters0) <<
"lightweight wrap = " << doExperimentalWrap << std::endl;
163 std::string algo = pL.get<std::string>(
"aggregation: drop scheme");
164 const bool aggregationMayCreateDirichlet = pL.get<
bool>(
"aggregation: dropping may create Dirichlet");
169 bool use_block_algorithm =
false;
170 LO interleaved_blocksize = as<LO>(pL.get<
int>(
"aggregation: block diagonal: interleaved blocksize"));
171 bool useSignedClassicalRS =
false;
172 bool useSignedClassicalSA =
false;
173 bool generateColoringGraph =
false;
177 typename STS::magnitudeType rowSumTol = as<typename STS::magnitudeType>(pL.get<
double>(
"aggregation: row sum drop tol"));
181 if (algo ==
"distance laplacian") {
183 Coords = Get<RCP<RealValuedMultiVector>>(currentLevel,
"Coordinates");
185 }
else if (algo ==
"signed classical sa") {
186 useSignedClassicalSA =
true;
189 }
else if (algo ==
"signed classical" || algo ==
"block diagonal colored signed classical" || algo ==
"block diagonal signed classical") {
190 useSignedClassicalRS =
true;
195 if (!importer.is_null()) {
198 ghostedBlockNumber->doImport(*BlockNumber, *importer,
Xpetra::INSERT);
200 ghostedBlockNumber = BlockNumber;
203 if (algo ==
"block diagonal colored signed classical")
204 generateColoringGraph =
true;
208 }
else if (algo ==
"block diagonal") {
210 BlockDiagonalize(currentLevel, realA,
false);
212 }
else if (algo ==
"block diagonal classical" || algo ==
"block diagonal distance laplacian") {
214 use_block_algorithm =
true;
215 RCP<Matrix> filteredMatrix = BlockDiagonalize(currentLevel, realA,
true);
216 if (algo ==
"block diagonal distance laplacian") {
219 if (OldCoords->getLocalLength() != realA->getLocalNumRows()) {
220 LO dim = (
LO)OldCoords->getNumVectors();
221 Coords = RealValuedMultiVectorFactory::Build(realA->getRowMap(), dim);
222 for (
LO k = 0; k < dim; k++) {
225 for (
LO i = 0; i < (
LO)OldCoords->getLocalLength(); i++) {
226 LO new_base = i * dim;
227 for (
LO j = 0; j < interleaved_blocksize; j++)
228 new_vec[new_base + j] = old_vec[i];
234 algo =
"distance laplacian";
235 }
else if (algo ==
"block diagonal classical") {
247 enum { NO_WEIGHTS = 0,
250 int use_dlap_weights = NO_WEIGHTS;
251 if (algo ==
"distance laplacian") {
252 LO dim = (
LO)Coords->getNumVectors();
254 bool non_unity =
false;
255 for (
LO i = 0; !non_unity && i < (
LO)dlap_weights.
size(); i++) {
256 if (dlap_weights[i] != 1.0) {
261 LO blocksize = use_block_algorithm ? as<LO>(pL.get<
int>(
"aggregation: block diagonal: interleaved blocksize")) : 1;
262 if ((
LO)dlap_weights.
size() == dim)
263 use_dlap_weights = SINGLE_WEIGHTS;
264 else if ((
LO)dlap_weights.
size() == blocksize * dim)
265 use_dlap_weights = BLOCK_WEIGHTS;
268 "length of 'aggregation: distance laplacian directional weights' must equal the coordinate dimension OR the coordinate dimension times the blocksize");
271 GetOStream(
Statistics1) <<
"Using distance laplacian weights: " << dlap_weights << std::endl;
286 if (doExperimentalWrap) {
292 if (pL.get<
bool>(
"aggregation: use ml scaling of drop tol"))
293 threshold = pL.get<
double>(
"aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID());
295 threshold = as<SC>(pL.get<
double>(
"aggregation: drop tol"));
297 std::string distanceLaplacianAlgoStr = pL.get<std::string>(
"aggregation: distance laplacian algo");
298 std::string classicalAlgoStr = pL.get<std::string>(
"aggregation: classical algo");
299 real_type realThreshold = STS::magnitude(threshold);
303 #ifdef HAVE_MUELU_DEBUG
304 int distanceLaplacianCutVerbose = 0;
306 #ifdef DJS_READ_ENV_VARIABLES
307 if (getenv(
"MUELU_DROP_TOLERANCE_MODE")) {
308 distanceLaplacianAlgoStr = std::string(getenv(
"MUELU_DROP_TOLERANCE_MODE"));
311 if (getenv(
"MUELU_DROP_TOLERANCE_THRESHOLD")) {
312 auto tmp = atoi(getenv(
"MUELU_DROP_TOLERANCE_THRESHOLD"));
313 realThreshold = 1e-4 * tmp;
316 #ifdef HAVE_MUELU_DEBUG
317 if (getenv(
"MUELU_DROP_TOLERANCE_VERBOSE")) {
318 distanceLaplacianCutVerbose = atoi(getenv(
"MUELU_DROP_TOLERANCE_VERBOSE"));
326 if (algo ==
"distance laplacian") {
327 if (distanceLaplacianAlgoStr ==
"default")
329 else if (distanceLaplacianAlgoStr ==
"unscaled cut")
331 else if (distanceLaplacianAlgoStr ==
"scaled cut")
333 else if (distanceLaplacianAlgoStr ==
"scaled cut symmetric")
337 GetOStream(
Runtime0) <<
"algorithm = \"" << algo <<
"\" distance laplacian algorithm = \"" << distanceLaplacianAlgoStr <<
"\": threshold = " << threshold <<
", blocksize = " << A->GetFixedBlockSize() << std::endl;
338 }
else if (algo ==
"classical") {
339 if (classicalAlgoStr ==
"default")
341 else if (classicalAlgoStr ==
"unscaled cut")
343 else if (classicalAlgoStr ==
"scaled cut")
347 GetOStream(
Runtime0) <<
"algorithm = \"" << algo <<
"\" classical algorithm = \"" << classicalAlgoStr <<
"\": threshold = " << threshold <<
", blocksize = " << A->GetFixedBlockSize() << std::endl;
350 GetOStream(
Runtime0) <<
"algorithm = \"" << algo <<
"\": threshold = " << threshold <<
", blocksize = " << A->GetFixedBlockSize() << std::endl;
352 if (((algo ==
"classical") && (classicalAlgoStr.find(
"scaled") != std::string::npos)) || ((algo ==
"distance laplacian") && (distanceLaplacianAlgoStr.find(
"scaled") != std::string::npos)))
355 Set<bool>(currentLevel,
"Filtering", (threshold != STS::zero()));
357 const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<
double>(
"aggregation: Dirichlet threshold")));
363 GO numDropped = 0, numTotal = 0;
364 std::string graphType =
"unamalgamated";
383 const LO BlockSize = A->GetFixedBlockSize() / A->GetStorageBlockSize();
386 if (algo ==
"classical") {
387 if (predrop_ == null) {
392 if (predrop_ != null) {
395 "MueLu::CoalesceFactory::Build: cast to PreDropFunctionConstVal failed.");
398 if (newt != threshold) {
399 GetOStream(
Warnings0) <<
"switching threshold parameter from " << threshold <<
" (list) to " << newt <<
" (user function" << std::endl;
406 if (BlockSize == 1 && threshold == STS::zero() && !useSignedClassicalRS && !useSignedClassicalSA && A->hasCrsGraph()) {
415 numTotal = A->getLocalNumEntries();
418 GO numLocalBoundaryNodes = 0;
419 GO numGlobalBoundaryNodes = 0;
420 for (
size_t i = 0; i < boundaryNodes.size(); ++i)
421 if (boundaryNodes[i])
422 numLocalBoundaryNodes++;
424 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
425 GetOStream(
Statistics1) <<
"Detected " << numGlobalBoundaryNodes <<
" Dirichlet nodes" << std::endl;
428 Set(currentLevel,
"DofsPerNode", 1);
429 Set(currentLevel,
"Graph", graph);
431 }
else if ((BlockSize == 1 && threshold != STS::zero()) ||
432 (BlockSize == 1 && threshold == STS::zero() && !A->hasCrsGraph()) ||
433 (BlockSize == 1 && useSignedClassicalRS) ||
434 (BlockSize == 1 && useSignedClassicalSA)) {
440 typename LWGraph::row_type::non_const_type
rows(
"rows", A->getLocalNumRows() + 1);
441 typename LWGraph::entries_type::non_const_type columns(
"columns", A->getLocalNumEntries());
443 using MT =
typename STS::magnitudeType;
448 if (useSignedClassicalRS) {
449 if (ghostedBlockNumber.
is_null()) {
451 negMaxOffDiagonal = negMaxOffDiagonalVec->getData(0);
453 GetOStream(
Statistics1) <<
"Calculated max point off-diagonal" << std::endl;
456 negMaxOffDiagonal = negMaxOffDiagonalVec->getData(0);
458 GetOStream(
Statistics1) <<
"Calculating max block off-diagonal" << std::endl;
463 ghostedDiagVals = ghostedDiag->getData(0);
467 if (rowSumTol > 0.) {
468 if (ghostedBlockNumber.
is_null()) {
470 GetOStream(
Statistics1) <<
"Applying point row sum criterion." << std::endl;
474 GetOStream(
Statistics1) <<
"Applying block row sum criterion." << std::endl;
480 if (!ghostedBlockNumber.
is_null())
481 g_block_id = ghostedBlockNumber->getData(0);
487 for (
LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
488 size_t nnz = A->getNumEntriesInLocalRow(row);
489 bool rowIsDirichlet = boundaryNodes[row];
492 A->getLocalRowView(row, indices, vals);
500 if (useSignedClassicalRS) {
502 for (
LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
503 LO col = indices[colID];
504 MT max_neg_aik = realThreshold * STS::real(negMaxOffDiagonal[row]);
505 MT neg_aij = -STS::real(vals[colID]);
510 if ((!rowIsDirichlet && (g_block_id.
is_null() || g_block_id[row] == g_block_id[col]) && neg_aij > max_neg_aik) || row == col) {
511 columns[realnnz++] = col;
516 rows(row + 1) = realnnz;
517 }
else if (useSignedClassicalSA) {
519 for (
LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
520 LO col = indices[colID];
522 bool is_nonpositive = STS::real(vals[colID]) <= 0;
523 MT aiiajj = STS::magnitude(threshold * threshold * ghostedDiagVals[col] * ghostedDiagVals[row]);
524 MT aij = is_nonpositive ? STS::magnitude(vals[colID] * vals[colID]) : (-STS::magnitude(vals[colID] * vals[colID]));
530 if ((!rowIsDirichlet && aij > aiiajj) || row == col) {
531 columns(realnnz++) = col;
536 rows[row + 1] = realnnz;
539 for (
LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
540 LO col = indices[colID];
541 MT aiiajj = STS::magnitude(threshold * threshold * ghostedDiagVals[col] * ghostedDiagVals[row]);
542 MT aij = STS::magnitude(vals[colID] * vals[colID]);
544 if ((!rowIsDirichlet && aij > aiiajj) || row == col) {
545 columns(realnnz++) = col;
550 rows(row + 1) = realnnz;
556 using ExecSpace =
typename Node::execution_space;
557 using TeamPol = Kokkos::TeamPolicy<ExecSpace>;
558 using TeamMem =
typename TeamPol::member_type;
559 using ATS = Kokkos::ArithTraits<Scalar>;
560 using impl_scalar_type =
typename ATS::val_type;
561 using implATS = Kokkos::ArithTraits<impl_scalar_type>;
564 auto ghostedDiagValsView = Kokkos::subview(ghostedDiag->getDeviceLocalView(Xpetra::Access::ReadOnly), Kokkos::ALL(), 0);
565 auto thresholdKokkos =
static_cast<impl_scalar_type
>(threshold);
566 auto realThresholdKokkos = implATS::magnitude(thresholdKokkos);
567 auto columnsDevice = Kokkos::create_mirror_view(ExecSpace(), columns);
569 auto A_device = A->getLocalMatrixDevice();
575 boundaryNodesVector->getDataNonConst(0)[i] = boundaryNodes[i];
579 boundaryColumnVector->doImport(*boundaryNodesVector, *importer,
Xpetra::INSERT);
581 boundaryColumnVector = boundaryNodesVector;
583 auto boundaryColumn = boundaryColumnVector->getDeviceLocalView(Xpetra::Access::ReadOnly);
584 auto boundary = Kokkos::subview(boundaryColumn, Kokkos::ALL(), 0);
586 Kokkos::View<LO*, ExecSpace> rownnzView(
"rownnzView", A_device.numRows());
587 auto drop_views = Kokkos::View<bool*, ExecSpace>(
"drop_views", A_device.nnz());
588 auto index_views = Kokkos::View<size_t*, ExecSpace>(
"index_views", A_device.nnz());
590 Kokkos::parallel_reduce(
591 "classical_cut", TeamPol(A_device.numRows(), Kokkos::AUTO), KOKKOS_LAMBDA(
const TeamMem& teamMember,
LO& globalnnz,
GO& totalDropped) {
592 LO row = teamMember.league_rank();
593 auto rowView = A_device.rowConst(row);
594 size_t nnz = rowView.length;
596 auto drop_view = Kokkos::subview(drop_views, Kokkos::make_pair(A_device.graph.row_map(row), A_device.graph.row_map(row + 1)));
597 auto index_view = Kokkos::subview(index_views, Kokkos::make_pair(A_device.graph.row_map(row), A_device.graph.row_map(row + 1)));
600 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, (
LO)nnz), [&](
const LO colID) {
601 index_view(colID) = colID;
602 LO col = rowView.colidx(colID);
605 if (row == col || boundary(col)) {
606 drop_view(colID) =
true;
608 drop_view(colID) =
false;
612 size_t dropStart = nnz;
615 Kokkos::Experimental::sort_team(teamMember, index_view, [=](
size_t& x,
size_t& y) ->
bool {
616 if (drop_view(x) || drop_view(y)) {
617 return drop_view(x) < drop_view(y);
619 auto x_aij = implATS::magnitude(rowView.value(x) * rowView.value(x));
620 auto y_aij = implATS::magnitude(rowView.value(y) * rowView.value(y));
621 return x_aij > y_aij;
626 Kokkos::parallel_reduce(
627 Kokkos::TeamThreadRange(teamMember, 1, nnz), [=](
size_t i,
size_t& min) {
628 auto const& x = index_view(i - 1);
629 auto const& y = index_view(i);
630 typename implATS::magnitudeType x_aij = 0;
631 typename implATS::magnitudeType y_aij = 0;
633 x_aij = implATS::magnitude(rowView.value(x) * rowView.value(x));
636 y_aij = implATS::magnitude(rowView.value(y) * rowView.value(y));
639 if (realThresholdKokkos * realThresholdKokkos * x_aij > y_aij) {
645 Kokkos::Min<size_t>(dropStart));
648 Kokkos::Experimental::sort_team(teamMember, index_view, [=](
size_t& x,
size_t& y) ->
bool {
649 if (drop_view(x) || drop_view(y)) {
650 return drop_view(x) < drop_view(y);
652 auto x_aij = implATS::magnitude(rowView.value(x) * rowView.value(x));
653 auto y_aij = implATS::magnitude(rowView.value(y) * rowView.value(y));
654 auto x_aiiajj = implATS::magnitude(ghostedDiagValsView(rowView.colidx(x)) * ghostedDiagValsView(row));
655 auto y_aiiajj = implATS::magnitude(ghostedDiagValsView(rowView.colidx(y)) * ghostedDiagValsView(row));
656 return (x_aij / x_aiiajj) > (y_aij / y_aiiajj);
661 Kokkos::parallel_reduce(
662 Kokkos::TeamThreadRange(teamMember, 1, nnz), [=](
size_t i,
size_t& min) {
663 auto const& x = index_view(i - 1);
664 auto const& y = index_view(i);
665 typename implATS::magnitudeType x_val = 0;
666 typename implATS::magnitudeType y_val = 0;
668 typename implATS::magnitudeType x_aij = implATS::magnitude(rowView.value(x) * rowView.value(x));
669 typename implATS::magnitudeType x_aiiajj = implATS::magnitude(ghostedDiagValsView(rowView.colidx(x)) * ghostedDiagValsView(row));
670 x_val = x_aij / x_aiiajj;
673 typename implATS::magnitudeType y_aij = implATS::magnitude(rowView.value(y) * rowView.value(y));
674 typename implATS::magnitudeType y_aiiajj = implATS::magnitude(ghostedDiagValsView(rowView.colidx(y)) * ghostedDiagValsView(row));
675 y_val = y_aij / y_aiiajj;
678 if (realThresholdKokkos * realThresholdKokkos * x_val > y_val) {
684 Kokkos::Min<size_t>(dropStart));
688 if (dropStart < nnz) {
689 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, dropStart, nnz), [=](
size_t i) {
690 drop_view(index_view(i)) =
true;
696 Kokkos::parallel_reduce(
697 Kokkos::TeamThreadRange(teamMember, nnz), [=](
const size_t idxID,
LO& keep,
GO& drop) {
698 LO col = rowView.colidx(idxID);
700 if (row == col || !drop_view(idxID)) {
701 columnsDevice(A_device.graph.row_map(row) + idxID) = col;
704 columnsDevice(A_device.graph.row_map(row) + idxID) = -1;
711 totalDropped += rowDropped;
712 rownnzView(row) = rownnz;
714 realnnz, numDropped);
717 Kokkos::Experimental::remove(ExecSpace(), columnsDevice, -1);
718 Kokkos::deep_copy(columns, columnsDevice);
721 auto rowsDevice = Kokkos::create_mirror_view(ExecSpace(),
rows);
722 Kokkos::parallel_scan(
723 Kokkos::RangePolicy<ExecSpace>(0, A_device.numRows()), KOKKOS_LAMBDA(
const int i,
LO& partial_sum,
bool is_final) {
724 partial_sum += rownnzView(i);
725 if (is_final) rowsDevice(i + 1) = partial_sum;
727 Kokkos::deep_copy(
rows, rowsDevice);
730 numTotal = A->getLocalNumEntries();
732 if (aggregationMayCreateDirichlet) {
734 for (
LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
736 boundaryNodes[row] =
true;
740 RCP<LWGraph> graph =
rcp(
new LWGraph(
rows, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), A->getRowMap(), A->getColMap(),
"thresholded graph of A"));
743 GO numLocalBoundaryNodes = 0;
744 GO numGlobalBoundaryNodes = 0;
745 for (
size_t i = 0; i < boundaryNodes.size(); ++i)
746 if (boundaryNodes(i))
747 numLocalBoundaryNodes++;
749 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
750 GetOStream(
Statistics1) <<
"Detected " << numGlobalBoundaryNodes <<
" Dirichlet nodes" << std::endl;
752 Set(currentLevel,
"Graph", graph);
753 Set(currentLevel,
"DofsPerNode", 1);
756 if (generateColoringGraph) {
759 BlockDiagonalizeGraph(graph, ghostedBlockNumber, colorGraph, importer);
760 Set(currentLevel,
"Coloring Graph", colorGraph);
780 }
else if (BlockSize > 1 && threshold == STS::zero()) {
785 graphType =
"amalgamated";
793 Array<LO> rowTranslation = *(amalInfo->getRowTranslation());
794 Array<LO> colTranslation = *(amalInfo->getColTranslation());
797 LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
800 typename LWGraph::row_type::non_const_type
rows(
"rows", numRows + 1);
801 typename LWGraph::entries_type::non_const_type columns(
"columns", A->getLocalNumEntries());
804 Kokkos::deep_copy(amalgBoundaryNodes,
false);
816 LO blkSize = A->GetFixedBlockSize();
818 LO blkPartSize = A->GetFixedBlockSize();
819 if (A->IsView(
"stridedMaps") ==
true) {
823 blkSize = Teuchos::as<const LO>(strMap->getFixedBlockSize());
824 blkId = strMap->getStridedBlockId();
826 blkPartSize = Teuchos::as<LO>(strMap->getStridingData()[blkId]);
833 for (
LO row = 0; row < numRows; row++) {
843 bool isBoundary =
false;
844 if (pL.get<
bool>(
"aggregation: greedy Dirichlet") ==
true) {
845 for (
LO j = 0; j < blkPartSize; j++) {
846 if (pointBoundaryNodes[row * blkPartSize + j]) {
853 for (
LO j = 0; j < blkPartSize; j++) {
854 if (!pointBoundaryNodes[row * blkPartSize + j]) {
864 MergeRows(*A, row, indicesExtra, colTranslation);
867 indices = indicesExtra;
868 numTotal += indices.
size();
872 LO nnz = indices.
size(), rownnz = 0;
873 for (
LO colID = 0; colID < nnz; colID++) {
874 LO col = indices[colID];
875 columns(realnnz++) = col;
886 amalgBoundaryNodes[row] =
true;
888 rows(row + 1) = realnnz;
891 RCP<LWGraph> graph =
rcp(
new LWGraph(rows, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), uniqueMap, nonUniqueMap,
"amalgamated graph of A"));
895 GO numLocalBoundaryNodes = 0;
896 GO numGlobalBoundaryNodes = 0;
898 for (
size_t i = 0; i < amalgBoundaryNodes.size(); ++i)
899 if (amalgBoundaryNodes(i))
900 numLocalBoundaryNodes++;
903 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
904 GetOStream(
Statistics1) <<
"Detected " << numGlobalBoundaryNodes
905 <<
" agglomerated Dirichlet nodes" << std::endl;
908 Set(currentLevel,
"Graph", graph);
909 Set(currentLevel,
"DofsPerNode", blkSize);
911 }
else if (BlockSize > 1 && threshold != STS::zero()) {
915 graphType =
"amalgamated";
923 Array<LO> rowTranslation = *(amalInfo->getRowTranslation());
924 Array<LO> colTranslation = *(amalInfo->getColTranslation());
927 LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
930 typename LWGraph::row_type::non_const_type
rows(
"rows", numRows + 1);
931 typename LWGraph::entries_type::non_const_type columns(
"columns", A->getLocalNumEntries());
934 Kokkos::deep_copy(amalgBoundaryNodes,
false);
945 LO blkSize = A->GetFixedBlockSize();
947 LO blkPartSize = A->GetFixedBlockSize();
948 if (A->IsView(
"stridedMaps") ==
true) {
952 blkSize = Teuchos::as<const LO>(strMap->getFixedBlockSize());
953 blkId = strMap->getStridedBlockId();
955 blkPartSize = Teuchos::as<LO>(strMap->getStridingData()[blkId]);
966 for (
LO row = 0; row < numRows; row++) {
976 bool isBoundary =
false;
977 if (pL.get<
bool>(
"aggregation: greedy Dirichlet") ==
true) {
978 for (
LO j = 0; j < blkPartSize; j++) {
979 if (pointBoundaryNodes[row * blkPartSize + j]) {
986 for (
LO j = 0; j < blkPartSize; j++) {
987 if (!pointBoundaryNodes[row * blkPartSize + j]) {
997 MergeRowsWithDropping(*A, row, ghostedDiagVals, threshold, indicesExtra, colTranslation);
1000 indices = indicesExtra;
1001 numTotal += indices.
size();
1005 LO nnz = indices.
size(), rownnz = 0;
1006 for (
LO colID = 0; colID < nnz; colID++) {
1007 LO col = indices[colID];
1008 columns[realnnz++] = col;
1019 amalgBoundaryNodes[row] =
true;
1021 rows[row + 1] = realnnz;
1025 RCP<LWGraph> graph =
rcp(
new LWGraph(rows, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), uniqueMap, nonUniqueMap,
"amalgamated graph of A"));
1029 GO numLocalBoundaryNodes = 0;
1030 GO numGlobalBoundaryNodes = 0;
1032 for (
size_t i = 0; i < amalgBoundaryNodes.size(); ++i)
1033 if (amalgBoundaryNodes(i))
1034 numLocalBoundaryNodes++;
1037 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1038 GetOStream(
Statistics1) <<
"Detected " << numGlobalBoundaryNodes
1039 <<
" agglomerated Dirichlet nodes" << std::endl;
1042 Set(currentLevel,
"Graph", graph);
1043 Set(currentLevel,
"DofsPerNode", blkSize);
1046 }
else if (algo ==
"distance laplacian") {
1047 LO blkSize = A->GetFixedBlockSize();
1048 GO indexBase = A->getRowMap()->getIndexBase();
1063 if ((blkSize == 1) && (threshold == STS::zero())) {
1067 graphType =
"unamalgamated";
1068 numTotal = A->getLocalNumEntries();
1071 GO numLocalBoundaryNodes = 0;
1072 GO numGlobalBoundaryNodes = 0;
1073 for (
size_t i = 0; i < pointBoundaryNodes.size(); ++i)
1074 if (pointBoundaryNodes(i))
1075 numLocalBoundaryNodes++;
1077 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1078 GetOStream(
Statistics1) <<
"Detected " << numGlobalBoundaryNodes <<
" Dirichlet nodes" << std::endl;
1081 Set(currentLevel,
"DofsPerNode", blkSize);
1082 Set(currentLevel,
"Graph", graph);
1098 "Coordinate vector length (" << Coords->getLocalLength() <<
") is incompatible with number of rows in A (" << A->getRowMap()->getLocalNumElements() <<
") by modulo block size (" << blkSize <<
").");
1104 uniqueMap = A->getRowMap();
1105 nonUniqueMap = A->getColMap();
1106 graphType =
"unamalgamated";
1109 uniqueMap = Coords->getMap();
1111 "Different index bases for matrix and coordinates");
1115 graphType =
"amalgamated";
1117 LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
1122 if (threshold != STS::zero()) {
1127 if (blkSize == 1 && realA->getCrsGraph()->getImporter() != Teuchos::null) {
1128 GetOStream(
Warnings1) <<
"Using existing importer from matrix graph" << std::endl;
1129 importer = realA->getCrsGraph()->getImporter();
1131 GetOStream(
Warnings0) <<
"Constructing new importer instance" << std::endl;
1132 importer = ImportFactory::Build(uniqueMap, nonUniqueMap);
1142 RCP<Vector> localLaplDiag = VectorFactory::Build(uniqueMap);
1145 if (threshold != STS::zero()) {
1146 const size_t numVectors = ghostedCoords->getNumVectors();
1147 coordData.
reserve(numVectors);
1148 for (
size_t j = 0; j < numVectors; j++) {
1155 ArrayRCP<SC> localLaplDiagData = localLaplDiag->getDataNonConst(0);
1156 for (
LO row = 0; row < numRows; row++) {
1161 A->getLocalRowView(row, indices, vals);
1166 MergeRows(*A, row, indicesExtra, colTranslation);
1167 indices = indicesExtra;
1171 bool haveAddedToDiag =
false;
1172 for (
LO colID = 0; colID < nnz; colID++) {
1173 const LO col = indices[colID];
1176 if (use_dlap_weights == SINGLE_WEIGHTS) {
1181 }
else if (use_dlap_weights == BLOCK_WEIGHTS) {
1182 int block_id = row % interleaved_blocksize;
1183 int block_start = block_id * interleaved_blocksize;
1189 haveAddedToDiag =
true;
1194 if (!haveAddedToDiag)
1195 localLaplDiagData[row] = STS::rmax();
1200 ghostedLaplDiag = VectorFactory::Build(nonUniqueMap);
1201 ghostedLaplDiag->doImport(*localLaplDiag, *importer,
Xpetra::INSERT);
1202 ghostedLaplDiagData = ghostedLaplDiag->getDataNonConst(0);
1206 GetOStream(
Runtime0) <<
"Skipping distance laplacian construction due to 0 threshold" << std::endl;
1212 typename LWGraph::row_type::non_const_type
rows(
"rows", numRows + 1);
1213 typename LWGraph::entries_type::non_const_type columns(
"columns", A->getLocalNumEntries());
1215 #ifdef HAVE_MUELU_DEBUG
1217 for (
LO i = 0; i < (
LO)columns.size(); i++) columns[i] = -666;
1222 bool use_stop_array = threshold != STS::zero() && distanceLaplacianAlgo ==
scaled_cut_symmetric;
1225 rows_stop.
resize(numRows);
1228 Kokkos::deep_copy(amalgBoundaryNodes,
false);
1237 if (threshold != STS::zero()) {
1238 const size_t numVectors = ghostedCoords->getNumVectors();
1239 coordData.
reserve(numVectors);
1240 for (
size_t j = 0; j < numVectors; j++) {
1247 for (
LO row = 0; row < numRows; row++) {
1250 bool isBoundary =
false;
1254 A->getLocalRowView(row, indices, vals);
1255 isBoundary = pointBoundaryNodes[row];
1259 for (
LO j = 0; j < blkSize; j++) {
1260 if (!pointBoundaryNodes[row * blkSize + j]) {
1268 MergeRows(*A, row, indicesExtra, colTranslation);
1271 indices = indicesExtra;
1273 numTotal += indices.
size();
1275 LO nnz = indices.
size(), rownnz = 0;
1277 if (use_stop_array) {
1279 realnnz =
rows(row);
1282 if (threshold != STS::zero()) {
1286 for (
LO colID = 0; colID < nnz; colID++) {
1287 LO col = indices[colID];
1290 columns(realnnz++) = col;
1296 if (isBoundary)
continue;
1299 if (use_dlap_weights == SINGLE_WEIGHTS) {
1301 }
else if (use_dlap_weights == BLOCK_WEIGHTS) {
1302 int block_id = row % interleaved_blocksize;
1303 int block_start = block_id * interleaved_blocksize;
1308 real_type aiiajj = STS::magnitude(realThreshold * realThreshold * ghostedLaplDiagData[row] * ghostedLaplDiagData[col]);
1309 real_type aij = STS::magnitude(laplVal * laplVal);
1312 columns(realnnz++) = col;
1321 std::vector<DropTol> drop_vec;
1322 drop_vec.reserve(nnz);
1327 for (
LO colID = 0; colID < nnz; colID++) {
1328 LO col = indices[colID];
1331 drop_vec.emplace_back(zero, one, colID,
false);
1335 if (isBoundary)
continue;
1338 if (use_dlap_weights == SINGLE_WEIGHTS) {
1340 }
else if (use_dlap_weights == BLOCK_WEIGHTS) {
1341 int block_id = row % interleaved_blocksize;
1342 int block_start = block_id * interleaved_blocksize;
1348 real_type aiiajj = STS::magnitude(ghostedLaplDiagData[row] * ghostedLaplDiagData[col]);
1349 real_type aij = STS::magnitude(laplVal * laplVal);
1351 drop_vec.emplace_back(aij, aiiajj, colID,
false);
1354 const size_t n = drop_vec.size();
1357 std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol
const& a, DropTol
const& b) {
1358 return a.val > b.val;
1362 for (
size_t i = 1; i < n; ++i) {
1364 auto const& x = drop_vec[i - 1];
1365 auto const& y = drop_vec[i];
1368 if (realThreshold * realThreshold * a > b) {
1370 #ifdef HAVE_MUELU_DEBUG
1371 if (distanceLaplacianCutVerbose) {
1372 std::cout <<
"DJS: KEEP, N, ROW: " << i + 1 <<
", " << n <<
", " << row << std::endl;
1377 drop_vec[i].drop = drop;
1380 std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol
const& a, DropTol
const& b) {
1381 return a.val / a.diag > b.val / b.diag;
1385 for (
size_t i = 1; i < n; ++i) {
1387 auto const& x = drop_vec[i - 1];
1388 auto const& y = drop_vec[i];
1389 auto a = x.val / x.diag;
1390 auto b = y.val / y.diag;
1391 if (realThreshold * realThreshold * a > b) {
1393 #ifdef HAVE_MUELU_DEBUG
1394 if (distanceLaplacianCutVerbose) {
1395 std::cout <<
"DJS: KEEP, N, ROW: " << i + 1 <<
", " << n <<
", " << row << std::endl;
1400 drop_vec[i].drop = drop;
1404 std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol
const& a, DropTol
const& b) {
1405 return a.col < b.col;
1408 for (
LO idxID = 0; idxID < (
LO)drop_vec.size(); idxID++) {
1409 LO col = indices[drop_vec[idxID].col];
1413 columns(realnnz++) = col;
1419 if (!drop_vec[idxID].drop) {
1420 columns(realnnz++) = col;
1431 for (
LO colID = 0; colID < nnz; colID++) {
1432 LO col = indices[colID];
1433 columns(realnnz++) = col;
1445 amalgBoundaryNodes[row] =
true;
1449 rows_stop[row] = rownnz + rows[row];
1451 rows[row + 1] = realnnz;
1456 if (use_stop_array) {
1459 for (
LO row = 0; row < numRows; row++) {
1460 for (
LO colidx = rows[row]; colidx < rows_stop[row]; colidx++) {
1461 LO col = columns[colidx];
1462 if (col >= numRows)
continue;
1465 for (
LO t_col =
rows(col); !found && t_col < rows_stop[col]; t_col++) {
1466 if (columns[t_col] == row)
1471 if (!found && !pointBoundaryNodes[col] && Teuchos::as<typename LWGraph::row_type::value_type>(rows_stop[col]) < rows[col + 1]) {
1472 LO new_idx = rows_stop[col];
1474 columns[new_idx] = row;
1482 LO current_start = 0;
1483 for (
LO row = 0; row < numRows; row++) {
1484 LO old_start = current_start;
1485 for (
LO col =
rows(row); col < rows_stop[row]; col++) {
1486 if (current_start != col) {
1487 columns(current_start) = columns(col);
1491 rows[row] = old_start;
1493 rows(numRows) = realnnz = current_start;
1499 graph =
rcp(
new LWGraph(rows, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), uniqueMap, nonUniqueMap,
"amalgamated graph of A"));
1504 GO numLocalBoundaryNodes = 0;
1505 GO numGlobalBoundaryNodes = 0;
1507 for (
size_t i = 0; i < amalgBoundaryNodes.size(); ++i)
1508 if (amalgBoundaryNodes(i))
1509 numLocalBoundaryNodes++;
1512 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1513 GetOStream(
Statistics1) <<
"Detected " << numGlobalBoundaryNodes <<
" agglomerated Dirichlet nodes"
1514 <<
" using threshold " << dirichletThreshold << std::endl;
1517 Set(currentLevel,
"Graph", graph);
1518 Set(currentLevel,
"DofsPerNode", blkSize);
1524 GO numGlobalTotal, numGlobalDropped;
1527 GetOStream(
Statistics1) <<
"Number of dropped entries in " << graphType <<
" matrix graph: " << numGlobalDropped <<
"/" << numGlobalTotal;
1528 if (numGlobalTotal != 0)
1529 GetOStream(
Statistics1) <<
" (" << 100 * Teuchos::as<double>(numGlobalDropped) / Teuchos::as<double>(numGlobalTotal) <<
"%)";
1536 SC threshold = as<SC>(pL.get<
double>(
"aggregation: drop tol"));
1538 GetOStream(
Runtime0) <<
"algorithm = \""
1540 <<
"\": threshold = " << threshold <<
", blocksize = " << A->GetFixedBlockSize() << std::endl;
1541 Set<bool>(currentLevel,
"Filtering", (threshold != STS::zero()));
1547 GO indexBase = rowMap->getIndexBase();
1551 if (A->IsView(
"stridedMaps") &&
1552 Teuchos::rcp_dynamic_cast<
const StridedMap>(A->getRowMap(
"stridedMaps")) != Teuchos::null) {
1556 blockdim = strMap->getFixedBlockSize();
1557 offset = strMap->getOffset();
1558 oldView = A->SwitchToView(oldView);
1559 GetOStream(
Statistics1) <<
"CoalesceDropFactory::Build():"
1560 <<
" found blockdim=" << blockdim <<
" from strided maps. offset=" << offset << std::endl;
1562 GetOStream(
Statistics1) <<
"CoalesceDropFactory::Build(): no striding information available. Use blockdim=1 with offset=0" << std::endl;
1567 GetOStream(
Statistics1) <<
"CoalesceDropFactory: nodeMap " << nodeMap->getLocalNumElements() <<
"/" << nodeMap->getGlobalNumElements() <<
" elements" << std::endl;
1570 RCP<CrsGraph> crsGraph = CrsGraphFactory::Build(nodeMap, A->getLocalMaxNumRowEntries() * blockdim);
1572 LO numRows = A->getRowMap()->getLocalNumElements();
1573 LO numNodes = nodeMap->getLocalNumElements();
1575 Kokkos::deep_copy(amalgBoundaryNodes,
false);
1576 const ArrayRCP<int> numberDirichletRowsPerNode(numNodes, 0);
1577 bool bIsDiagonalEntry =
false;
1582 for (
LO row = 0; row < numRows; row++) {
1584 GO grid = rowMap->getGlobalElement(row);
1587 bIsDiagonalEntry =
false;
1592 size_t nnz = A->getNumEntriesInLocalRow(row);
1595 A->getLocalRowView(row, indices, vals);
1599 for (
LO col = 0; col < Teuchos::as<LO>(nnz); col++) {
1600 GO gcid = colMap->getGlobalElement(indices[col]);
1602 if (vals[col] != STS::zero()) {
1604 cnodeIds->push_back(cnodeId);
1606 if (grid == gcid) bIsDiagonalEntry =
true;
1610 if (realnnz == 1 && bIsDiagonalEntry ==
true) {
1611 LO lNodeId = nodeMap->getLocalElement(nodeId);
1612 numberDirichletRowsPerNode[lNodeId] += 1;
1613 if (numberDirichletRowsPerNode[lNodeId] == blockdim)
1614 amalgBoundaryNodes[lNodeId] =
true;
1619 if (arr_cnodeIds.
size() > 0)
1620 crsGraph->insertGlobalIndices(nodeId, arr_cnodeIds());
1623 crsGraph->fillComplete(nodeMap, nodeMap);
1632 GO numLocalBoundaryNodes = 0;
1633 GO numGlobalBoundaryNodes = 0;
1634 for (
size_t i = 0; i < amalgBoundaryNodes.size(); ++i)
1635 if (amalgBoundaryNodes(i))
1636 numLocalBoundaryNodes++;
1638 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1639 GetOStream(
Statistics1) <<
"Detected " << numGlobalBoundaryNodes <<
" Dirichlet nodes" << std::endl;
1644 Set(currentLevel,
"DofsPerNode", blockdim);
1645 Set(currentLevel,
"Graph", graph);
1651 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1656 LO blkSize = A.GetFixedBlockSize();
1657 if (A.IsView(
"stridedMaps") ==
true) {
1661 if (strMap->getStridedBlockId() > -1)
1662 blkSize = Teuchos::as<LO>(strMap->getStridingData()[strMap->getStridedBlockId()]);
1666 size_t nnz = 0, pos = 0;
1667 for (
LO j = 0; j < blkSize; j++)
1668 nnz += A.getNumEntriesInLocalRow(row * blkSize + j);
1680 for (
LO j = 0; j < blkSize; j++) {
1681 A.getLocalRowView(row * blkSize + j, inds, vals);
1682 size_type numIndices = inds.
size();
1684 if (numIndices == 0)
1688 cols[pos++] = translation[inds[0]];
1689 for (size_type k = 1; k < numIndices; k++) {
1690 LO nodeID = translation[inds[k]];
1694 if (nodeID != cols[pos - 1])
1695 cols[pos++] = nodeID;
1702 std::sort(cols.
begin(), cols.
end());
1704 for (
size_t j = 1; j < nnz; j++)
1705 if (cols[j] != cols[pos])
1706 cols[++pos] = cols[j];
1710 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1716 LO blkSize = A.GetFixedBlockSize();
1717 if (A.IsView(
"stridedMaps") ==
true) {
1721 if (strMap->getStridedBlockId() > -1)
1722 blkSize = Teuchos::as<LO>(strMap->getStridingData()[strMap->getStridedBlockId()]);
1726 size_t nnz = 0, pos = 0;
1727 for (
LO j = 0; j < blkSize; j++)
1728 nnz += A.getNumEntriesInLocalRow(row * blkSize + j);
1740 for (
LO j = 0; j < blkSize; j++) {
1741 A.getLocalRowView(row * blkSize + j, inds, vals);
1742 size_type numIndices = inds.
size();
1744 if (numIndices == 0)
1749 for (size_type k = 0; k < numIndices; k++) {
1751 LO nodeID = translation[inds[k]];
1754 typename STS::magnitudeType aiiajj = STS::magnitude(threshold * threshold * ghostedDiagVals[dofID] * ghostedDiagVals[row * blkSize + j]);
1755 typename STS::magnitudeType aij = STS::magnitude(vals[k] * vals[k]);
1758 if (aij > aiiajj || (row * blkSize + j == dofID)) {
1764 if (nodeID != prevNodeID) {
1765 cols[pos++] = nodeID;
1766 prevNodeID = nodeID;
1775 std::sort(cols.
begin(), cols.
end());
1777 for (
size_t j = 1; j < nnz; j++)
1778 if (cols[j] != cols[pos])
1779 cols[++pos] = cols[j];
1785 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1790 const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.
get<
double>(
"aggregation: Dirichlet threshold")));
1791 const typename STS::magnitudeType rowSumTol = as<typename STS::magnitudeType>(pL.
get<
double>(
"aggregation: row sum drop tol"));
1795 GetOStream(
Statistics1) <<
"Using BlockDiagonal Graph before dropping (with provided blocking)" << std::endl;
1802 ghostedBlockNumber->doImport(*BlockNumber, *importer,
Xpetra::INSERT);
1804 ghostedBlockNumber = BlockNumber;
1812 typename CrsMatrix::local_matrix_type::row_map_type::HostMirror::non_const_type rows_mat;
1813 typename LWGraph::row_type::non_const_type rows_graph;
1814 typename LWGraph::entries_type::non_const_type columns;
1815 typename CrsMatrix::local_matrix_type::values_type::HostMirror::non_const_type values;
1818 if (generate_matrix) {
1819 crs_matrix_wrap =
rcp(
new CrsMatrixWrap(A->getRowMap(), A->getColMap(), 0));
1820 rows_mat =
typename CrsMatrix::local_matrix_type::row_map_type::HostMirror::non_const_type(
"rows_mat", A->getLocalNumRows() + 1);
1822 rows_graph =
typename LWGraph::row_type::non_const_type(
"rows_graph", A->getLocalNumRows() + 1);
1824 columns =
typename LWGraph::entries_type::non_const_type(
"columns", A->getLocalNumEntries());
1825 values =
typename CrsMatrix::local_matrix_type::values_type::HostMirror::non_const_type(
"values", A->getLocalNumEntries());
1828 GO numDropped = 0, numTotal = 0;
1829 for (
LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
1830 LO row_block = row_block_number[row];
1831 size_t nnz = A->getNumEntriesInLocalRow(row);
1834 A->getLocalRowView(row, indices, vals);
1837 for (
LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
1838 LO col = indices[colID];
1839 LO col_block = col_block_number[col];
1841 if (row_block == col_block) {
1842 if (generate_matrix) values[realnnz] = vals[colID];
1843 columns[realnnz++] = col;
1848 if (generate_matrix)
1849 rows_mat[row + 1] = realnnz;
1851 rows_graph[row + 1] = realnnz;
1858 numTotal = A->getLocalNumEntries();
1861 GO numLocalBoundaryNodes = 0;
1862 GO numGlobalBoundaryNodes = 0;
1863 for (
size_t i = 0; i < boundaryNodes.size(); ++i)
1864 if (boundaryNodes(i))
1865 numLocalBoundaryNodes++;
1867 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1868 GetOStream(
Statistics1) <<
"Detected " << numGlobalBoundaryNodes <<
" Dirichlet nodes" << std::endl;
1870 GO numGlobalTotal, numGlobalDropped;
1873 GetOStream(
Statistics1) <<
"Number of dropped entries in block-diagonalized matrix graph: " << numGlobalDropped <<
"/" << numGlobalTotal;
1874 if (numGlobalTotal != 0)
1875 GetOStream(
Statistics1) <<
" (" << 100 * Teuchos::as<double>(numGlobalDropped) / Teuchos::as<double>(numGlobalTotal) <<
"%)";
1879 Set(currentLevel,
"Filtering",
true);
1881 if (generate_matrix) {
1886 typename CrsMatrix::local_matrix_type::row_map_type>::value) {
1887 crs_matrix_wrap->getCrsMatrix()->setAllValues(rows_mat, columns, values);
1889 auto rows_mat2 =
typename CrsMatrix::local_matrix_type::row_map_type::non_const_type(
"rows_mat2", rows_mat.extent(0));
1890 Kokkos::deep_copy(rows_mat2, rows_mat);
1891 auto columns2 =
typename CrsMatrix::local_graph_type::entries_type::non_const_type(
"columns2", columns.extent(0));
1892 Kokkos::deep_copy(columns2, columns);
1893 auto values2 =
typename CrsMatrix::local_matrix_type::values_type::non_const_type(
"values2", values.extent(0));
1894 Kokkos::deep_copy(values2, values);
1895 crs_matrix_wrap->getCrsMatrix()->setAllValues(rows_mat2, columns2, values2);
1897 crs_matrix_wrap->getCrsMatrix()->expertStaticFillComplete(A->getColMap(), A->getRowMap());
1899 RCP<LWGraph> graph =
rcp(
new LWGraph(rows_graph, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), A->getRowMap(), A->getColMap(),
"block-diagonalized graph of A"));
1901 Set(currentLevel,
"Graph", graph);
1904 Set(currentLevel,
"DofsPerNode", 1);
1905 return crs_matrix_wrap;
1908 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1913 const bool localizeColoringGraph = pL.get<
bool>(
"aggregation: coloring: localize color graph");
1915 GetOStream(
Statistics1) <<
"Using BlockDiagonal Graph after Dropping (with provided blocking)";
1916 if (localizeColoringGraph)
1917 GetOStream(
Statistics1) <<
", with localization" << std::endl;
1919 GetOStream(
Statistics1) <<
", without localization" << std::endl;
1927 typename LWGraph::row_type::non_const_type rows_graph(
"rows_graph", inputGraph->
GetNodeNumVertices() + 1);
1928 typename LWGraph::entries_type::non_const_type columns(
"columns", inputGraph->
GetNodeNumEdges());
1931 GO numDropped = 0, numTotal = 0;
1932 const LO numRows = Teuchos::as<LO>(inputGraph->
GetDomainMap()->getLocalNumElements());
1933 if (localizeColoringGraph) {
1934 for (
LO row = 0; row < numRows; ++row) {
1935 LO row_block = row_block_number[row];
1939 for (
LO colID = 0; colID < Teuchos::as<LO>(indices.length); colID++) {
1940 LO col = indices(colID);
1941 LO col_block = col_block_number[col];
1943 if ((row_block == col_block) && (col < numRows)) {
1944 columns(realnnz++) = col;
1949 rows_graph(row + 1) = realnnz;
1956 boundaryNodesVector->getDataNonConst(0)[i] = boundaryNodes[i];
1959 boundaryColumnVector->doImport(*boundaryNodesVector, *importer,
Xpetra::INSERT);
1960 auto boundaryColumn = boundaryColumnVector->getData(0);
1962 for (
LO row = 0; row < numRows; ++row) {
1963 LO row_block = row_block_number[row];
1967 for (
LO colID = 0; colID < Teuchos::as<LO>(indices.length); colID++) {
1968 LO col = indices(colID);
1969 LO col_block = col_block_number[col];
1971 if ((row_block == col_block) && ((row == col) || (boundaryColumn[col] == 0))) {
1972 columns(realnnz++) = col;
1977 rows_graph(row + 1) = realnnz;
1985 GO numGlobalTotal, numGlobalDropped;
1988 GetOStream(
Statistics1) <<
"Number of dropped entries in block-diagonalized matrix graph: " << numGlobalDropped <<
"/" << numGlobalTotal;
1989 if (numGlobalTotal != 0)
1990 GetOStream(
Statistics1) <<
" (" << 100 * Teuchos::as<double>(numGlobalDropped) / Teuchos::as<double>(numGlobalTotal) <<
"%)";
1994 if (localizeColoringGraph) {
1995 outputGraph =
rcp(
new LWGraph(rows_graph, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), inputGraph->
GetDomainMap(), inputGraph->
GetImportMap(),
"block-diagonalized graph of A"));
1999 #ifdef HAVE_XPETRA_TPETRA
2000 auto outputGraph2 =
rcp(
new LWGraph(rows_graph, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), inputGraph->
GetDomainMap(), inputGraph->
GetImportMap(),
"block-diagonalized graph of A"));
2002 auto tpGraph =
Xpetra::toTpetra(rcp_const_cast<const CrsGraph>(outputGraph2->GetCrsGraph()));
2004 auto tpGraphSym = sym->symmetrize();
2005 auto lclGraphSym = tpGraphSym->getLocalGraphHost();
2006 auto colIndsSym = lclGraphSym.entries;
2008 auto rowsSym = tpGraphSym->getLocalRowPtrsHost();
2009 typename LWGraph::row_type::non_const_type rows_graphSym(
"rows_graphSym", rowsSym.size());
2010 for (
size_t row = 0; row < rowsSym.size(); row++)
2011 rows_graphSym(row) = rowsSym(row);
2020 #endif // MUELU_COALESCEDROPFACTORY_DEF_HPP
Important warning messages (one line)
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
typename local_graph_type::row_map_type row_type
Exception indicating invalid cast attempted.
void reserve(size_type n)
void BlockDiagonalizeGraph(const RCP< LWGraph > &inputGraph, const RCP< LocalOrdinalVector > &ghostedBlockNumber, RCP< LWGraph > &outputGraph, RCP< const Import > &importer) const
static Teuchos::RCP< Vector > GetMatrixMaxMinusOffDiagonal(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
Return vector containing: max_{i=k}(-a_ik), for each for i in the matrix.
#define MueLu_sumAll(rcpComm, in, out)
static void ApplyRowSumCriterionHost(const Matrix &A, const typename Teuchos::ScalarTraits< Scalar >::magnitudeType rowSumTol, Kokkos::View< bool *, Kokkos::HostSpace > &dirichletRows)
KOKKOS_INLINE_FUNCTION void SetBoundaryNodeMap(const boundary_nodes_type bndry)
Set boolean array indicating which rows correspond to Dirichlet boundaries.
static Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Build(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node >> &map, size_t NumVectors, bool zeroOut=true)
void setValidator(RCP< const ParameterEntryValidator > const &validator)
T & get(const std::string &name, T def_value)
static void ApplyRowSumCriterion(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Magnitude rowSumTol, Teuchos::ArrayRCP< bool > &dirichletRows)
Apply Rowsum Criterion.
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
static void Write(const std::string &fileName, const Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > &M)
DropTol & operator=(DropTol const &)=default
Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > BlockDiagonalize(Level ¤tLevel, const RCP< Matrix > &A, bool generate_matrix) const
One-liner description of what is happening.
ParameterList & set(std::string const &name, T &&value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const
Return number of graph vertices.
KOKKOS_INLINE_FUNCTION size_type GetNodeNumEdges() const
Return number of graph edges.
KOKKOS_INLINE_FUNCTION const boundary_nodes_type GetBoundaryNodeMap() const
Returns map with global ids of boundary nodes.
Exception throws to report incompatible objects (like maps).
static const GlobalOrdinal DOFGid2NodeId(GlobalOrdinal gid, LocalOrdinal blockSize, const GlobalOrdinal offset, const GlobalOrdinal indexBase)
Translate global (row/column) id to global amalgamation block id.
Scalar GetThreshold() const
Return threshold value.
void resize(const size_type n, const T &val=T())
CoalesceDropFactory()
Constructor.
Kokkos::View< bool *, memory_space > boundary_nodes_type
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Class that holds all level-specific information.
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
static Teuchos::ArrayRCP< const bool > DetectDirichletRows(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Magnitude &tol=Teuchos::ScalarTraits< Magnitude >::zero(), bool count_twos_as_dirichlet=false)
Detect Dirichlet rows.
#define SET_VALID_ENTRY(name)
static void AmalgamateMap(const Map &sourceMap, const Matrix &A, RCP< const Map > &amalgamatedMap, Array< LO > &translation)
Method to create merged map for systems of PDEs.
static RCP< Vector > GetMatrixOverlappedDiagonal(const Matrix &A)
Extract Overlapped Matrix Diagonal.
void resize(size_type new_size, const value_type &x=value_type())
void Build(Level ¤tLevel) const
Build an object with this factory.
static RCP< Vector > Build(const Teuchos::RCP< const Map > &map, bool zeroOut=true)
void DeclareInput(Level ¤tLevel) const
Input.
KOKKOS_INLINE_FUNCTION neighbor_vertices_type getNeighborVertices(LO i) const
Return the list of vertices adjacent to the vertex 'v'.
const RCP< const Map > GetImportMap() const
Return overlapping import map (nodes).
RCP< const Tpetra::CrsGraph< LocalOrdinal, GlobalOrdinal, Node > > toTpetra(const RCP< const CrsGraph< LocalOrdinal, GlobalOrdinal, Node > > &graph)
void push_back(const value_type &x)
void MergeRows(const Matrix &A, const LO row, Array< LO > &cols, const Array< LO > &translation) const
Method to merge rows of matrix for systems of PDEs.
RCP< const CrsGraph< int, GlobalOrdinal, Node > > toXpetra(const Epetra_CrsGraph &g)
const RCP< const Map > GetDomainMap() const
Lightweight MueLu representation of a compressed row storage graph.
DropTol(real_type val_, real_type diag_, LO col_, bool drop_)
Exception throws to report errors in the internal logical of the program.
#define TEUCHOS_ASSERT(assertion_test)
static Kokkos::View< bool *, typename Kokkos::HostSpace > DetectDirichletRows_kokkos_host(const Matrix &A, const Magnitude &tol=Teuchos::ScalarTraits< typename Teuchos::ScalarTraits< SC >::magnitudeType >::zero(), const bool count_twos_as_dirichlet=false)
static Teuchos::ScalarTraits< Scalar >::magnitudeType Distance2(const Teuchos::Array< Teuchos::ArrayRCP< const Scalar >> &v, LocalOrdinal i0, LocalOrdinal i1)
Squared distance between two rows in a multivector.
ParameterEntry & getEntry(const std::string &name)
void MergeRowsWithDropping(const Matrix &A, const LO row, const ArrayRCP< const SC > &ghostedDiagVals, SC threshold, Array< LO > &cols, const Array< LO > &translation) const