10 #ifndef MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
11 #define MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
13 #include <Kokkos_Core.hpp>
14 #include <KokkosSparse_CrsMatrix.hpp>
23 #include "MueLu_AmalgamationInfo.hpp"
26 #include "MueLu_LWGraph_kokkos.hpp"
29 #include "MueLu_Utilities.hpp"
39 #include "MueLu_ScalarDroppingClassical.hpp"
40 #include "MueLu_ScalarDroppingDistanceLaplacian.hpp"
42 #include "MueLu_VectorDroppingClassical.hpp"
43 #include "MueLu_VectorDroppingDistanceLaplacian.hpp"
// Builds the list of valid parameters (`validParamList`) for this factory and returns it.
// NOTE(review): the signature line and several statements of this definition are not
// visible in this listing (the embedded line numbers jump); presumably this is
// GetValidParameterList - confirm. Comments below describe only what is shown.
47 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// Shorthand: copy the MasterList entry called `name` into validParamList.
51 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
65 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
80 SET_VALID_ENTRY(
"filtered matrix: spread lumping diag dom growth factor");
// The helper macro is only needed for the entries above; drop it again.
84 #undef SET_VALID_ENTRY
// Boolean option "lightweight wrap", registered with default value true.
85 validParamList->
set<
bool>(
"lightweight wrap",
true,
"Experimental option for lightweight graph access");
86 #ifndef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
// Attach a StringValidator to "aggregation: drop scheme" that accepts the strings listed below.
// NOTE(review): lines between the #ifndef above and this statement are not visible, so
// it cannot be told from here which preprocessor branch this validator belongs to.
89 validParamList->
getEntry(
"aggregation: drop scheme").
setValidator(
rcp(
new Teuchos::StringValidator(Teuchos::tuple<std::string>(
"point-wise",
"cut-drop",
"signed classical sa",
"classical",
"distance laplacian",
"signed classical",
"block diagonal",
"block diagonal classical",
"block diagonal distance laplacian",
"block diagonal signed classical",
"block diagonal colored signed classical",
"signed classical distance laplacian",
"signed classical sa distance laplacian"))));
// Attach a StringValidator to the strength-of-connection measure; four values are accepted.
94 validParamList->
getEntry(
"aggregation: strength-of-connection: measure").
setValidator(
rcp(
new Teuchos::StringValidator(Teuchos::tuple<std::string>(
"smoothed aggregation",
"signed smoothed aggregation",
"signed ruge-stueben",
"unscaled"))));
// Factory entry for "UnAmalgamationInfo"; the registered default is Teuchos::null.
98 validParamList->
set<
RCP<const FactoryBase>>(
"UnAmalgamationInfo", Teuchos::null,
"Generating factory for UnAmalgamationInfo");
103 return validParamList;
// Declares the data this factory requests on `currentLevel` via Input(...).
// NOTE(review): the signature line, the definition of `pL`, and the conditions/#endif
// lines guarding some requests are not visible in this listing; presumably this is
// DeclareInput - confirm.
106 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// Requests for the matrix "A" and for "UnAmalgamationInfo".
108 Input(currentLevel,
"A");
109 Input(currentLevel,
"UnAmalgamationInfo");
// needCoords is set when the strength-of-connection matrix is the distance Laplacian.
113 std::string socUsesMatrix = pL.
get<std::string>(
"aggregation: strength-of-connection: matrix");
114 bool needCoords = (socUsesMatrix ==
"distance laplacian");
115 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
// With old-style parameters, a drop scheme whose name contains "distance laplacian"
// also sets needCoords.
116 std::string droppingMethod = pL.
get<std::string>(
"aggregation: drop scheme");
117 needCoords |= (droppingMethod.find(
"distance laplacian") != std::string::npos);
// Request "Coordinates" (presumably guarded by needCoords; the guard line is not visible).
120 Input(currentLevel,
"Coordinates");
// "Material" is requested when the distance Laplacian metric is "material".
121 std::string distLaplMetric = pL.
get<std::string>(
"aggregation: distance laplacian metric");
122 if (distLaplMetric ==
"material")
123 Input(currentLevel,
"Material");
126 bool useBlocking = pL.
get<
bool>(
"aggregation: use blocking");
127 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
// With old-style parameters, a drop scheme whose name contains "block diagonal"
// also sets useBlocking.
128 useBlocking |= (droppingMethod.find(
"block diagonal") != std::string::npos);
// Request "BlockNumber" (presumably guarded by useBlocking; the guard line is not visible).
131 Input(currentLevel,
"BlockNumber");
// Driver: obtains results from BuildScalar or BuildVector, then reports the number of
// boundary nodes and of dropped matrix entries.
// NOTE(review): the signature line, the scalar/vector selection condition, the
// definition of `numDropped`, and the reduction that fills `globalStats` are not visible
// in this listing; presumably this is Build - confirm.
135 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
138 auto A = Get<RCP<Matrix>>(currentLevel,
"A");
// Ratio of the fixed block size of A to its storage block size.
140 LO blkSize = A->GetFixedBlockSize() / A->GetStorageBlockSize();
// `results` is a (GlobalOrdinal, boundary node view) tuple produced by one of the two
// builders; which one runs presumably depends on blkSize (condition not visible).
142 std::tuple<GlobalOrdinal, boundary_nodes_type> results;
144 results = BuildScalar(currentLevel);
146 results = BuildVector(currentLevel);
150 auto boundaryNodes = std::get<1>(results);
// Reduction over all entries of boundaryNodes into numLocalBoundaryNodes; the statement
// executed for flagged entries is not visible (presumably it increments n).
152 GO numLocalBoundaryNodes = 0;
154 Kokkos::parallel_reduce(
155 "MueLu:CoalesceDropF:Build:bnd",
range_type(0, boundaryNodes.extent(0)),
156 KOKKOS_LAMBDA(
const LO i,
GO& n) {
157 if (boundaryNodes(i))
160 numLocalBoundaryNodes);
163 auto comm = A->getRowMap()->getComm();
// Two statistics are packed together: [0] = boundary node count, [1] = dropped entries.
165 std::vector<GlobalOrdinal> localStats = {numLocalBoundaryNodes, numDropped};
166 std::vector<GlobalOrdinal> globalStats(2);
169 GO numGlobalTotal = A->getGlobalNumEntries();
170 GO numGlobalBoundaryNodes = globalStats[0];
171 GO numGlobalDropped = globalStats[1];
173 GetOStream(
Statistics1) <<
"Detected " << numGlobalBoundaryNodes <<
" Dirichlet nodes" << std::endl;
// The percentage is only printed for a non-empty matrix, which avoids dividing by zero.
174 if (numGlobalTotal != 0) {
175 GetOStream(
Statistics1) <<
"Number of dropped entries: "
176 << numGlobalDropped <<
"/" << numGlobalTotal
177 <<
" (" << 100 * Teuchos::as<double>(numGlobalDropped) / Teuchos::as<double>(numGlobalTotal) <<
"%)" << std::endl;
// Launches one Kokkos parallel_for, labelled "CoalesceDrop::BoundaryDetection", over the
// index range [0, boundaryNodes.extent(0)).
// The ordinal type and execution space of the range policy are taken from local_matrix_type.
// NOTE(review): the line that builds the `boundaries` functor (presumably from lclA,
// boundaryNodes and the `functors` pack) and the closing brace are not visible in this
// listing.
183 template <
class local_matrix_type,
class boundary_nodes_view,
class... Functors>
184 void runBoundaryFunctors(local_matrix_type& lclA, boundary_nodes_view& boundaryNodes, Functors&... functors) {
185 using local_ordinal_type =
typename local_matrix_type::ordinal_type;
186 using execution_space =
typename local_matrix_type::execution_space;
187 using range_type = Kokkos::RangePolicy<local_ordinal_type, execution_space>;
188 auto range = range_type(0, boundaryNodes.extent(0));
190 Kokkos::parallel_for(
"CoalesceDrop::BoundaryDetection", range, boundaries);
// Translates an old-style "aggregation: drop scheme" value into the newer split settings:
// dropping method, strength-of-connection matrix and measure, plus the graph
// symmetrization and coloring-graph flags.
// NOTE(review): the signature line, several closing braces, and the statements following
// some conditions are not visible in this listing; comments describe only what is shown.
193 template <
class magnitudeType>
// Dropping methods that are treated as already being in the new form.
// NOTE(review): this set contains "piece-wise", while the assignments further down in
// this function use "point-wise"; this looks like a typo - confirm.
195 std::set<std::string> validDroppingMethods = {
"piece-wise",
"cut-drop"};
// Translation only happens when droppingMethod is not in the set above.
199 if (validDroppingMethods.find(droppingMethod) == validDroppingMethods.end()) {
200 std::string algo = droppingMethod;
201 std::string classicalAlgoStr = pL.
get<std::string>(
"aggregation: classical algo");
202 std::string distanceLaplacianAlgoStr = pL.
get<std::string>(
"aggregation: distance laplacian algo");
// A leading "block diagonal" (14 characters) is removed from algo; the later substr(1)
// presumably removes a separating blank (its guarding condition is not visible).
205 if (algo.find(
"block diagonal") == 0) {
207 algo = algo.substr(14);
209 algo = algo.substr(1);
// Family 1: classical variants; only the measure (and coloring flag) is chosen here.
213 if ((algo ==
"classical") || (algo ==
"signed classical sa") || (algo ==
"signed classical") || (algo ==
"colored signed classical")) {
216 if (algo ==
"classical") {
217 socUsesMeasure =
"smoothed aggregation";
218 }
else if (algo ==
"signed classical sa") {
219 socUsesMeasure =
"signed smoothed aggregation";
220 }
else if (algo ==
"signed classical") {
221 socUsesMeasure =
"signed ruge-stueben";
222 }
else if (algo ==
"colored signed classical") {
223 socUsesMeasure =
"signed ruge-stueben";
224 generateColoringGraph =
true;
// "aggregation: classical algo" selects the dropping method; the "unscaled cut" choice
// additionally overrides the measure chosen above.
227 if (classicalAlgoStr ==
"default")
228 droppingMethod =
"point-wise";
229 else if (classicalAlgoStr ==
"unscaled cut") {
230 socUsesMeasure =
"unscaled";
231 droppingMethod =
"cut-drop";
232 }
else if (classicalAlgoStr ==
"scaled cut") {
233 droppingMethod =
"cut-drop";
234 }
else if (classicalAlgoStr ==
"scaled cut symmetric") {
235 droppingMethod =
"cut-drop";
236 symmetrizeDroppedGraph =
true;
238 }
// Family 2: distance Laplacian variants; same structure as above, with the
// strength-of-connection matrix switched to the distance Laplacian.
else if ((algo ==
"distance laplacian") || (algo ==
"signed classical sa distance laplacian") || (algo ==
"signed classical distance laplacian")) {
239 socUsesMatrix =
"distance laplacian";
241 if (algo ==
"distance laplacian") {
242 socUsesMeasure =
"smoothed aggregation";
243 }
else if (algo ==
"signed classical sa distance laplacian") {
244 socUsesMeasure =
"signed smoothed aggregation";
245 }
else if (algo ==
"signed classical distance laplacian") {
246 socUsesMeasure =
"signed ruge-stueben";
// "aggregation: distance laplacian algo" selects the dropping method for this family.
249 if (distanceLaplacianAlgoStr ==
"default")
250 droppingMethod =
"point-wise";
251 else if (distanceLaplacianAlgoStr ==
"unscaled cut") {
252 socUsesMeasure =
"unscaled";
253 droppingMethod =
"cut-drop";
254 }
else if (distanceLaplacianAlgoStr ==
"scaled cut") {
255 droppingMethod =
"cut-drop";
256 }
else if (distanceLaplacianAlgoStr ==
"scaled cut symmetric") {
257 droppingMethod =
"cut-drop";
258 symmetrizeDroppedGraph =
true;
260 }
// Empty remainder: reached when algo became empty after the prefix removal above
// (the body of this branch is not visible in this listing).
else if (algo ==
"") {
// Scalar path: reads the dropping parameters, runs the dropping functors on A, builds
// the filtered matrix and the graph, optionally builds a coloring graph, stores
// "DofsPerNode", "Graph" and "A" on the level, and returns (numDropped, boundaryNodes).
// NOTE(review): the signature line, many braces/else lines, and the definitions of
// several names used here (pL, zero, boundaryNodes, nnz_filtered, range, the functors,
// dofsPerNode, ...) are not visible in this listing; presumably this is BuildScalar -
// confirm. Comments describe only what is shown.
268 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// Local matrix/graph types and the non-const view types for row pointers, column
// indices and values.
275 using local_matrix_type =
typename MatrixType::local_matrix_type;
276 using local_graph_type =
typename GraphType::local_graph_type;
277 using rowptr_type =
typename local_graph_type::row_map_type::non_const_type;
278 using entries_type =
typename local_graph_type::entries_type::non_const_type;
279 using values_type =
typename local_matrix_type::values_type::non_const_type;
280 using device_type =
typename Node::device_type;
281 using memory_space =
typename device_type::memory_space;
// One DecisionType value per matrix entry (see the allocation of `results` below).
282 using results_view_type = Kokkos::View<DecisionType*, memory_space>;
289 auto A = Get<RCP<Matrix>>(currentLevel,
"A");
// Thresholds used for boundary detection.
296 const magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<
double>(
"aggregation: Dirichlet threshold")));
297 const magnitudeType rowSumTol = as<magnitudeType>(pL.get<
double>(
"aggregation: row sum drop tol"));
// Dropping configuration; these are non-const because the old-parameter translation
// further down may overwrite them.
301 bool useBlocking = pL.get<
bool>(
"aggregation: use blocking");
302 std::string droppingMethod = pL.get<std::string>(
"aggregation: drop scheme");
303 std::string socUsesMatrix = pL.get<std::string>(
"aggregation: strength-of-connection: matrix");
304 std::string socUsesMeasure = pL.get<std::string>(
"aggregation: strength-of-connection: measure");
305 std::string distanceLaplacianMetric = pL.get<std::string>(
"aggregation: distance laplacian metric");
306 bool symmetrizeDroppedGraph = pL.get<
bool>(
"aggregation: symmetrize graph after dropping");
// With ML scaling the drop tolerance is divided by 2^levelID; the second assignment is
// presumably the else branch using the plain tolerance (the else line is not visible).
307 magnitudeType threshold;
309 if (pL.get<
bool>(
"aggregation: use ml scaling of drop tol"))
310 threshold = pL.get<
double>(
"aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID());
312 threshold = as<magnitudeType>(pL.get<
double>(
"aggregation: drop tol"));
313 bool aggregationMayCreateDirichlet = pL.get<
bool>(
"aggregation: dropping may create Dirichlet");
// Options controlling how the filtered matrix is built.
316 const bool reuseGraph = pL.get<
bool>(
"filtered matrix: reuse graph");
317 const bool reuseEigenvalue = pL.get<
bool>(
"filtered matrix: reuse eigenvalue");
319 const bool useRootStencil = pL.get<
bool>(
"filtered matrix: use root stencil");
320 const bool useSpreadLumping = pL.get<
bool>(
"filtered matrix: use spread lumping");
321 const std::string lumpingChoiceString = pL.get<std::string>(
"filtered matrix: lumping choice");
// The lumping choice string is mapped to a value; the assignments are not visible here.
323 if (lumpingChoiceString ==
"diag lumping")
325 else if (lumpingChoiceString ==
"distributed lumping")
328 const magnitudeType filteringDirichletThreshold = as<magnitudeType>(pL.get<
double>(
"filtered matrix: Dirichlet threshold"));
// Coloring graph options; symmetrization of the coloring graph is hard-coded to true.
331 bool generateColoringGraph = pL.get<
bool>(
"aggregation: coloring: use color graph");
332 const bool localizeColoringGraph = pL.get<
bool>(
"aggregation: coloring: localize color graph");
333 const bool symmetrizeColoringGraph =
true;
335 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
// Rewrites the settings read above when an old-style drop scheme name was given.
336 translateOldAlgoParam(pL, droppingMethod, useBlocking, socUsesMatrix, socUsesMeasure, symmetrizeDroppedGraph, generateColoringGraph, threshold, lumpingChoice);
// Assemble a one-line summary of the effective settings.
340 std::stringstream ss;
341 ss <<
"dropping scheme = \"" << droppingMethod <<
"\", strength-of-connection measure = \"" << socUsesMeasure <<
"\", strength-of-connection matrix = \"" << socUsesMatrix <<
"\", ";
342 if (socUsesMatrix ==
"distance laplacian")
343 ss <<
"distance laplacian metric = \"" << distanceLaplacianMetric <<
"\", ";
344 ss <<
"threshold = " << threshold <<
", blocksize = " << A->GetFixedBlockSize() <<
", useBlocking = " << useBlocking;
345 ss <<
", symmetrizeDroppedGraph = " << symmetrizeDroppedGraph << std::endl;
// NOTE(review): the statement guarded by this cut-drop condition is not visible in this
// listing.
353 if (droppingMethod ==
"cut-drop")
367 auto crsA = toCrsMatrix(A);
368 auto lclA = crsA->getLocalMatrixDevice();
// Branch on a non-positive row sum tolerance (bodies not visible in this listing).
388 if (rowSumTol <= 0.) {
// Row pointer of the filtered matrix: one entry per local row plus one.
421 auto filtered_rowptr = rowptr_type(
"filtered_rowptr", lclA.numRows() + 1);
// One decision per stored entry of the local matrix.
425 auto results = results_view_type(
"results", lclA.nnz());
// For a non-zero threshold, dispatch on the strength-of-connection matrix and measure;
// the measure is passed as a template argument, so each combination is its own call.
429 if (threshold != zero) {
430 if (socUsesMatrix ==
"A") {
431 if (socUsesMeasure ==
"unscaled") {
432 ScalarDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::UnscaledMeasure>::runDroppingFunctors_on_A(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold,
433 aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *
this);
434 }
else if (socUsesMeasure ==
"smoothed aggregation") {
435 ScalarDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SmoothedAggregationMeasure>::runDroppingFunctors_on_A(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold,
436 aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *
this);
437 }
else if (socUsesMeasure ==
"signed ruge-stueben") {
438 ScalarDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedRugeStuebenMeasure>::runDroppingFunctors_on_A(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold,
439 aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *
this);
440 }
else if (socUsesMeasure ==
"signed smoothed aggregation") {
441 ScalarDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedSmoothedAggregationMeasure>::runDroppingFunctors_on_A(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold,
442 aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *
this);
444 }
// Distance Laplacian as strength-of-connection matrix; coordinates are fetched from the level.
else if (socUsesMatrix ==
"distance laplacian") {
445 auto coords = Get<RCP<doubleMultiVector>>(currentLevel,
"Coordinates");
446 if (socUsesMeasure ==
"unscaled") {
447 ScalarDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::UnscaledMeasure>::runDroppingFunctors_on_dlap(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, currentLevel, *
this);
448 }
else if (socUsesMeasure ==
"smoothed aggregation") {
449 ScalarDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SmoothedAggregationMeasure>::runDroppingFunctors_on_dlap(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, currentLevel, *
this);
450 }
else if (socUsesMeasure ==
"signed ruge-stueben") {
451 ScalarDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedRugeStuebenMeasure>::runDroppingFunctors_on_dlap(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, currentLevel, *
this);
452 }
else if (socUsesMeasure ==
"signed smoothed aggregation") {
453 ScalarDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedSmoothedAggregationMeasure>::runDroppingFunctors_on_dlap(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, currentLevel, *
this);
// Every decision is initialised to KEEP (presumably the zero-threshold branch; the
// else line is not visible in this listing).
457 Kokkos::deep_copy(results,
KEEP);
// Base dropping pass with either the drop_boundaries or the no_op functor, depending on
// symmetrizeDroppedGraph (the else line between the two calls is not visible).
459 if (symmetrizeDroppedGraph) {
461 ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *
this, drop_boundaries);
464 ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *
this, no_op);
// Optional extra pass with the symmetrize functor.
468 if (symmetrizeDroppedGraph) {
470 ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *
this, symmetrize);
// Dropped entries = stored entries of A minus entries kept after filtering.
473 GO numDropped = lclA.nnz() - nnz_filtered;
// Storage for the filtered matrix and the graph. Two variants are visible: a copy of A
// whose local matrix is used in place, and freshly allocated entries/values of size
// nnz_filtered (the selecting conditions, presumably reuseGraph, are not visible).
486 local_matrix_type lclFilteredA;
487 local_graph_type lclGraph;
489 filteredA = MatrixFactory::BuildCopy(A);
490 lclFilteredA = filteredA->getLocalMatrixDevice();
492 auto colidx = entries_type(
"entries", nnz_filtered);
493 lclGraph = local_graph_type(colidx, filtered_rowptr);
495 auto colidx = entries_type(
"entries", nnz_filtered);
496 auto values = values_type(
"values", nnz_filtered);
497 lclFilteredA = local_matrix_type(
"filteredA",
498 lclA.numRows(), lclA.numCols(),
500 values, filtered_rowptr, colidx);
// Fill kernels; the label names the variant (lumped/unlumped, reuse/noreuse).
// NOTE(review): two of the kernels below share the label "Fill_lumped_noreuse"; their
// guarding conditions are not visible - confirm whether distinct labels were intended.
506 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_lumped_reuse", range, fillFunctor);
510 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_lumped_noreuse", range, fillFunctor);
513 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_lumped_noreuse", range, fillFunctor);
519 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_unlumped_reuse", range, fillFunctor);
522 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_unlumped_noreuse", range, fillFunctor);
// Wrap the local filtered matrix using the maps of A and carry over the block size.
527 filteredA = MatrixFactory::Build(lclFilteredA, A->getRowMap(), A->getColMap(), A->getDomainMap(), A->getRangeMap());
528 filteredA->SetFixedBlockSize(A->GetFixedBlockSize());
// Optionally carry the max eigenvalue estimate of A over to the filtered matrix.
530 if (reuseEigenvalue) {
535 filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate());
542 lclGraph = filteredA->getCrsGraph()->getLocalGraphDevice();
544 graph =
rcp(
new LWGraph_kokkos(lclGraph, filteredA->getRowMap(), filteredA->getColMap(),
"amalgamated graph of A"));
// Optional coloring graph: the row pointer is re-allocated and the dropping functors
// are re-run with drop_offrank and/or symmetrize.
549 if (generateColoringGraph) {
550 SubFactoryMonitor mColoringGraph(*
this,
"Construct coloring graph", currentLevel);
552 filtered_rowptr = rowptr_type(
"rowptr_coloring_graph", lclA.numRows() + 1);
553 if (localizeColoringGraph) {
555 ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *
this, drop_offrank);
557 if (symmetrizeColoringGraph) {
559 ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *
this, symmetrize);
// NOTE(review): `auto lclGraph` declares a new variable with the same name as the
// lclGraph declared earlier in this function; it appears to shadow it - confirm.
561 auto colidx = entries_type(
"entries_coloring_graph", nnz_filtered);
562 auto lclGraph = local_graph_type(colidx, filtered_rowptr);
564 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Construct_coloring_graph", range, graphConstruction);
566 auto colorGraph =
rcp(
new LWGraph_kokkos(lclGraph, filteredA->getRowMap(), filteredA->getColMap(),
"coloring graph"));
567 Set(currentLevel,
"Coloring Graph", colorGraph);
// Store the outputs on the level and return the statistics to the caller.
571 Set(currentLevel,
"DofsPerNode", dofsPerNode);
572 Set(currentLevel,
"Graph", graph);
573 Set(currentLevel,
"A", filteredA);
575 return std::make_tuple(numDropped, boundaryNodes);
// Vector (blocked) path: uses the amalgamation info to translate DOF ids to node ids,
// builds a merged matrix with numNodes rows, runs the dropping functors, builds the
// filtered matrix and the amalgamated graph, optionally builds a coloring graph, stores
// "DofsPerNode", "Graph" and "A" on the level, and returns (numDropped, boundaryNodes).
// NOTE(review): the signature line, many braces/else lines, and the definitions of
// several names used here (pL, zero, uniqueMap, nonUniqueMap, strMap, blkId, blkPartSize,
// mergedA, boundaryNodes, dlap_weights, nnz_filtered, nnz_graph, nnz_merged, range, the
// functors, ...) are not visible in this listing; presumably this is BuildVector -
// confirm. Comments describe only what is shown.
578 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// Local matrix/graph types and the non-const view types for row pointers, column
// indices and values.
585 using local_matrix_type =
typename MatrixType::local_matrix_type;
586 using local_graph_type =
typename GraphType::local_graph_type;
587 using rowptr_type =
typename local_graph_type::row_map_type::non_const_type;
588 using entries_type =
typename local_graph_type::entries_type::non_const_type;
589 using values_type =
typename local_matrix_type::values_type::non_const_type;
590 using device_type =
typename Node::device_type;
591 using memory_space =
typename device_type::memory_space;
// One DecisionType value per matrix entry (see the allocation of `results` below).
592 using results_view_type = Kokkos::View<DecisionType*, memory_space>;
599 auto A = Get<RCP<Matrix>>(currentLevel,
"A");
// Ratio of the fixed block size of A to its storage block size (overwritten further down).
619 LO blkSize = A->GetFixedBlockSize() / A->GetStorageBlockSize();
621 auto amalInfo = Get<RCP<AmalgamationInfo>>(currentLevel,
"UnAmalgamationInfo");
// Copies of the row and column translation arrays held by amalInfo.
633 Array<LO> rowTranslationArray = *(amalInfo->getRowTranslation());
634 Array<LO> colTranslationArray = *(amalInfo->getColTranslation());
// Unmanaged views that wrap the raw storage of the two arrays above (no ownership).
636 Kokkos::View<LO*, Kokkos::MemoryUnmanaged>
637 rowTranslationView(rowTranslationArray.
getRawPtr(), rowTranslationArray.
size());
638 Kokkos::View<LO*, Kokkos::MemoryUnmanaged>
639 colTranslationView(colTranslationArray.
getRawPtr(), colTranslationArray.
size());
642 LO numNodes = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
// Views on Node::device_type of matching size, filled by deep_copy from the wrappers.
643 typedef typename Kokkos::View<LocalOrdinal*, typename Node::device_type> id_translation_type;
644 id_translation_type rowTranslation(
"dofId2nodeId", rowTranslationArray.
size());
645 id_translation_type colTranslation(
"ov_dofId2nodeId", colTranslationArray.
size());
646 Kokkos::deep_copy(rowTranslation, rowTranslationView);
647 Kokkos::deep_copy(colTranslation, colTranslationView);
// Block size: taken from A, then from the strided map when A carries a "stridedMaps" view.
650 blkSize = A->GetFixedBlockSize();
653 if (A->IsView(
"stridedMaps") ==
true) {
657 blkSize = Teuchos::as<const LocalOrdinal>(strMap->getFixedBlockSize());
658 blkId = strMap->getStridedBlockId();
// Size of the selected strided block (the condition guarding this line is not visible).
660 blkPartSize = Teuchos::as<LocalOrdinal>(strMap->getStridingData()[blkId]);
// Thresholds used for boundary detection.
670 const magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<
double>(
"aggregation: Dirichlet threshold")));
671 const magnitudeType rowSumTol = as<magnitudeType>(pL.get<
double>(
"aggregation: row sum drop tol"));
673 const bool useGreedyDirichlet = pL.get<
bool>(
"aggregation: greedy Dirichlet");
// Dropping configuration; these are non-const because the old-parameter translation
// further down may overwrite them.
677 bool useBlocking = pL.get<
bool>(
"aggregation: use blocking");
678 std::string droppingMethod = pL.get<std::string>(
"aggregation: drop scheme");
679 std::string socUsesMatrix = pL.get<std::string>(
"aggregation: strength-of-connection: matrix");
680 std::string socUsesMeasure = pL.get<std::string>(
"aggregation: strength-of-connection: measure");
681 std::string distanceLaplacianMetric = pL.get<std::string>(
"aggregation: distance laplacian metric");
682 bool symmetrizeDroppedGraph = pL.get<
bool>(
"aggregation: symmetrize graph after dropping");
// With ML scaling the drop tolerance is divided by 2^levelID; the second assignment is
// presumably the else branch using the plain tolerance (the else line is not visible).
683 magnitudeType threshold;
685 if (pL.get<
bool>(
"aggregation: use ml scaling of drop tol"))
686 threshold = pL.get<
double>(
"aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID());
688 threshold = as<magnitudeType>(pL.get<
double>(
"aggregation: drop tol"));
689 bool aggregationMayCreateDirichlet = pL.get<
bool>(
"aggregation: dropping may create Dirichlet");
// Options controlling how the filtered matrix is built.
692 const bool reuseGraph = pL.get<
bool>(
"filtered matrix: reuse graph");
693 const bool reuseEigenvalue = pL.get<
bool>(
"filtered matrix: reuse eigenvalue");
695 const bool useRootStencil = pL.get<
bool>(
"filtered matrix: use root stencil");
696 const bool useSpreadLumping = pL.get<
bool>(
"filtered matrix: use spread lumping");
697 const std::string lumpingChoiceString = pL.get<std::string>(
"filtered matrix: lumping choice");
// The lumping choice string is mapped to a value; the assignments are not visible here.
699 if (lumpingChoiceString ==
"diag lumping")
701 else if (lumpingChoiceString ==
"distributed lumping")
704 const magnitudeType filteringDirichletThreshold = as<magnitudeType>(pL.get<
double>(
"filtered matrix: Dirichlet threshold"));
// Coloring graph options; symmetrization of the coloring graph is hard-coded to true.
707 bool generateColoringGraph = pL.get<
bool>(
"aggregation: coloring: use color graph");
708 const bool localizeColoringGraph = pL.get<
bool>(
"aggregation: coloring: localize color graph");
709 const bool symmetrizeColoringGraph =
true;
711 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
// Rewrites the settings read above when an old-style drop scheme name was given.
712 translateOldAlgoParam(pL, droppingMethod, useBlocking, socUsesMatrix, socUsesMeasure, symmetrizeDroppedGraph, generateColoringGraph, threshold, lumpingChoice);
// Assemble a one-line summary of the effective settings.
715 std::stringstream ss;
716 ss <<
"dropping scheme = \"" << droppingMethod <<
"\", strength-of-connection measure = \"" << socUsesMeasure <<
"\", strength-of-connection matrix = \"" << socUsesMatrix <<
"\", ";
717 if (socUsesMatrix ==
"distance laplacian")
718 ss <<
"distance laplacian metric = \"" << distanceLaplacianMetric <<
"\", ";
719 ss <<
"threshold = " << threshold <<
", blocksize = " << A->GetFixedBlockSize() <<
", useBlocking = " << useBlocking;
720 ss <<
", symmetrizeDroppedGraph = " << symmetrizeDroppedGraph << std::endl;
// Cut-drop requires a threshold of at most 1.0; larger values raise a RuntimeError.
728 if (droppingMethod ==
"cut-drop")
729 TEUCHOS_TEST_FOR_EXCEPTION(threshold > 1.0,
Exceptions::RuntimeError,
"For cut-drop algorithms, \"aggregation: drop tol\" = " << threshold <<
", needs to be <= 1.0");
742 auto crsA = toCrsMatrix(A);
743 auto lclA = crsA->getLocalMatrixDevice();
// Branch on the greedy Dirichlet option (bodies not visible in this listing).
758 if (useGreedyDirichlet) {
// Row pointers: filtered matrix (one entry per local row plus one) and node graph
// (one entry per node plus one).
// NOTE(review): filtered_rowptr, graph_rowptr and merged_rowptr all carry the view label
// "rowptr", so they cannot be told apart by label - confirm whether this is intended.
788 auto filtered_rowptr = rowptr_type(
"rowptr", lclA.numRows() + 1);
789 auto graph_rowptr = rowptr_type(
"rowptr", numNodes + 1);
// Pair of entry counters, both starting at zero, handed to the dropping routines.
791 Kokkos::pair<LocalOrdinal, LocalOrdinal> nnz = {0, 0};
// One decision per stored entry of the local matrix.
794 auto results = results_view_type(
"results", lclA.nnz());
// Merged matrix: a parallel_scan labelled "MergeCount" produces nnz_merged, then the
// entries and values are allocated and filled by the "MergeFill" kernel.
803 auto merged_rowptr = rowptr_type(
"rowptr", numNodes + 1);
807 Kokkos::parallel_scan(
"MergeCount", range, functor, nnz_merged);
809 local_graph_type lclMergedGraph;
810 auto colidx_merged = entries_type(
"entries", nnz_merged);
811 auto values_merged = values_type(
"values", nnz_merged);
813 local_matrix_type lclMergedA = local_matrix_type(
"mergedA",
814 numNodes, nonUniqueMap->getLocalNumElements(),
816 values_merged, merged_rowptr, colidx_merged);
819 Kokkos::parallel_for(
"MueLu::CoalesceDrop::MergeFill", range, fillFunctor);
// For a non-zero threshold, dispatch on the strength-of-connection matrix and measure;
// the measure is passed as a template argument, so each combination is its own call.
824 if (threshold != zero) {
825 if (socUsesMatrix ==
"A") {
826 if (socUsesMeasure ==
"unscaled") {
827 VectorDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::UnscaledMeasure>::runDroppingFunctors_on_A(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *
this);
828 }
else if (socUsesMeasure ==
"smoothed aggregation") {
829 VectorDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SmoothedAggregationMeasure>::runDroppingFunctors_on_A(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *
this);
830 }
else if (socUsesMeasure ==
"signed ruge-stueben") {
831 VectorDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedRugeStuebenMeasure>::runDroppingFunctors_on_A(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *
this);
832 }
else if (socUsesMeasure ==
"signed smoothed aggregation") {
833 VectorDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedSmoothedAggregationMeasure>::runDroppingFunctors_on_A(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *
this);
835 }
// Distance Laplacian as strength-of-connection matrix; coordinates are fetched from the level.
else if (socUsesMatrix ==
"distance laplacian") {
836 auto coords = Get<RCP<doubleMultiVector>>(currentLevel,
"Coordinates");
839 LocalOrdinal interleaved_blocksize = as<LocalOrdinal>(pL.get<
int>(
"aggregation: block diagonal: interleaved blocksize"));
// Directional weights: if any weight differs from 1.0 the metric is switched to
// "weighted" (one weight per coordinate dimension) or "block weighted" (one weight per
// dimension and interleaved block); other lengths lead to the error message below.
// NOTE(review): this condition compares socUsesMeasure with "distance laplacian", but
// the measure values dispatched further down are "unscaled", "smoothed aggregation",
// "signed ruge-stueben" and "signed smoothed aggregation", and the enclosing branch
// already tests socUsesMatrix; the weight handling may be unreachable - confirm whether
// socUsesMatrix was intended.
840 if (socUsesMeasure ==
"distance laplacian") {
841 LO dim = (
LO)coords->getNumVectors();
843 bool non_unity =
false;
844 for (
LO i = 0; !non_unity && i < (
LO)dlap_weights.size(); i++) {
845 if (dlap_weights[i] != 1.0) {
850 if ((
LO)dlap_weights.size() == dim) {
851 distanceLaplacianMetric =
"weighted";
852 }
else if ((
LO)dlap_weights.size() == interleaved_blocksize * dim)
853 distanceLaplacianMetric =
"block weighted";
856 "length of 'aggregation: distance laplacian directional weights' must equal the coordinate dimension OR the coordinate dimension times the blocksize");
859 GetOStream(
Statistics1) <<
"Using distance laplacian weights: " << dlap_weights << std::endl;
863 if (socUsesMeasure ==
"unscaled") {
864 VectorDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::UnscaledMeasure>::runDroppingFunctors_on_dlap(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, dlap_weights, interleaved_blocksize, currentLevel, *
this);
865 }
else if (socUsesMeasure ==
"smoothed aggregation") {
866 VectorDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SmoothedAggregationMeasure>::runDroppingFunctors_on_dlap(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, dlap_weights, interleaved_blocksize, currentLevel, *
this);
867 }
else if (socUsesMeasure ==
"signed ruge-stueben") {
868 VectorDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedRugeStuebenMeasure>::runDroppingFunctors_on_dlap(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, dlap_weights, interleaved_blocksize, currentLevel, *
this);
869 }
else if (socUsesMeasure ==
"signed smoothed aggregation") {
870 VectorDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedSmoothedAggregationMeasure>::runDroppingFunctors_on_dlap(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, dlap_weights, interleaved_blocksize, currentLevel, *
this);
// Every decision is initialised to KEEP (presumably the zero-threshold branch; the
// else line is not visible in this listing), followed by a base pass with no_op.
874 Kokkos::deep_copy(results,
KEEP);
877 VectorDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, useBlocking, currentLevel, *
this, no_op);
// Optional extra pass with the symmetrize functor.
880 if (symmetrizeDroppedGraph) {
882 VectorDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, useBlocking, currentLevel, *
this, symmetrize);
// Dropped entries = stored entries of A minus entries kept after filtering.
887 GO numTotal = lclA.nnz();
888 GO numDropped = numTotal - nnz_filtered;
// Storage for the filtered matrix. Two variants are visible: one built on the graph of
// lclA, and one with freshly allocated entries/values of size nnz_filtered (the
// selecting condition, presumably reuseGraph, is not visible).
901 local_matrix_type lclFilteredA;
903 lclFilteredA = local_matrix_type(
"filteredA", lclA.graph, lclA.numCols());
905 auto colidx = entries_type(
"entries", nnz_filtered);
906 auto values = values_type(
"values", nnz_filtered);
907 lclFilteredA = local_matrix_type(
"filteredA",
908 lclA.numRows(), lclA.numCols(),
910 values, filtered_rowptr, colidx);
// Node graph storage, sized by nnz_graph and indexed by graph_rowptr.
913 local_graph_type lclGraph;
915 auto colidx = entries_type(
"entries", nnz_graph);
916 lclGraph = local_graph_type(colidx, graph_rowptr);
// Fill kernels; the label names the variant (lumped/unlumped, reuse/noreuse).
922 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_lumped_reuse", range, fillFunctor);
925 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_lumped_noreuse", range, fillFunctor);
930 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_unlumped_reuse", range, fillFunctor);
933 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_unlumped_noreuse", range, fillFunctor);
938 filteredA->SetFixedBlockSize(blkSize);
// Optionally carry the max eigenvalue estimate of A over to the filtered matrix.
940 if (reuseEigenvalue) {
945 filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate());
// The amalgamated graph uses uniqueMap and nonUniqueMap, not the maps of filteredA.
950 graph =
rcp(
new LWGraph_kokkos(lclGraph, uniqueMap, nonUniqueMap,
"amalgamated graph of A"));
// Optional coloring graph: both row pointers are re-allocated and the dropping functors
// are re-run with drop_offrank and/or symmetrize.
955 if (generateColoringGraph) {
956 SubFactoryMonitor mColoringGraph(*
this,
"Construct coloring graph", currentLevel);
958 filtered_rowptr = rowptr_type(
"rowptr_coloring_graph", lclA.numRows() + 1);
959 graph_rowptr = rowptr_type(
"rowptr", numNodes + 1);
960 if (localizeColoringGraph) {
962 VectorDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, useBlocking, currentLevel, *
this, drop_offrank);
964 if (symmetrizeColoringGraph) {
966 VectorDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, useBlocking, currentLevel, *
this, symmetrize);
// NOTE(review): `auto lclGraph` declares a new variable with the same name as the
// lclGraph declared earlier in this function; it appears to shadow it - confirm.
// NOTE(review): unlike the amalgamated graph above, this graph is built from
// filtered_rowptr/nnz_filtered and the maps of filteredA - confirm this is intended.
968 auto colidx = entries_type(
"entries_coloring_graph", nnz_filtered);
969 auto lclGraph = local_graph_type(colidx, filtered_rowptr);
971 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Construct_coloring_graph", range, graphConstruction);
973 auto colorGraph =
rcp(
new LWGraph_kokkos(lclGraph, filteredA->getRowMap(), filteredA->getColMap(),
"coloring graph"));
974 Set(currentLevel,
"Coloring Graph", colorGraph);
// The number of DOFs per node reported on the level is the block size.
977 LO dofsPerNode = blkSize;
// Store the outputs on the level and return the statistics to the caller.
979 Set(currentLevel,
"DofsPerNode", dofsPerNode);
980 Set(currentLevel,
"Graph", graph);
981 Set(currentLevel,
"A", filteredA);
983 return std::make_tuple(numDropped, boundaryNodes);
987 #endif // MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
MueLu::DefaultLocalOrdinal LocalOrdinal
Lightweight MueLu representation of a compressed row storage graph.
KOKKOS_INLINE_FUNCTION void SetBoundaryNodeMap(const boundary_nodes_type bndry)
Set boolean array indicating which rows correspond to Dirichlet boundaries.
void setValidator(RCP< const ParameterEntryValidator > const &validator)
T & get(const std::string &name, T def_value)
void translateOldAlgoParam(const Teuchos::ParameterList &pL, std::string &droppingMethod, bool &useBlocking, std::string &socUsesMatrix, std::string &socUsesMeasure, bool &symmetrizeDroppedGraph, bool &generateColoringGraph, magnitudeType &threshold, MueLu::MatrixConstruction::lumpingType &lumpingChoice)
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
One-liner description of what is happening.
ParameterList & set(std::string const &name, T &&value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
Functor that drops boundary nodes for a blockSize == 1 problem.
void runBoundaryFunctors(local_matrix_type &lclA, boundary_nodes_view &boundaryNodes, Functors &...functors)
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Functor that symmetrizes the dropping decisions.
MueLu::DefaultGlobalOrdinal GlobalOrdinal
Functor that drops off-rank entries.
Class that holds all level-specific information.
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
static void runDroppingFunctors_on_A(matrix_type &A, matrix_type &mergedA, LocalOrdinal blkPartSize, block_indices_view_type &rowTranslation, block_indices_view_type &colTranslation, results_view &results, rowptr_type &filtered_rowptr, rowptr_type &graph_rowptr, nnz_count_type &nnz, boundary_nodes_type &boundaryNodes, const std::string &droppingMethod, const magnitudeType threshold, const bool aggregationMayCreateDirichlet, const bool symmetrizeDroppedGraph, const bool useBlocking, Level &level, const Factory &factory)
typename MueLu::LWGraph_kokkos< LocalOrdinal, GlobalOrdinal, Node >::boundary_nodes_type boundary_nodes_type
void DeclareInput(Level &currentLevel) const
Input.
Functor that fills the filtered matrix while reusing the graph of the matrix before dropping...
Functor for marking nodes as Dirichlet.
static void runDroppingFunctors_on_dlap(matrix_type &A, matrix_type &mergedA, LocalOrdinal blkPartSize, block_indices_view_type &rowTranslation, block_indices_view_type &colTranslation, results_view &results, rowptr_type &filtered_rowptr, rowptr_type &graph_rowptr, nnz_count_type &nnz, boundary_nodes_type &boundaryNodes, const std::string &droppingMethod, const magnitudeType threshold, const bool aggregationMayCreateDirichlet, const bool symmetrizeDroppedGraph, const bool useBlocking, const std::string &distanceLaplacianMetric, Teuchos::Array< double > &dlap_weights, LocalOrdinal interleaved_blocksize, Level &level, const Factory &factory)
Kokkos::RangePolicy< local_ordinal_type, execution_space > range_type
std::tuple< GlobalOrdinal, boundary_nodes_type > BuildVector(Level &currentLevel) const
static void runDroppingFunctors_on_dlap(matrix_type &A, results_view &results, rowptr_type &filtered_rowptr, LocalOrdinal &nnz_filtered, boundary_nodes_type &boundaryNodes, const std::string &droppingMethod, const magnitudeType threshold, const bool aggregationMayCreateDirichlet, const bool symmetrizeDroppedGraph, const bool useBlocking, const std::string &distanceLaplacianMetric, Level &level, const Factory &factory)
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void Build(Level &currentLevel) const
Build an object with this factory.
Functor for marking nodes as Dirichlet based on rowsum.
Functor that serially applies sub-functors to rows.
static void runDroppingFunctors_on_A(matrix_type &A, results_view &results, rowptr_type &filtered_rowptr, LocalOrdinal &nnz_filtered, boundary_nodes_type &boundaryNodes, const std::string &droppingMethod, const magnitudeType threshold, const bool aggregationMayCreateDirichlet, const bool symmetrizeDroppedGraph, const bool useBlocking, Level &level, const Factory &factory)
#define SET_VALID_ENTRY(name)
Functor for marking nodes as Dirichlet in a block operator.
std::tuple< GlobalOrdinal, boundary_nodes_type > BuildScalar(Level &currentLevel) const
Functor does not reuse the graph of the matrix for a problem with blockSize == 1. ...
static RCP< Matrix > Build(const RCP< const Map > &rowMap, size_t maxNumEntriesPerRow, Xpetra::ProfileType pftype=Xpetra::DynamicProfile)
Exception thrown to report errors in the internal logic of the program.
#define TEUCHOS_ASSERT(assertion_test)
ParameterEntry & getEntry(const std::string &name)