10 #ifndef MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
11 #define MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
13 #include <Kokkos_Core.hpp>
14 #include <KokkosSparse_CrsMatrix.hpp>
23 #include "MueLu_AmalgamationInfo.hpp"
26 #include "MueLu_LWGraph_kokkos.hpp"
29 #include "MueLu_Utilities.hpp"
39 #include "MueLu_ScalarDroppingClassical.hpp"
40 #include "MueLu_ScalarDroppingDistanceLaplacian.hpp"
42 #include "MueLu_VectorDroppingClassical.hpp"
43 #include "MueLu_VectorDroppingDistanceLaplacian.hpp"
47 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
51 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
65 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
79 SET_VALID_ENTRY(
"filtered matrix: spread lumping diag dom growth factor");
83 #undef SET_VALID_ENTRY
84 validParamList->
set<
bool>(
"lightweight wrap",
true,
"Experimental option for lightweight graph access");
85 #ifndef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
88 validParamList->
getEntry(
"aggregation: drop scheme").
setValidator(
rcp(
new Teuchos::StringValidator(Teuchos::tuple<std::string>(
"point-wise",
"cut-drop",
"signed classical sa",
"classical",
"distance laplacian",
"signed classical",
"block diagonal",
"block diagonal classical",
"block diagonal distance laplacian",
"block diagonal signed classical",
"block diagonal colored signed classical",
"signed classical distance laplacian",
"signed classical sa distance laplacian"))));
93 validParamList->
getEntry(
"aggregation: strength-of-connection: measure").
setValidator(
rcp(
new Teuchos::StringValidator(Teuchos::tuple<std::string>(
"smoothed aggregation",
"signed smoothed aggregation",
"signed ruge-stueben",
"unscaled"))));
97 validParamList->
set<
RCP<const FactoryBase>>(
"UnAmalgamationInfo", Teuchos::null,
"Generating factory for UnAmalgamationInfo");
102 return validParamList;
105 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
107 Input(currentLevel,
"A");
108 Input(currentLevel,
"UnAmalgamationInfo");
112 std::string socUsesMatrix = pL.
get<std::string>(
"aggregation: strength-of-connection: matrix");
113 bool needCoords = (socUsesMatrix ==
"distance laplacian");
114 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
115 std::string droppingMethod = pL.
get<std::string>(
"aggregation: drop scheme");
116 needCoords |= (droppingMethod.find(
"distance laplacian") != std::string::npos);
119 Input(currentLevel,
"Coordinates");
120 std::string distLaplMetric = pL.
get<std::string>(
"aggregation: distance laplacian metric");
121 if (distLaplMetric ==
"material")
122 Input(currentLevel,
"Material");
125 bool useBlocking = pL.
get<
bool>(
"aggregation: use blocking");
126 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
127 useBlocking |= (droppingMethod.find(
"block diagonal") != std::string::npos);
130 Input(currentLevel,
"BlockNumber");
134 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
137 auto A = Get<RCP<Matrix>>(currentLevel,
"A");
139 LO blkSize = A->GetFixedBlockSize() / A->GetStorageBlockSize();
141 std::tuple<GlobalOrdinal, boundary_nodes_type> results;
143 results = BuildScalar(currentLevel);
145 results = BuildVector(currentLevel);
149 auto boundaryNodes = std::get<1>(results);
151 GO numLocalBoundaryNodes = 0;
153 Kokkos::parallel_reduce(
154 "MueLu:CoalesceDropF:Build:bnd",
range_type(0, boundaryNodes.extent(0)),
155 KOKKOS_LAMBDA(
const LO i,
GO& n) {
156 if (boundaryNodes(i))
159 numLocalBoundaryNodes);
162 auto comm = A->getRowMap()->getComm();
164 std::vector<GlobalOrdinal> localStats = {numLocalBoundaryNodes, numDropped};
165 std::vector<GlobalOrdinal> globalStats(2);
168 GO numGlobalTotal = A->getGlobalNumEntries();
169 GO numGlobalBoundaryNodes = globalStats[0];
170 GO numGlobalDropped = globalStats[1];
172 GetOStream(
Statistics1) <<
"Detected " << numGlobalBoundaryNodes <<
" Dirichlet nodes" << std::endl;
173 if (numGlobalTotal != 0) {
174 GetOStream(
Statistics1) <<
"Number of dropped entries: "
175 << numGlobalDropped <<
"/" << numGlobalTotal
176 <<
" (" << 100 * Teuchos::as<double>(numGlobalDropped) / Teuchos::as<double>(numGlobalTotal) <<
"%)" << std::endl;
182 template <
class local_matrix_type,
class boundary_nodes_view,
class... Functors>
183 void runBoundaryFunctors(local_matrix_type& lclA, boundary_nodes_view& boundaryNodes, Functors&... functors) {
184 using local_ordinal_type =
typename local_matrix_type::ordinal_type;
185 using execution_space =
typename local_matrix_type::execution_space;
186 using range_type = Kokkos::RangePolicy<local_ordinal_type, execution_space>;
187 auto range = range_type(0, boundaryNodes.extent(0));
189 Kokkos::parallel_for(
"CoalesceDrop::BoundaryDetection", range, boundaries);
192 template <
class magnitudeType>
193 void translateOldAlgoParam(
const Teuchos::ParameterList& pL, std::string& droppingMethod,
bool& useBlocking, std::string& socUsesMatrix, std::string& socUsesMeasure,
bool& symmetrizeDroppedGraph,
bool& generateColoringGraph, magnitudeType& threshold) {
194 std::set<std::string> validDroppingMethods = {
"piece-wise",
"cut-drop"};
195 if (validDroppingMethods.find(droppingMethod) == validDroppingMethods.end()) {
196 std::string algo = droppingMethod;
197 std::string classicalAlgoStr = pL.
get<std::string>(
"aggregation: classical algo");
198 std::string distanceLaplacianAlgoStr = pL.
get<std::string>(
"aggregation: distance laplacian algo");
201 if (algo.find(
"block diagonal") == 0) {
203 algo = algo.substr(14);
205 algo = algo.substr(1);
209 if ((algo ==
"classical") || (algo ==
"signed classical sa") || (algo ==
"signed classical") || (algo ==
"colored signed classical")) {
212 if (algo ==
"classical") {
213 socUsesMeasure =
"smoothed aggregation";
214 }
else if (algo ==
"signed classical sa") {
215 socUsesMeasure =
"signed smoothed aggregation";
216 }
else if (algo ==
"signed classical") {
217 socUsesMeasure =
"signed ruge-stueben";
218 }
else if (algo ==
"colored signed classical") {
219 socUsesMeasure =
"signed ruge-stueben";
220 generateColoringGraph =
true;
223 if (classicalAlgoStr ==
"default")
224 droppingMethod =
"point-wise";
225 else if (classicalAlgoStr ==
"unscaled cut") {
226 socUsesMeasure =
"unscaled";
227 droppingMethod =
"cut-drop";
228 }
else if (classicalAlgoStr ==
"scaled cut") {
229 droppingMethod =
"cut-drop";
230 }
else if (classicalAlgoStr ==
"scaled cut symmetric") {
231 droppingMethod =
"cut-drop";
232 symmetrizeDroppedGraph =
true;
234 }
else if ((algo ==
"distance laplacian") || (algo ==
"signed classical sa distance laplacian") || (algo ==
"signed classical distance laplacian")) {
235 socUsesMatrix =
"distance laplacian";
237 if (algo ==
"distance laplacian") {
238 socUsesMeasure =
"smoothed aggregation";
239 }
else if (algo ==
"signed classical sa distance laplacian") {
240 socUsesMeasure =
"signed smoothed aggregation";
241 }
else if (algo ==
"signed classical distance laplacian") {
242 socUsesMeasure =
"signed ruge-stueben";
245 if (distanceLaplacianAlgoStr ==
"default")
246 droppingMethod =
"point-wise";
247 else if (distanceLaplacianAlgoStr ==
"unscaled cut") {
248 socUsesMeasure =
"unscaled";
249 droppingMethod =
"cut-drop";
250 }
else if (distanceLaplacianAlgoStr ==
"scaled cut") {
251 droppingMethod =
"cut-drop";
252 }
else if (distanceLaplacianAlgoStr ==
"scaled cut symmetric") {
253 droppingMethod =
"cut-drop";
254 symmetrizeDroppedGraph =
true;
256 }
else if (algo ==
"") {
264 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
271 using local_matrix_type =
typename MatrixType::local_matrix_type;
272 using local_graph_type =
typename GraphType::local_graph_type;
273 using rowptr_type =
typename local_graph_type::row_map_type::non_const_type;
274 using entries_type =
typename local_graph_type::entries_type::non_const_type;
275 using values_type =
typename local_matrix_type::values_type::non_const_type;
276 using device_type =
typename Node::device_type;
277 using memory_space =
typename device_type::memory_space;
278 using results_view_type = Kokkos::View<DecisionType*, memory_space>;
285 auto A = Get<RCP<Matrix>>(currentLevel,
"A");
292 const magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<
double>(
"aggregation: Dirichlet threshold")));
293 const magnitudeType rowSumTol = as<magnitudeType>(pL.get<
double>(
"aggregation: row sum drop tol"));
297 bool useBlocking = pL.get<
bool>(
"aggregation: use blocking");
298 std::string droppingMethod = pL.get<std::string>(
"aggregation: drop scheme");
299 std::string socUsesMatrix = pL.get<std::string>(
"aggregation: strength-of-connection: matrix");
300 std::string socUsesMeasure = pL.get<std::string>(
"aggregation: strength-of-connection: measure");
301 std::string distanceLaplacianMetric = pL.get<std::string>(
"aggregation: distance laplacian metric");
302 bool symmetrizeDroppedGraph = pL.get<
bool>(
"aggregation: symmetrize graph after dropping");
303 magnitudeType threshold;
305 if (pL.get<
bool>(
"aggregation: use ml scaling of drop tol"))
306 threshold = pL.get<
double>(
"aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID());
308 threshold = as<magnitudeType>(pL.get<
double>(
"aggregation: drop tol"));
309 bool aggregationMayCreateDirichlet = pL.get<
bool>(
"aggregation: dropping may create Dirichlet");
312 const bool lumping = pL.get<
bool>(
"filtered matrix: use lumping");
313 const bool reuseGraph = pL.get<
bool>(
"filtered matrix: reuse graph");
314 const bool reuseEigenvalue = pL.get<
bool>(
"filtered matrix: reuse eigenvalue");
316 const bool useRootStencil = pL.get<
bool>(
"filtered matrix: use root stencil");
317 const bool useSpreadLumping = pL.get<
bool>(
"filtered matrix: use spread lumping");
319 const magnitudeType filteringDirichletThreshold = as<magnitudeType>(pL.get<
double>(
"filtered matrix: Dirichlet threshold"));
322 bool generateColoringGraph = pL.get<
bool>(
"aggregation: coloring: use color graph");
323 const bool localizeColoringGraph = pL.get<
bool>(
"aggregation: coloring: localize color graph");
324 const bool symmetrizeColoringGraph =
true;
326 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
327 translateOldAlgoParam(pL, droppingMethod, useBlocking, socUsesMatrix, socUsesMeasure, symmetrizeDroppedGraph, generateColoringGraph, threshold);
331 std::stringstream ss;
332 ss <<
"dropping scheme = \"" << droppingMethod <<
"\", strength-of-connection measure = \"" << socUsesMeasure <<
"\", strength-of-connection matrix = \"" << socUsesMatrix <<
"\", ";
333 if (socUsesMatrix ==
"distance laplacian")
334 ss <<
"distance laplacian metric = \"" << distanceLaplacianMetric <<
"\", ";
335 ss <<
"threshold = " << threshold <<
", blocksize = " << A->GetFixedBlockSize() <<
", useBlocking = " << useBlocking;
336 ss <<
", symmetrizeDroppedGraph = " << symmetrizeDroppedGraph << std::endl;
343 if (droppingMethod ==
"cut-drop")
357 auto crsA = toCrsMatrix(A);
358 auto lclA = crsA->getLocalMatrixDevice();
378 if (rowSumTol <= 0.) {
411 auto filtered_rowptr = rowptr_type(
"filtered_rowptr", lclA.numRows() + 1);
415 auto results = results_view_type(
"results", lclA.nnz());
419 if (threshold != zero) {
420 if (socUsesMatrix ==
"A") {
421 if (socUsesMeasure ==
"unscaled") {
422 ScalarDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::UnscaledMeasure>::runDroppingFunctors_on_A(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold,
423 aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *
this);
424 }
else if (socUsesMeasure ==
"smoothed aggregation") {
425 ScalarDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SmoothedAggregationMeasure>::runDroppingFunctors_on_A(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold,
426 aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *
this);
427 }
else if (socUsesMeasure ==
"signed ruge-stueben") {
428 ScalarDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedRugeStuebenMeasure>::runDroppingFunctors_on_A(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold,
429 aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *
this);
430 }
else if (socUsesMeasure ==
"signed smoothed aggregation") {
431 ScalarDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedSmoothedAggregationMeasure>::runDroppingFunctors_on_A(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold,
432 aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *
this);
434 }
else if (socUsesMatrix ==
"distance laplacian") {
435 auto coords = Get<RCP<doubleMultiVector>>(currentLevel,
"Coordinates");
436 if (socUsesMeasure ==
"unscaled") {
437 ScalarDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::UnscaledMeasure>::runDroppingFunctors_on_dlap(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, currentLevel, *
this);
438 }
else if (socUsesMeasure ==
"smoothed aggregation") {
439 ScalarDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SmoothedAggregationMeasure>::runDroppingFunctors_on_dlap(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, currentLevel, *
this);
440 }
else if (socUsesMeasure ==
"signed ruge-stueben") {
441 ScalarDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedRugeStuebenMeasure>::runDroppingFunctors_on_dlap(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, currentLevel, *
this);
442 }
else if (socUsesMeasure ==
"signed smoothed aggregation") {
443 ScalarDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedSmoothedAggregationMeasure>::runDroppingFunctors_on_dlap(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, currentLevel, *
this);
447 Kokkos::deep_copy(results,
KEEP);
449 if (symmetrizeDroppedGraph) {
451 ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *
this, drop_boundaries);
454 ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *
this, no_op);
458 if (symmetrizeDroppedGraph) {
460 ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *
this, symmetrize);
463 GO numDropped = lclA.nnz() - nnz_filtered;
476 local_matrix_type lclFilteredA;
477 local_graph_type lclGraph;
479 filteredA = MatrixFactory::BuildCopy(A);
480 lclFilteredA = filteredA->getLocalMatrixDevice();
482 auto colidx = entries_type(
"entries", nnz_filtered);
483 lclGraph = local_graph_type(colidx, filtered_rowptr);
485 auto colidx = entries_type(
"entries", nnz_filtered);
486 auto values = values_type(
"values", nnz_filtered);
487 lclFilteredA = local_matrix_type(
"filteredA",
488 lclA.numRows(), lclA.numCols(),
490 values, filtered_rowptr, colidx);
496 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_lumped_reuse", range, fillFunctor);
499 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_lumped_noreuse", range, fillFunctor);
504 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_unlumped_reuse", range, fillFunctor);
507 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_unlumped_noreuse", range, fillFunctor);
512 filteredA = MatrixFactory::Build(lclFilteredA, A->getRowMap(), A->getColMap(), A->getDomainMap(), A->getRangeMap());
513 filteredA->SetFixedBlockSize(A->GetFixedBlockSize());
515 if (reuseEigenvalue) {
520 filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate());
527 lclGraph = filteredA->getCrsGraph()->getLocalGraphDevice();
529 graph =
rcp(
new LWGraph_kokkos(lclGraph, filteredA->getRowMap(), filteredA->getColMap(),
"amalgamated graph of A"));
534 if (generateColoringGraph) {
535 SubFactoryMonitor mColoringGraph(*
this,
"Construct coloring graph", currentLevel);
537 filtered_rowptr = rowptr_type(
"rowptr_coloring_graph", lclA.numRows() + 1);
538 if (localizeColoringGraph) {
540 ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *
this, drop_offrank);
542 if (symmetrizeColoringGraph) {
544 ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *
this, symmetrize);
546 auto colidx = entries_type(
"entries_coloring_graph", nnz_filtered);
547 auto lclGraph = local_graph_type(colidx, filtered_rowptr);
549 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Construct_coloring_graph", range, graphConstruction);
551 auto colorGraph =
rcp(
new LWGraph_kokkos(lclGraph, filteredA->getRowMap(), filteredA->getColMap(),
"coloring graph"));
552 Set(currentLevel,
"Coloring Graph", colorGraph);
556 Set(currentLevel,
"DofsPerNode", dofsPerNode);
557 Set(currentLevel,
"Graph", graph);
558 Set(currentLevel,
"A", filteredA);
560 return std::make_tuple(numDropped, boundaryNodes);
563 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
570 using local_matrix_type =
typename MatrixType::local_matrix_type;
571 using local_graph_type =
typename GraphType::local_graph_type;
572 using rowptr_type =
typename local_graph_type::row_map_type::non_const_type;
573 using entries_type =
typename local_graph_type::entries_type::non_const_type;
574 using values_type =
typename local_matrix_type::values_type::non_const_type;
575 using device_type =
typename Node::device_type;
576 using memory_space =
typename device_type::memory_space;
577 using results_view_type = Kokkos::View<DecisionType*, memory_space>;
584 auto A = Get<RCP<Matrix>>(currentLevel,
"A");
604 LO blkSize = A->GetFixedBlockSize() / A->GetStorageBlockSize();
606 auto amalInfo = Get<RCP<AmalgamationInfo>>(currentLevel,
"UnAmalgamationInfo");
618 Array<LO> rowTranslationArray = *(amalInfo->getRowTranslation());
619 Array<LO> colTranslationArray = *(amalInfo->getColTranslation());
621 Kokkos::View<LO*, Kokkos::MemoryUnmanaged>
622 rowTranslationView(rowTranslationArray.
getRawPtr(), rowTranslationArray.
size());
623 Kokkos::View<LO*, Kokkos::MemoryUnmanaged>
624 colTranslationView(colTranslationArray.
getRawPtr(), colTranslationArray.
size());
627 LO numNodes = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
628 typedef typename Kokkos::View<LocalOrdinal*, typename Node::device_type> id_translation_type;
629 id_translation_type rowTranslation(
"dofId2nodeId", rowTranslationArray.
size());
630 id_translation_type colTranslation(
"ov_dofId2nodeId", colTranslationArray.
size());
631 Kokkos::deep_copy(rowTranslation, rowTranslationView);
632 Kokkos::deep_copy(colTranslation, colTranslationView);
635 blkSize = A->GetFixedBlockSize();
638 if (A->IsView(
"stridedMaps") ==
true) {
642 blkSize = Teuchos::as<const LocalOrdinal>(strMap->getFixedBlockSize());
643 blkId = strMap->getStridedBlockId();
645 blkPartSize = Teuchos::as<LocalOrdinal>(strMap->getStridingData()[blkId]);
655 const magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<
double>(
"aggregation: Dirichlet threshold")));
656 const magnitudeType rowSumTol = as<magnitudeType>(pL.get<
double>(
"aggregation: row sum drop tol"));
658 const bool useGreedyDirichlet = pL.get<
bool>(
"aggregation: greedy Dirichlet");
662 bool useBlocking = pL.get<
bool>(
"aggregation: use blocking");
663 std::string droppingMethod = pL.get<std::string>(
"aggregation: drop scheme");
664 std::string socUsesMatrix = pL.get<std::string>(
"aggregation: strength-of-connection: matrix");
665 std::string socUsesMeasure = pL.get<std::string>(
"aggregation: strength-of-connection: measure");
666 std::string distanceLaplacianMetric = pL.get<std::string>(
"aggregation: distance laplacian metric");
667 bool symmetrizeDroppedGraph = pL.get<
bool>(
"aggregation: symmetrize graph after dropping");
668 magnitudeType threshold;
670 if (pL.get<
bool>(
"aggregation: use ml scaling of drop tol"))
671 threshold = pL.get<
double>(
"aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID());
673 threshold = as<magnitudeType>(pL.get<
double>(
"aggregation: drop tol"));
674 bool aggregationMayCreateDirichlet = pL.get<
bool>(
"aggregation: dropping may create Dirichlet");
677 const bool lumping = pL.get<
bool>(
"filtered matrix: use lumping");
678 const bool reuseGraph = pL.get<
bool>(
"filtered matrix: reuse graph");
679 const bool reuseEigenvalue = pL.get<
bool>(
"filtered matrix: reuse eigenvalue");
681 const bool useRootStencil = pL.get<
bool>(
"filtered matrix: use root stencil");
682 const bool useSpreadLumping = pL.get<
bool>(
"filtered matrix: use spread lumping");
684 const magnitudeType filteringDirichletThreshold = as<magnitudeType>(pL.get<
double>(
"filtered matrix: Dirichlet threshold"));
687 bool generateColoringGraph = pL.get<
bool>(
"aggregation: coloring: use color graph");
688 const bool localizeColoringGraph = pL.get<
bool>(
"aggregation: coloring: localize color graph");
689 const bool symmetrizeColoringGraph =
true;
691 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
692 translateOldAlgoParam(pL, droppingMethod, useBlocking, socUsesMatrix, socUsesMeasure, symmetrizeDroppedGraph, generateColoringGraph, threshold);
695 std::stringstream ss;
696 ss <<
"dropping scheme = \"" << droppingMethod <<
"\", strength-of-connection measure = \"" << socUsesMeasure <<
"\", strength-of-connection matrix = \"" << socUsesMatrix <<
"\", ";
697 if (socUsesMatrix ==
"distance laplacian")
698 ss <<
"distance laplacian metric = \"" << distanceLaplacianMetric <<
"\", ";
699 ss <<
"threshold = " << threshold <<
", blocksize = " << A->GetFixedBlockSize() <<
", useBlocking = " << useBlocking;
700 ss <<
", symmetrizeDroppedGraph = " << symmetrizeDroppedGraph << std::endl;
707 if (droppingMethod ==
"cut-drop")
708 TEUCHOS_TEST_FOR_EXCEPTION(threshold > 1.0,
Exceptions::RuntimeError,
"For cut-drop algorithms, \"aggregation: drop tol\" = " << threshold <<
", needs to be <= 1.0");
721 auto crsA = toCrsMatrix(A);
722 auto lclA = crsA->getLocalMatrixDevice();
737 if (useGreedyDirichlet) {
767 auto filtered_rowptr = rowptr_type(
"rowptr", lclA.numRows() + 1);
768 auto graph_rowptr = rowptr_type(
"rowptr", numNodes + 1);
770 Kokkos::pair<LocalOrdinal, LocalOrdinal> nnz = {0, 0};
773 auto results = results_view_type(
"results", lclA.nnz());
782 auto merged_rowptr = rowptr_type(
"rowptr", numNodes + 1);
786 Kokkos::parallel_scan(
"MergeCount", range, functor, nnz_merged);
788 local_graph_type lclMergedGraph;
789 auto colidx_merged = entries_type(
"entries", nnz_merged);
790 auto values_merged = values_type(
"values", nnz_merged);
792 local_matrix_type lclMergedA = local_matrix_type(
"mergedA",
793 numNodes, nonUniqueMap->getLocalNumElements(),
795 values_merged, merged_rowptr, colidx_merged);
798 Kokkos::parallel_for(
"MueLu::CoalesceDrop::MergeFill", range, fillFunctor);
803 if (threshold != zero) {
804 if (socUsesMatrix ==
"A") {
805 if (socUsesMeasure ==
"unscaled") {
806 VectorDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::UnscaledMeasure>::runDroppingFunctors_on_A(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *
this);
807 }
else if (socUsesMeasure ==
"smoothed aggregation") {
808 VectorDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SmoothedAggregationMeasure>::runDroppingFunctors_on_A(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *
this);
809 }
else if (socUsesMeasure ==
"signed ruge-stueben") {
810 VectorDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedRugeStuebenMeasure>::runDroppingFunctors_on_A(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *
this);
811 }
else if (socUsesMeasure ==
"signed smoothed aggregation") {
812 VectorDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedSmoothedAggregationMeasure>::runDroppingFunctors_on_A(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *
this);
814 }
else if (socUsesMatrix ==
"distance laplacian") {
815 auto coords = Get<RCP<doubleMultiVector>>(currentLevel,
"Coordinates");
818 LocalOrdinal interleaved_blocksize = as<LocalOrdinal>(pL.get<
int>(
"aggregation: block diagonal: interleaved blocksize"));
819 if (socUsesMeasure ==
"distance laplacian") {
820 LO dim = (
LO)coords->getNumVectors();
822 bool non_unity =
false;
823 for (
LO i = 0; !non_unity && i < (
LO)dlap_weights.size(); i++) {
824 if (dlap_weights[i] != 1.0) {
829 if ((
LO)dlap_weights.size() == dim) {
830 distanceLaplacianMetric =
"weighted";
831 }
else if ((
LO)dlap_weights.size() == interleaved_blocksize * dim)
832 distanceLaplacianMetric =
"block weighted";
835 "length of 'aggregation: distance laplacian directional weights' must equal the coordinate dimension OR the coordinate dimension times the blocksize");
838 GetOStream(
Statistics1) <<
"Using distance laplacian weights: " << dlap_weights << std::endl;
842 if (socUsesMeasure ==
"unscaled") {
843 VectorDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::UnscaledMeasure>::runDroppingFunctors_on_dlap(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, dlap_weights, interleaved_blocksize, currentLevel, *
this);
844 }
else if (socUsesMeasure ==
"smoothed aggregation") {
845 VectorDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SmoothedAggregationMeasure>::runDroppingFunctors_on_dlap(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, dlap_weights, interleaved_blocksize, currentLevel, *
this);
846 }
else if (socUsesMeasure ==
"signed ruge-stueben") {
847 VectorDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedRugeStuebenMeasure>::runDroppingFunctors_on_dlap(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, dlap_weights, interleaved_blocksize, currentLevel, *
this);
848 }
else if (socUsesMeasure ==
"signed smoothed aggregation") {
849 VectorDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedSmoothedAggregationMeasure>::runDroppingFunctors_on_dlap(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, dlap_weights, interleaved_blocksize, currentLevel, *
this);
853 Kokkos::deep_copy(results,
KEEP);
856 VectorDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, useBlocking, currentLevel, *
this, no_op);
859 if (symmetrizeDroppedGraph) {
861 VectorDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, useBlocking, currentLevel, *
this, symmetrize);
866 GO numTotal = lclA.nnz();
867 GO numDropped = numTotal - nnz_filtered;
880 local_matrix_type lclFilteredA;
882 lclFilteredA = local_matrix_type(
"filteredA", lclA.graph, lclA.numCols());
884 auto colidx = entries_type(
"entries", nnz_filtered);
885 auto values = values_type(
"values", nnz_filtered);
886 lclFilteredA = local_matrix_type(
"filteredA",
887 lclA.numRows(), lclA.numCols(),
889 values, filtered_rowptr, colidx);
892 local_graph_type lclGraph;
894 auto colidx = entries_type(
"entries", nnz_graph);
895 lclGraph = local_graph_type(colidx, graph_rowptr);
901 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_lumped_reuse", range, fillFunctor);
904 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_lumped_noreuse", range, fillFunctor);
909 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_unlumped_reuse", range, fillFunctor);
912 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_unlumped_noreuse", range, fillFunctor);
917 filteredA->SetFixedBlockSize(blkSize);
919 if (reuseEigenvalue) {
924 filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate());
929 graph =
rcp(
new LWGraph_kokkos(lclGraph, uniqueMap, nonUniqueMap,
"amalgamated graph of A"));
934 if (generateColoringGraph) {
935 SubFactoryMonitor mColoringGraph(*
this,
"Construct coloring graph", currentLevel);
937 filtered_rowptr = rowptr_type(
"rowptr_coloring_graph", lclA.numRows() + 1);
938 graph_rowptr = rowptr_type(
"rowptr", numNodes + 1);
939 if (localizeColoringGraph) {
941 VectorDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, useBlocking, currentLevel, *
this, drop_offrank);
943 if (symmetrizeColoringGraph) {
945 VectorDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, useBlocking, currentLevel, *
this, symmetrize);
947 auto colidx = entries_type(
"entries_coloring_graph", nnz_filtered);
948 auto lclGraph = local_graph_type(colidx, filtered_rowptr);
950 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Construct_coloring_graph", range, graphConstruction);
952 auto colorGraph =
rcp(
new LWGraph_kokkos(lclGraph, filteredA->getRowMap(), filteredA->getColMap(),
"coloring graph"));
953 Set(currentLevel,
"Coloring Graph", colorGraph);
956 LO dofsPerNode = blkSize;
958 Set(currentLevel,
"DofsPerNode", dofsPerNode);
959 Set(currentLevel,
"Graph", graph);
960 Set(currentLevel,
"A", filteredA);
962 return std::make_tuple(numDropped, boundaryNodes);
966 #endif // MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
MueLu::DefaultLocalOrdinal LocalOrdinal
Lightweight MueLu representation of a compressed row storage graph.
void translateOldAlgoParam(const Teuchos::ParameterList &pL, std::string &droppingMethod, bool &useBlocking, std::string &socUsesMatrix, std::string &socUsesMeasure, bool &symmetrizeDroppedGraph, bool &generateColoringGraph, magnitudeType &threshold)
KOKKOS_INLINE_FUNCTION void SetBoundaryNodeMap(const boundary_nodes_type bndry)
Set boolean array indicating which rows correspond to Dirichlet boundaries.
void setValidator(RCP< const ParameterEntryValidator > const &validator)
T & get(const std::string &name, T def_value)
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
One-liner description of what is happening.
ParameterList & set(std::string const &name, T &&value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
Functor that drops boundary nodes for a blockSize == 1 problem.
void runBoundaryFunctors(local_matrix_type &lclA, boundary_nodes_view &boundaryNodes, Functors &...functors)
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Functor that symmetrizes the dropping decisions.
MueLu::DefaultGlobalOrdinal GlobalOrdinal
Functor that drops off-rank entries.
Class that holds all level-specific information.
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
static void runDroppingFunctors_on_A(matrix_type &A, matrix_type &mergedA, LocalOrdinal blkPartSize, block_indices_view_type &rowTranslation, block_indices_view_type &colTranslation, results_view &results, rowptr_type &filtered_rowptr, rowptr_type &graph_rowptr, nnz_count_type &nnz, boundary_nodes_type &boundaryNodes, const std::string &droppingMethod, const magnitudeType threshold, const bool aggregationMayCreateDirichlet, const bool symmetrizeDroppedGraph, const bool useBlocking, Level &level, const Factory &factory)
typename MueLu::LWGraph_kokkos< LocalOrdinal, GlobalOrdinal, Node >::boundary_nodes_type boundary_nodes_type
void DeclareInput(Level &currentLevel) const
Input.
Functor that fills the filtered matrix while reusing the graph of the matrix before dropping...
Functor for marking nodes as Dirichlet.
static void runDroppingFunctors_on_dlap(matrix_type &A, matrix_type &mergedA, LocalOrdinal blkPartSize, block_indices_view_type &rowTranslation, block_indices_view_type &colTranslation, results_view &results, rowptr_type &filtered_rowptr, rowptr_type &graph_rowptr, nnz_count_type &nnz, boundary_nodes_type &boundaryNodes, const std::string &droppingMethod, const magnitudeType threshold, const bool aggregationMayCreateDirichlet, const bool symmetrizeDroppedGraph, const bool useBlocking, const std::string &distanceLaplacianMetric, Teuchos::Array< double > &dlap_weights, LocalOrdinal interleaved_blocksize, Level &level, const Factory &factory)
Kokkos::RangePolicy< local_ordinal_type, execution_space > range_type
std::tuple< GlobalOrdinal, boundary_nodes_type > BuildVector(Level &currentLevel) const
static void runDroppingFunctors_on_dlap(matrix_type &A, results_view &results, rowptr_type &filtered_rowptr, LocalOrdinal &nnz_filtered, boundary_nodes_type &boundaryNodes, const std::string &droppingMethod, const magnitudeType threshold, const bool aggregationMayCreateDirichlet, const bool symmetrizeDroppedGraph, const bool useBlocking, const std::string &distanceLaplacianMetric, Level &level, const Factory &factory)
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void Build(Level &currentLevel) const
Build an object with this factory.
Functor for marking nodes as Dirichlet based on rowsum.
Functor that serially applies sub-functors to rows.
static void runDroppingFunctors_on_A(matrix_type &A, results_view &results, rowptr_type &filtered_rowptr, LocalOrdinal &nnz_filtered, boundary_nodes_type &boundaryNodes, const std::string &droppingMethod, const magnitudeType threshold, const bool aggregationMayCreateDirichlet, const bool symmetrizeDroppedGraph, const bool useBlocking, Level &level, const Factory &factory)
#define SET_VALID_ENTRY(name)
Functor for marking nodes as Dirichlet in a block operator.
std::tuple< GlobalOrdinal, boundary_nodes_type > BuildScalar(Level &currentLevel) const
Functor that does not reuse the graph of the matrix for a problem with blockSize == 1. ...
static RCP< Matrix > Build(const RCP< const Map > &rowMap, size_t maxNumEntriesPerRow, Xpetra::ProfileType pftype=Xpetra::DynamicProfile)
Exception thrown to report errors in the internal logic of the program.
#define TEUCHOS_ASSERT(assertion_test)
ParameterEntry & getEntry(const std::string &name)