10 #ifndef MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
11 #define MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
13 #include <Kokkos_Core.hpp>
14 #include <KokkosSparse_CrsMatrix.hpp>
23 #include "MueLu_AmalgamationInfo.hpp"
26 #include "MueLu_LWGraph_kokkos.hpp"
41 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
45 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
59 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
73 SET_VALID_ENTRY(
"filtered matrix: spread lumping diag dom growth factor");
77 #undef SET_VALID_ENTRY
78 validParamList->
set<
bool>(
"lightweight wrap",
true,
"Experimental option for lightweight graph access");
79 #ifndef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
82 validParamList->
getEntry(
"aggregation: drop scheme").
setValidator(
rcp(
new Teuchos::StringValidator(Teuchos::tuple<std::string>(
"point-wise",
"cut-drop",
"signed classical sa",
"classical",
"distance laplacian",
"signed classical",
"block diagonal",
"block diagonal classical",
"block diagonal distance laplacian",
"block diagonal signed classical",
"block diagonal colored signed classical",
"signed classical distance laplacian",
"signed classical sa distance laplacian"))));
87 validParamList->
getEntry(
"aggregation: strength-of-connection: measure").
setValidator(
rcp(
new Teuchos::StringValidator(Teuchos::tuple<std::string>(
"smoothed aggregation",
"signed smoothed aggregation",
"signed ruge-stueben",
"unscaled"))));
91 validParamList->
set<
RCP<const FactoryBase>>(
"UnAmalgamationInfo", Teuchos::null,
"Generating factory for UnAmalgamationInfo");
96 return validParamList;
99 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
101 Input(currentLevel,
"A");
102 Input(currentLevel,
"UnAmalgamationInfo");
106 std::string socUsesMatrix = pL.
get<std::string>(
"aggregation: strength-of-connection: matrix");
107 bool needCoords = (socUsesMatrix ==
"distance laplacian");
108 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
109 std::string droppingMethod = pL.
get<std::string>(
"aggregation: drop scheme");
110 needCoords |= (droppingMethod.find(
"distance laplacian") != std::string::npos);
113 Input(currentLevel,
"Coordinates");
114 std::string distLaplMetric = pL.
get<std::string>(
"aggregation: distance laplacian metric");
115 if (distLaplMetric ==
"material")
116 Input(currentLevel,
"Material");
119 bool useBlocking = pL.
get<
bool>(
"aggregation: use blocking");
120 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
121 useBlocking |= (droppingMethod.find(
"block diagonal") != std::string::npos);
124 Input(currentLevel,
"BlockNumber");
128 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
131 auto A = Get<RCP<Matrix>>(currentLevel,
"A");
133 LO blkSize = A->GetFixedBlockSize() / A->GetStorageBlockSize();
135 std::tuple<GlobalOrdinal, boundary_nodes_type> results;
137 results = BuildScalar(currentLevel);
139 results = BuildVector(currentLevel);
143 auto boundaryNodes = std::get<1>(results);
145 GO numLocalBoundaryNodes = 0;
147 Kokkos::parallel_reduce(
148 "MueLu:CoalesceDropF:Build:bnd",
range_type(0, boundaryNodes.extent(0)),
149 KOKKOS_LAMBDA(
const LO i,
GO& n) {
150 if (boundaryNodes(i))
153 numLocalBoundaryNodes);
156 auto comm = A->getRowMap()->getComm();
158 std::vector<GlobalOrdinal> localStats = {numLocalBoundaryNodes, numDropped};
159 std::vector<GlobalOrdinal> globalStats(2);
162 GO numGlobalTotal = A->getGlobalNumEntries();
163 GO numGlobalBoundaryNodes = globalStats[0];
164 GO numGlobalDropped = globalStats[1];
166 GetOStream(
Statistics1) <<
"Detected " << numGlobalBoundaryNodes <<
" Dirichlet nodes" << std::endl;
167 if (numGlobalTotal != 0) {
168 GetOStream(
Statistics1) <<
"Number of dropped entries: "
169 << numGlobalDropped <<
"/" << numGlobalTotal
170 <<
" (" << 100 * Teuchos::as<double>(numGlobalDropped) / Teuchos::as<double>(numGlobalTotal) <<
"%)" << std::endl;
176 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
179 auto material = Get<RCP<MultiVector>>(currentLevel,
"Material");
182 if (material->getNumVectors() == 1) {
183 GetOStream(
Runtime0) <<
"material scalar mean = " << material->getVector(0)->meanValue() << std::endl;
188 material->meanValue(means());
189 std::stringstream ss;
190 ss <<
"material tensor mean =" << std::endl;
192 for (
size_t i = 0; i < spatialDim; ++i) {
194 for (
size_t j = 0; j < spatialDim; ++j) {
195 ss << means[k] <<
" ";
208 template <
class magnitudeType>
209 void translateOldAlgoParam(
const Teuchos::ParameterList& pL, std::string& droppingMethod,
bool& useBlocking, std::string& socUsesMatrix, std::string& socUsesMeasure,
bool& symmetrizeDroppedGraph,
bool& generateColoringGraph, magnitudeType& threshold) {
210 std::set<std::string> validDroppingMethods = {
"piece-wise",
"cut-drop"};
211 if (validDroppingMethods.find(droppingMethod) == validDroppingMethods.end()) {
212 std::string algo = droppingMethod;
213 std::string classicalAlgoStr = pL.
get<std::string>(
"aggregation: classical algo");
214 std::string distanceLaplacianAlgoStr = pL.
get<std::string>(
"aggregation: distance laplacian algo");
217 if (algo.find(
"block diagonal") == 0) {
219 algo = algo.substr(14);
221 algo = algo.substr(1);
225 if ((algo ==
"classical") || (algo ==
"signed classical sa") || (algo ==
"signed classical") || (algo ==
"colored signed classical")) {
228 if (algo ==
"classical") {
229 socUsesMeasure =
"smoothed aggregation";
230 }
else if (algo ==
"signed classical sa") {
231 socUsesMeasure =
"signed smoothed aggregation";
232 }
else if (algo ==
"signed classical") {
233 socUsesMeasure =
"signed ruge-stueben";
234 }
else if (algo ==
"colored signed classical") {
235 socUsesMeasure =
"signed ruge-stueben";
236 generateColoringGraph =
true;
239 if (classicalAlgoStr ==
"default")
240 droppingMethod =
"point-wise";
241 else if (classicalAlgoStr ==
"unscaled cut") {
242 socUsesMeasure =
"unscaled";
243 droppingMethod =
"cut-drop";
244 }
else if (classicalAlgoStr ==
"scaled cut") {
245 droppingMethod =
"cut-drop";
246 }
else if (classicalAlgoStr ==
"scaled cut symmetric") {
247 droppingMethod =
"cut-drop";
248 symmetrizeDroppedGraph =
true;
250 }
else if ((algo ==
"distance laplacian") || (algo ==
"signed classical sa distance laplacian") || (algo ==
"signed classical distance laplacian")) {
251 socUsesMatrix =
"distance laplacian";
253 if (algo ==
"distance laplacian") {
254 socUsesMeasure =
"smoothed aggregation";
255 }
else if (algo ==
"signed classical sa distance laplacian") {
256 socUsesMeasure =
"signed smoothed aggregation";
257 }
else if (algo ==
"signed classical distance laplacian") {
258 socUsesMeasure =
"signed ruge-stueben";
261 if (distanceLaplacianAlgoStr ==
"default")
262 droppingMethod =
"point-wise";
263 else if (distanceLaplacianAlgoStr ==
"unscaled cut") {
264 socUsesMeasure =
"unscaled";
265 droppingMethod =
"cut-drop";
266 }
else if (distanceLaplacianAlgoStr ==
"scaled cut") {
267 droppingMethod =
"cut-drop";
268 }
else if (distanceLaplacianAlgoStr ==
"scaled cut symmetric") {
269 droppingMethod =
"cut-drop";
270 symmetrizeDroppedGraph =
true;
272 }
else if (algo ==
"") {
280 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
287 using local_matrix_type =
typename MatrixType::local_matrix_type;
288 using local_graph_type =
typename GraphType::local_graph_type;
289 using rowptr_type =
typename local_graph_type::row_map_type::non_const_type;
290 using entries_type =
typename local_graph_type::entries_type::non_const_type;
291 using values_type =
typename local_matrix_type::values_type::non_const_type;
292 using device_type =
typename Node::device_type;
293 using memory_space =
typename device_type::memory_space;
300 auto A = Get<RCP<Matrix>>(currentLevel,
"A");
307 const magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<
double>(
"aggregation: Dirichlet threshold")));
308 const magnitudeType rowSumTol = as<magnitudeType>(pL.get<
double>(
"aggregation: row sum drop tol"));
312 bool useBlocking = pL.get<
bool>(
"aggregation: use blocking");
313 std::string droppingMethod = pL.get<std::string>(
"aggregation: drop scheme");
314 std::string socUsesMatrix = pL.get<std::string>(
"aggregation: strength-of-connection: matrix");
315 std::string socUsesMeasure = pL.get<std::string>(
"aggregation: strength-of-connection: measure");
316 std::string distanceLaplacianMetric = pL.get<std::string>(
"aggregation: distance laplacian metric");
317 bool symmetrizeDroppedGraph = pL.get<
bool>(
"aggregation: symmetrize graph after dropping");
318 magnitudeType threshold;
320 if (pL.get<
bool>(
"aggregation: use ml scaling of drop tol"))
321 threshold = pL.get<
double>(
"aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID());
323 threshold = as<magnitudeType>(pL.get<
double>(
"aggregation: drop tol"));
324 bool aggregationMayCreateDirichlet = pL.get<
bool>(
"aggregation: dropping may create Dirichlet");
327 const bool lumping = pL.get<
bool>(
"filtered matrix: use lumping");
328 const bool reuseGraph = pL.get<
bool>(
"filtered matrix: reuse graph");
329 const bool reuseEigenvalue = pL.get<
bool>(
"filtered matrix: reuse eigenvalue");
331 const bool useRootStencil = pL.get<
bool>(
"filtered matrix: use root stencil");
332 const bool useSpreadLumping = pL.get<
bool>(
"filtered matrix: use spread lumping");
334 const magnitudeType filteringDirichletThreshold = as<magnitudeType>(pL.get<
double>(
"filtered matrix: Dirichlet threshold"));
337 bool generateColoringGraph = pL.get<
bool>(
"aggregation: coloring: use color graph");
338 const bool localizeColoringGraph = pL.get<
bool>(
"aggregation: coloring: localize color graph");
339 const bool symmetrizeColoringGraph =
true;
341 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
342 translateOldAlgoParam(pL, droppingMethod, useBlocking, socUsesMatrix, socUsesMeasure, symmetrizeDroppedGraph, generateColoringGraph, threshold);
346 std::stringstream ss;
347 ss <<
"dropping scheme = \"" << droppingMethod <<
"\", strength-of-connection measure = \"" << socUsesMeasure <<
"\", strength-of-connection matrix = \"" << socUsesMatrix <<
"\", ";
348 if (socUsesMatrix ==
"distance laplacian")
349 ss <<
"distance laplacian metric = \"" << distanceLaplacianMetric <<
"\", ";
350 ss <<
"threshold = " << threshold <<
", blocksize = " << A->GetFixedBlockSize() <<
", useBlocking = " << useBlocking;
351 ss <<
", symmetrizeDroppedGraph = " << symmetrizeDroppedGraph << std::endl;
358 if (droppingMethod ==
"cut-drop")
372 auto crsA = toCrsMatrix(A);
373 auto lclA = crsA->getLocalMatrixDevice();
391 #define MueLu_runBoundaryFunctors(...) \
393 auto boundaries = BoundaryDetection::BoundaryFunctor(lclA, __VA_ARGS__); \
394 Kokkos::parallel_for("CoalesceDrop::BoundaryDetection", range, boundaries); \
399 if (rowSumTol <= 0.) {
406 #undef MueLu_runBoundaryFunctors
435 #if !defined(HAVE_MUELU_DEBUG)
436 #define MueLu_runDroppingFunctorsImpl(...) \
438 auto countingFunctor = MatrixConstruction::PointwiseCountingFunctor(lclA, results, filtered_rowptr, __VA_ARGS__); \
439 Kokkos::parallel_scan("MueLu::CoalesceDrop::CountEntries", range, countingFunctor, nnz_filtered); \
442 #define MueLu_runDroppingFunctorsImpl(...) \
444 auto debug = Misc::DebugFunctor(lclA, results); \
445 auto countingFunctor = MatrixConstruction::PointwiseCountingFunctor(lclA, results, filtered_rowptr, __VA_ARGS__, debug); \
446 Kokkos::parallel_scan("MueLu::CoalesceDrop::CountEntries", range, countingFunctor, nnz_filtered); \
452 #define MueLu_runDroppingFunctors(...) \
455 auto BlockNumber = Get<RCP<LocalOrdinalVector>>(currentLevel, "BlockNumber"); \
456 auto block_diagonalize = Misc::BlockDiagonalizeFunctor(*A, *BlockNumber, results); \
457 MueLu_runDroppingFunctorsImpl(block_diagonalize, __VA_ARGS__); \
459 MueLu_runDroppingFunctorsImpl(__VA_ARGS__); \
464 #define MueLu_runDroppingFunctors_on_A(SoC) \
466 if (droppingMethod == "point-wise") { \
467 auto dropping = ClassicalDropping::make_drop_functor<SoC>(*A, threshold, results); \
469 if (aggregationMayCreateDirichlet) { \
470 MueLu_runDroppingFunctors(dropping, \
472 preserve_diagonals, \
473 mark_singletons_as_boundary); \
475 MueLu_runDroppingFunctors(dropping, \
477 preserve_diagonals); \
479 } else if (droppingMethod == "cut-drop") { \
480 auto comparison = CutDrop::make_comparison_functor<SoC>(*A, results); \
481 auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold); \
483 MueLu_runDroppingFunctors(drop_boundaries, \
484 preserve_diagonals, \
491 #define MueLu_runDroppingFunctors_on_dlap_inner(SoC) \
493 if (droppingMethod == "point-wise") { \
494 auto dist_laplacian_dropping = DistanceLaplacian::make_drop_functor<SoC>(*A, threshold, dist2, results); \
496 if (aggregationMayCreateDirichlet) { \
497 MueLu_runDroppingFunctors(dist_laplacian_dropping, \
499 preserve_diagonals, \
500 mark_singletons_as_boundary); \
502 MueLu_runDroppingFunctors(dist_laplacian_dropping, \
504 preserve_diagonals); \
506 } else if (droppingMethod == "cut-drop") { \
507 auto comparison = CutDrop::make_dlap_comparison_functor<SoC>(*A, dist2, results); \
508 auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold); \
510 MueLu_runDroppingFunctors(drop_boundaries, \
511 preserve_diagonals, \
518 #define MueLu_runDroppingFunctors_on_dlap(SoC) \
520 if (distanceLaplacianMetric == "unweighted") { \
521 auto dist2 = DistanceLaplacian::UnweightedDistanceFunctor(*A, coords); \
522 MueLu_runDroppingFunctors_on_dlap_inner(SoC); \
523 } else if (distanceLaplacianMetric == "material") { \
524 auto material = GetMaterial(currentLevel, coords->getNumVectors()); \
525 if (material->getNumVectors() == 1) { \
526 auto dist2 = DistanceLaplacian::ScalarMaterialDistanceFunctor(*A, coords, material); \
527 MueLu_runDroppingFunctors_on_dlap_inner(SoC); \
529 auto dist2 = DistanceLaplacian::TensorMaterialDistanceFunctor(*A, coords, material); \
530 MueLu_runDroppingFunctors_on_dlap_inner(SoC); \
536 auto filtered_rowptr = rowptr_type(
"filtered_rowptr", lclA.numRows() + 1);
540 auto results = Kokkos::View<DecisionType*, memory_space>(
"results", lclA.nnz());
546 if (threshold != zero) {
550 if (socUsesMatrix ==
"A") {
551 if (socUsesMeasure ==
"unscaled") {
553 }
else if (socUsesMeasure ==
"smoothed aggregation") {
555 }
else if (socUsesMeasure ==
"signed ruge-stueben") {
557 }
else if (socUsesMeasure ==
"signed smoothed aggregation") {
560 }
else if (socUsesMatrix ==
"distance laplacian") {
561 auto coords = Get<RCP<doubleMultiVector>>(currentLevel,
"Coordinates");
562 if (socUsesMeasure ==
"unscaled") {
564 }
else if (socUsesMeasure ==
"smoothed aggregation") {
566 }
else if (socUsesMeasure ==
"signed ruge-stueben") {
568 }
else if (socUsesMeasure ==
"signed smoothed aggregation") {
573 Kokkos::deep_copy(results,
KEEP);
580 if (symmetrizeDroppedGraph) {
585 GO numDropped = lclA.nnz() - nnz_filtered;
598 local_matrix_type lclFilteredA;
599 local_graph_type lclGraph;
601 filteredA = MatrixFactory::BuildCopy(A);
602 lclFilteredA = filteredA->getLocalMatrixDevice();
604 auto colidx = entries_type(
"entries", nnz_filtered);
605 lclGraph = local_graph_type(colidx, filtered_rowptr);
607 auto colidx = entries_type(
"entries", nnz_filtered);
608 auto values = values_type(
"values", nnz_filtered);
609 lclFilteredA = local_matrix_type(
"filteredA",
610 lclA.numRows(), lclA.numCols(),
612 values, filtered_rowptr, colidx);
618 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_lumped_reuse", range, fillFunctor);
621 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_lumped_noreuse", range, fillFunctor);
626 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_unlumped_reuse", range, fillFunctor);
629 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_unlumped_noreuse", range, fillFunctor);
634 filteredA = MatrixFactory::Build(lclFilteredA, A->getRowMap(), A->getColMap(), A->getDomainMap(), A->getRangeMap());
635 filteredA->SetFixedBlockSize(A->GetFixedBlockSize());
637 if (reuseEigenvalue) {
642 filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate());
649 lclGraph = filteredA->getCrsGraph()->getLocalGraphDevice();
651 graph =
rcp(
new LWGraph_kokkos(lclGraph, filteredA->getRowMap(), filteredA->getColMap(),
"amalgamated graph of A"));
656 if (generateColoringGraph) {
657 SubFactoryMonitor mColoringGraph(*
this,
"Construct coloring graph", currentLevel);
659 filtered_rowptr = rowptr_type(
"rowptr_coloring_graph", lclA.numRows() + 1);
660 if (localizeColoringGraph) {
664 if (symmetrizeColoringGraph) {
668 auto colidx = entries_type(
"entries_coloring_graph", nnz_filtered);
669 auto lclGraph = local_graph_type(colidx, filtered_rowptr);
671 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Construct_coloring_graph", range, graphConstruction);
673 auto colorGraph =
rcp(
new LWGraph_kokkos(lclGraph, filteredA->getRowMap(), filteredA->getColMap(),
"coloring graph"));
674 Set(currentLevel,
"Coloring Graph", colorGraph);
677 #undef MueLu_runDroppingFunctors_on_A
678 #undef MueLu_runDroppingFunctors_on_dlap_inner
679 #undef MueLu_runDroppingFunctors_on_dlap
680 #undef MueLu_runDroppingFunctors
681 #undef MueLu_runDroppingFunctorsImpl
684 Set(currentLevel,
"DofsPerNode", dofsPerNode);
685 Set(currentLevel,
"Graph", graph);
686 Set(currentLevel,
"A", filteredA);
688 return std::make_tuple(numDropped, boundaryNodes);
691 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
698 using local_matrix_type =
typename MatrixType::local_matrix_type;
699 using local_graph_type =
typename GraphType::local_graph_type;
700 using rowptr_type =
typename local_graph_type::row_map_type::non_const_type;
701 using entries_type =
typename local_graph_type::entries_type::non_const_type;
702 using values_type =
typename local_matrix_type::values_type::non_const_type;
703 using device_type =
typename Node::device_type;
704 using memory_space =
typename device_type::memory_space;
711 auto A = Get<RCP<Matrix>>(currentLevel,
"A");
731 LO blkSize = A->GetFixedBlockSize() / A->GetStorageBlockSize();
733 auto amalInfo = Get<RCP<AmalgamationInfo>>(currentLevel,
"UnAmalgamationInfo");
745 Array<LO> rowTranslationArray = *(amalInfo->getRowTranslation());
746 Array<LO> colTranslationArray = *(amalInfo->getColTranslation());
748 Kokkos::View<LO*, Kokkos::MemoryUnmanaged>
749 rowTranslationView(rowTranslationArray.
getRawPtr(), rowTranslationArray.
size());
750 Kokkos::View<LO*, Kokkos::MemoryUnmanaged>
751 colTranslationView(colTranslationArray.
getRawPtr(), colTranslationArray.
size());
754 LO numNodes = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
755 typedef typename Kokkos::View<LocalOrdinal*, typename Node::device_type> id_translation_type;
756 id_translation_type rowTranslation(
"dofId2nodeId", rowTranslationArray.
size());
757 id_translation_type colTranslation(
"ov_dofId2nodeId", colTranslationArray.
size());
758 Kokkos::deep_copy(rowTranslation, rowTranslationView);
759 Kokkos::deep_copy(colTranslation, colTranslationView);
762 blkSize = A->GetFixedBlockSize();
765 if (A->IsView(
"stridedMaps") ==
true) {
769 blkSize = Teuchos::as<const LocalOrdinal>(strMap->getFixedBlockSize());
770 blkId = strMap->getStridedBlockId();
772 blkPartSize = Teuchos::as<LocalOrdinal>(strMap->getStridingData()[blkId]);
782 const magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<
double>(
"aggregation: Dirichlet threshold")));
783 const magnitudeType rowSumTol = as<magnitudeType>(pL.get<
double>(
"aggregation: row sum drop tol"));
785 const bool useGreedyDirichlet = pL.get<
bool>(
"aggregation: greedy Dirichlet");
789 bool useBlocking = pL.get<
bool>(
"aggregation: use blocking");
790 std::string droppingMethod = pL.get<std::string>(
"aggregation: drop scheme");
791 std::string socUsesMatrix = pL.get<std::string>(
"aggregation: strength-of-connection: matrix");
792 std::string socUsesMeasure = pL.get<std::string>(
"aggregation: strength-of-connection: measure");
793 std::string distanceLaplacianMetric = pL.get<std::string>(
"aggregation: distance laplacian metric");
794 bool symmetrizeDroppedGraph = pL.get<
bool>(
"aggregation: symmetrize graph after dropping");
795 magnitudeType threshold;
797 if (pL.get<
bool>(
"aggregation: use ml scaling of drop tol"))
798 threshold = pL.get<
double>(
"aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID());
800 threshold = as<magnitudeType>(pL.get<
double>(
"aggregation: drop tol"));
801 bool aggregationMayCreateDirichlet = pL.get<
bool>(
"aggregation: dropping may create Dirichlet");
804 const bool lumping = pL.get<
bool>(
"filtered matrix: use lumping");
805 const bool reuseGraph = pL.get<
bool>(
"filtered matrix: reuse graph");
806 const bool reuseEigenvalue = pL.get<
bool>(
"filtered matrix: reuse eigenvalue");
808 const bool useRootStencil = pL.get<
bool>(
"filtered matrix: use root stencil");
809 const bool useSpreadLumping = pL.get<
bool>(
"filtered matrix: use spread lumping");
811 const magnitudeType filteringDirichletThreshold = as<magnitudeType>(pL.get<
double>(
"filtered matrix: Dirichlet threshold"));
814 bool generateColoringGraph = pL.get<
bool>(
"aggregation: coloring: use color graph");
815 const bool localizeColoringGraph = pL.get<
bool>(
"aggregation: coloring: localize color graph");
816 const bool symmetrizeColoringGraph =
true;
818 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
819 translateOldAlgoParam(pL, droppingMethod, useBlocking, socUsesMatrix, socUsesMeasure, symmetrizeDroppedGraph, generateColoringGraph, threshold);
822 std::stringstream ss;
823 ss <<
"dropping scheme = \"" << droppingMethod <<
"\", strength-of-connection measure = \"" << socUsesMeasure <<
"\", strength-of-connection matrix = \"" << socUsesMatrix <<
"\", ";
824 if (socUsesMatrix ==
"distance laplacian")
825 ss <<
"distance laplacian metric = \"" << distanceLaplacianMetric <<
"\", ";
826 ss <<
"threshold = " << threshold <<
", blocksize = " << A->GetFixedBlockSize() <<
", useBlocking = " << useBlocking;
827 ss <<
", symmetrizeDroppedGraph = " << symmetrizeDroppedGraph << std::endl;
834 if (droppingMethod ==
"cut-drop")
835 TEUCHOS_TEST_FOR_EXCEPTION(threshold > 1.0,
Exceptions::RuntimeError,
"For cut-drop algorithms, \"aggregation: drop tol\" = " << threshold <<
", needs to be <= 1.0");
848 auto crsA = toCrsMatrix(A);
849 auto lclA = crsA->getLocalMatrixDevice();
864 #define MueLu_runBoundaryFunctors(...) \
866 auto boundaries = BoundaryDetection::BoundaryFunctor(lclA, __VA_ARGS__); \
867 Kokkos::parallel_for("CoalesceDrop::BoundaryDetection", range, boundaries); \
870 if (useGreedyDirichlet) {
877 #undef MueLu_runBoundaryFunctors
900 #if !defined(HAVE_MUELU_DEBUG)
901 #define MueLu_runDroppingFunctorsImpl(...) \
903 auto countingFunctor = MatrixConstruction::VectorCountingFunctor(lclA, blkPartSize, colTranslation, results, filtered_rowptr, graph_rowptr, __VA_ARGS__); \
904 Kokkos::parallel_scan("MueLu::CoalesceDrop::CountEntries", range, countingFunctor, nnz); \
907 #define MueLu_runDroppingFunctorsImpl(...) \
909 auto debug = Misc::DebugFunctor(lclA, results); \
910 auto countingFunctor = MatrixConstruction::VectorCountingFunctor(lclA, blkPartSize, colTranslation, results, filtered_rowptr, graph_rowptr, __VA_ARGS__, debug); \
911 Kokkos::parallel_scan("MueLu::CoalesceDrop::CountEntries", range, countingFunctor, nnz); \
917 #define MueLu_runDroppingFunctors(...) \
920 auto BlockNumber = Get<RCP<LocalOrdinalVector>>(currentLevel, "BlockNumber"); \
921 auto block_diagonalize = Misc::BlockDiagonalizeVectorFunctor(*A, *BlockNumber, nonUniqueMap, results, rowTranslation, colTranslation); \
922 MueLu_runDroppingFunctorsImpl(block_diagonalize, __VA_ARGS__); \
924 MueLu_runDroppingFunctorsImpl(__VA_ARGS__); \
929 #define MueLu_runDroppingFunctors_on_A(SoC) \
931 if (droppingMethod == "point-wise") { \
932 auto dropping = ClassicalDropping::make_drop_functor<SoC>(*A, threshold, results); \
934 if (aggregationMayCreateDirichlet) { \
935 MueLu_runDroppingFunctors(dropping, \
936 preserve_diagonals, \
937 mark_singletons_as_boundary); \
939 MueLu_runDroppingFunctors(dropping, \
940 preserve_diagonals); \
942 } else if (droppingMethod == "cut-drop") { \
943 auto comparison = CutDrop::make_comparison_functor<SoC>(*A, results); \
944 auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold); \
946 MueLu_runDroppingFunctors(drop_boundaries, \
947 preserve_diagonals, \
954 #define MueLu_runDroppingFunctors_on_dlap_inner(SoC) \
956 if (droppingMethod == "point-wise") { \
957 auto dist_laplacian_dropping = DistanceLaplacian::make_vector_drop_functor<SoC>(*A, *mergedA, threshold, dist2, results, rowTranslation, colTranslation); \
959 if (aggregationMayCreateDirichlet) { \
960 MueLu_runDroppingFunctors(dist_laplacian_dropping, \
961 preserve_diagonals, \
962 mark_singletons_as_boundary); \
964 MueLu_runDroppingFunctors(dist_laplacian_dropping, \
965 preserve_diagonals); \
967 } else if (droppingMethod == "cut-drop") { \
968 auto comparison = CutDrop::make_dlap_comparison_functor<SoC>(*A, dist2, results); \
969 auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold); \
971 MueLu_runDroppingFunctors(drop_boundaries, \
972 preserve_diagonals, \
979 #define MueLu_runDroppingFunctors_on_dlap(SoC) \
981 if (distanceLaplacianMetric == "unweighted") { \
982 auto dist2 = DistanceLaplacian::UnweightedDistanceFunctor(*mergedA, coords); \
983 MueLu_runDroppingFunctors_on_dlap_inner(SoC); \
984 } else if (distanceLaplacianMetric == "weighted") { \
985 auto k_dlap_weights_host = Kokkos::View<double*, Kokkos::HostSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged>>(&dlap_weights[0], dlap_weights.size()); \
986 auto k_dlap_weights = Kokkos::View<double*>("dlap_weights", k_dlap_weights_host.extent(0)); \
987 Kokkos::deep_copy(k_dlap_weights, k_dlap_weights_host); \
988 auto dist2 = DistanceLaplacian::WeightedDistanceFunctor(*mergedA, coords, k_dlap_weights); \
989 MueLu_runDroppingFunctors_on_dlap_inner(SoC); \
990 } else if (distanceLaplacianMetric == "block weighted") { \
991 auto k_dlap_weights_host = Kokkos::View<double*, Kokkos::HostSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged>>(&dlap_weights[0], dlap_weights.size()); \
992 auto k_dlap_weights = Kokkos::View<double*>("dlap_weights", k_dlap_weights_host.extent(0)); \
993 Kokkos::deep_copy(k_dlap_weights, k_dlap_weights_host); \
994 auto dist2 = DistanceLaplacian::BlockWeightedDistanceFunctor(*mergedA, coords, k_dlap_weights, interleaved_blocksize); \
995 MueLu_runDroppingFunctors_on_dlap_inner(SoC); \
996 } else if (distanceLaplacianMetric == "material") { \
997 auto material = GetMaterial(currentLevel, coords->getNumVectors()); \
998 if (material->getNumVectors() == 1) { \
999 auto dist2 = DistanceLaplacian::ScalarMaterialDistanceFunctor(*mergedA, coords, material); \
1000 MueLu_runDroppingFunctors_on_dlap_inner(SoC); \
1002 auto dist2 = DistanceLaplacian::TensorMaterialDistanceFunctor(*mergedA, coords, material); \
1003 MueLu_runDroppingFunctors_on_dlap_inner(SoC); \
1009 auto filtered_rowptr = rowptr_type(
"rowptr", lclA.numRows() + 1);
1010 auto graph_rowptr = rowptr_type(
"rowptr", numNodes + 1);
1012 Kokkos::pair<LocalOrdinal, LocalOrdinal> nnz = {0, 0};
1015 auto results = Kokkos::View<DecisionType*, memory_space>(
"results", lclA.nnz());
1021 if (threshold != zero) {
1025 if (socUsesMatrix ==
"A") {
1026 if (socUsesMeasure ==
"unscaled") {
1028 }
else if (socUsesMeasure ==
"smoothed aggregation") {
1030 }
else if (socUsesMeasure ==
"signed ruge-stueben") {
1032 }
else if (socUsesMeasure ==
"signed smoothed aggregation") {
1035 }
else if (socUsesMatrix ==
"distance laplacian") {
1036 auto coords = Get<RCP<doubleMultiVector>>(currentLevel,
"Coordinates");
1039 LocalOrdinal interleaved_blocksize = as<LocalOrdinal>(pL.get<
int>(
"aggregation: block diagonal: interleaved blocksize"));
1040 if (socUsesMeasure ==
"distance laplacian") {
1041 LO dim = (
LO)coords->getNumVectors();
1043 bool non_unity =
false;
1044 for (
LO i = 0; !non_unity && i < (
LO)dlap_weights.size(); i++) {
1045 if (dlap_weights[i] != 1.0) {
1050 if ((
LO)dlap_weights.size() == dim) {
1051 distanceLaplacianMetric =
"weighted";
1052 }
else if ((
LO)dlap_weights.size() == interleaved_blocksize * dim)
1053 distanceLaplacianMetric =
"block weighted";
1056 "length of 'aggregation: distance laplacian directional weights' must equal the coordinate dimension OR the coordinate dimension times the blocksize");
1059 GetOStream(
Statistics1) <<
"Using distance laplacian weights: " << dlap_weights << std::endl;
1067 auto merged_rowptr = rowptr_type(
"rowptr", numNodes + 1);
1071 Kokkos::parallel_scan(
"MergeCount", range, functor, nnz_merged);
1073 local_graph_type lclMergedGraph;
1074 auto colidx_merged = entries_type(
"entries", nnz_merged);
1075 auto values_merged = values_type(
"values", nnz_merged);
1077 local_matrix_type lclMergedA = local_matrix_type(
"mergedA",
1078 numNodes, nonUniqueMap->getLocalNumElements(),
1080 values_merged, merged_rowptr, colidx_merged);
1083 Kokkos::parallel_for(
"MueLu::CoalesceDrop::MergeFill", range, fillFunctor);
1088 if (socUsesMeasure ==
"unscaled") {
1090 }
else if (socUsesMeasure ==
"smoothed aggregation") {
1092 }
else if (socUsesMeasure ==
"signed ruge-stueben") {
1094 }
else if (socUsesMeasure ==
"signed smoothed aggregation") {
1099 Kokkos::deep_copy(results,
KEEP);
1105 if (symmetrizeDroppedGraph) {
1112 GO numTotal = lclA.nnz();
1113 GO numDropped = numTotal - nnz_filtered;
1126 local_matrix_type lclFilteredA;
1128 lclFilteredA = local_matrix_type(
"filteredA", lclA.graph, lclA.numCols());
1130 auto colidx = entries_type(
"entries", nnz_filtered);
1131 auto values = values_type(
"values", nnz_filtered);
1132 lclFilteredA = local_matrix_type(
"filteredA",
1133 lclA.numRows(), lclA.numCols(),
1135 values, filtered_rowptr, colidx);
1138 local_graph_type lclGraph;
1140 auto colidx = entries_type(
"entries", nnz_graph);
1141 lclGraph = local_graph_type(colidx, graph_rowptr);
1147 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_lumped_reuse", range, fillFunctor);
1150 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_lumped_noreuse", range, fillFunctor);
1155 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_unlumped_reuse", range, fillFunctor);
1158 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_unlumped_noreuse", range, fillFunctor);
1163 filteredA->SetFixedBlockSize(blkSize);
1165 if (reuseEigenvalue) {
1170 filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate());
1175 graph =
rcp(
new LWGraph_kokkos(lclGraph, uniqueMap, nonUniqueMap,
"amalgamated graph of A"));
1180 if (generateColoringGraph) {
1181 SubFactoryMonitor mColoringGraph(*
this,
"Construct coloring graph", currentLevel);
1183 filtered_rowptr = rowptr_type(
"rowptr_coloring_graph", lclA.numRows() + 1);
1184 graph_rowptr = rowptr_type(
"rowptr", numNodes + 1);
1185 if (localizeColoringGraph) {
1189 if (symmetrizeColoringGraph) {
1193 auto colidx = entries_type(
"entries_coloring_graph", nnz_filtered);
1194 auto lclGraph = local_graph_type(colidx, filtered_rowptr);
1196 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Construct_coloring_graph", range, graphConstruction);
1198 auto colorGraph =
rcp(
new LWGraph_kokkos(lclGraph, filteredA->getRowMap(), filteredA->getColMap(),
"coloring graph"));
1199 Set(currentLevel,
"Coloring Graph", colorGraph);
1202 #undef MueLu_runDroppingFunctors_on_dlap
1203 #undef MueLu_runDroppingFunctors_on_dlap_inner
1204 #undef MueLu_runDroppingFunctors_on_A
1205 #undef MueLu_runDroppingFunctors
1206 #undef MueLu_runDroppingFunctorsImpl
1208 LO dofsPerNode = blkSize;
1210 Set(currentLevel,
"DofsPerNode", dofsPerNode);
1211 Set(currentLevel,
"Graph", graph);
1212 Set(currentLevel,
"A", filteredA);
1214 return std::make_tuple(numDropped, boundaryNodes);
1218 #endif // MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
MueLu::DefaultLocalOrdinal LocalOrdinal
Lightweight MueLu representation of a compressed row storage graph.
void translateOldAlgoParam(const Teuchos::ParameterList &pL, std::string &droppingMethod, bool &useBlocking, std::string &socUsesMatrix, std::string &socUsesMeasure, bool &symmetrizeDroppedGraph, bool &generateColoringGraph, magnitudeType &threshold)
KOKKOS_INLINE_FUNCTION void SetBoundaryNodeMap(const boundary_nodes_type bndry)
Set boolean array indicating which rows correspond to Dirichlet boundaries.
Teuchos::RCP< MultiVector > GetMaterial(Level ¤tLevel, size_t spatialDim) const
void setValidator(RCP< const ParameterEntryValidator > const &validator)
Functor that drops boundary nodes for a blockSize > 1 problem.
T & get(const std::string &name, T def_value)
Functor that marks singletons (all off-diagonal entries in a row are dropped) as boundary.
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
#define MueLu_runDroppingFunctors_on_dlap(SoC)
#define MueLu_runBoundaryFunctors(...)
One-liner description of what is happening.
ParameterList & set(std::string const &name, T &&value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
Functor that marks singletons (all off-diagonal entries in a row are dropped) as boundary.
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Functor that symmetrizes the dropping decisions.
MueLu::DefaultGlobalOrdinal GlobalOrdinal
Functor that drops boundary nodes for a blockSize == 1 problem.
Functor that drops off-rank entries.
Class that holds all level-specific information.
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
typename MueLu::LWGraph_kokkos< LocalOrdinal, GlobalOrdinal, Node >::boundary_nodes_type boundary_nodes_type
void DeclareInput(Level ¤tLevel) const
Input.
Functor that fills the filtered matrix while reusing the graph of the matrix before dropping...
Functor for marking nodes as Dirichlet.
Functor that marks diagonal as kept, unless the are already marked as boundary.
Kokkos::RangePolicy< local_ordinal_type, execution_space > range_type
std::tuple< GlobalOrdinal, boundary_nodes_type > BuildVector(Level ¤tLevel) const
#define MueLu_runDroppingFunctors_on_A(SoC)
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void Build(Level ¤tLevel) const
Build an object with this factory.
Functor for marking nodes as Dirichlet based on rowsum.
#define SET_VALID_ENTRY(name)
Functor for marking nodes as Dirichlet in a block operator.
std::tuple< GlobalOrdinal, boundary_nodes_type > BuildScalar(Level ¤tLevel) const
Functor does not reuse the graph of the matrix for a problem with blockSize == 1. ...
static RCP< Matrix > Build(const RCP< const Map > &rowMap, size_t maxNumEntriesPerRow, Xpetra::ProfileType pftype=Xpetra::DynamicProfile)
Exception throws to report errors in the internal logical of the program.
#define TEUCHOS_ASSERT(assertion_test)
ParameterEntry & getEntry(const std::string &name)
#define MueLu_runDroppingFunctors(...)