10 #ifndef MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP 
   11 #define MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP 
   13 #include <Kokkos_Core.hpp> 
   14 #include <KokkosSparse_CrsMatrix.hpp> 
   23 #include "MueLu_AmalgamationInfo.hpp" 
   26 #include "MueLu_LWGraph_kokkos.hpp" 
   29 #include "MueLu_Utilities.hpp" 
   37 #include "MueLu_ScalarDroppingClassical.hpp" 
   38 #include "MueLu_ScalarDroppingDistanceLaplacian.hpp" 
   40 #include "MueLu_VectorDroppingClassical.hpp" 
   41 #include "MueLu_VectorDroppingDistanceLaplacian.hpp" 
   45 template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
   49 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) 
   63 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS 
   78   SET_VALID_ENTRY(
"filtered matrix: spread lumping diag dom growth factor");
 
   83 #undef SET_VALID_ENTRY 
   84   validParamList->
set<
bool>(
"lightweight wrap", 
true, 
"Experimental option for lightweight graph access");
 
   85 #ifndef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS 
   88   validParamList->
getEntry(
"aggregation: drop scheme").
setValidator(
rcp(
new Teuchos::StringValidator(Teuchos::tuple<std::string>(
"point-wise", 
"cut-drop", 
"signed classical sa", 
"classical", 
"distance laplacian", 
"signed classical", 
"block diagonal", 
"block diagonal classical", 
"block diagonal distance laplacian", 
"block diagonal signed classical", 
"block diagonal colored signed classical", 
"signed classical distance laplacian", 
"signed classical sa distance laplacian"))));
 
   93   validParamList->
getEntry(
"aggregation: strength-of-connection: measure").
setValidator(
rcp(
new Teuchos::StringValidator(Teuchos::tuple<std::string>(
"smoothed aggregation", 
"signed smoothed aggregation", 
"signed ruge-stueben", 
"unscaled"))));
 
   97   validParamList->
set<
RCP<const FactoryBase>>(
"UnAmalgamationInfo", Teuchos::null, 
"Generating factory for UnAmalgamationInfo");
 
  102   return validParamList;
 
  105 template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
  107   Input(currentLevel, 
"A");
 
  108   Input(currentLevel, 
"UnAmalgamationInfo");
 
  112   std::string socUsesMatrix = pL.
get<std::string>(
"aggregation: strength-of-connection: matrix");
 
  113   bool needCoords           = (socUsesMatrix == 
"distance laplacian");
 
  114 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS 
  115   std::string droppingMethod = pL.
get<std::string>(
"aggregation: drop scheme");
 
  116   needCoords |= (droppingMethod.find(
"distance laplacian") != std::string::npos);
 
  119     Input(currentLevel, 
"Coordinates");
 
  120     std::string distLaplMetric = pL.
get<std::string>(
"aggregation: distance laplacian metric");
 
  121     if (distLaplMetric == 
"material")
 
  122       Input(currentLevel, 
"Material");
 
  125   bool useBlocking = pL.
get<
bool>(
"aggregation: use blocking");
 
  126 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS 
  127   useBlocking |= (droppingMethod.find(
"block diagonal") != std::string::npos);
 
  130     Input(currentLevel, 
"BlockNumber");
 
  134 template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
  137   auto A = Get<RCP<Matrix>>(currentLevel, 
"A");
 
  139   LO blkSize = A->GetFixedBlockSize() / A->GetStorageBlockSize();
 
  141   std::tuple<GlobalOrdinal, boundary_nodes_type> results;
 
  143     results = BuildScalar(currentLevel);
 
  145     results = BuildVector(currentLevel);
 
  149     auto boundaryNodes       = std::get<1>(results);
 
  151     GO numLocalBoundaryNodes = 0;
 
  153     Kokkos::parallel_reduce(
 
  154         "MueLu:CoalesceDropF:Build:bnd", 
range_type(0, boundaryNodes.extent(0)),
 
  155         KOKKOS_LAMBDA(
const LO i, 
GO& n) {
 
  156           if (boundaryNodes(i))
 
  159         numLocalBoundaryNodes);
 
  162       auto comm = A->getRowMap()->getComm();
 
  164       std::vector<GlobalOrdinal> localStats = {numLocalBoundaryNodes, numDropped};
 
  165       std::vector<GlobalOrdinal> globalStats(2);
 
  168       GO numGlobalTotal         = A->getGlobalNumEntries();
 
  169       GO numGlobalBoundaryNodes = globalStats[0];
 
  170       GO numGlobalDropped       = globalStats[1];
 
  172       GetOStream(
Statistics1) << 
"Detected " << numGlobalBoundaryNodes << 
" Dirichlet nodes" << std::endl;
 
  173       if (numGlobalTotal != 0) {
 
  174         GetOStream(
Statistics1) << 
"Number of dropped entries: " 
  175                                 << numGlobalDropped << 
"/" << numGlobalTotal
 
  176                                 << 
" (" << 100 * Teuchos::as<double>(numGlobalDropped) / Teuchos::as<double>(numGlobalTotal) << 
"%)" << std::endl;
 
  // Launches the boundary-detection kernel for a local matrix.
  //
  // lclA          - local matrix; its ordinal_type and execution_space define the range policy.
  // boundaryNodes - view whose extent(0) sets the number of iterations.
  // functors      - pack of boundary functors.
  // NOTE(review): the `boundaries` functor passed to parallel_for is built on a line
  // that is not visible here; presumably it combines lclA, boundaryNodes and functors - confirm.
  182 template <
class local_matrix_type, 
class boundary_nodes_view, 
class... Functors>
 
  183 void runBoundaryFunctors(local_matrix_type& lclA, boundary_nodes_view& boundaryNodes, Functors&... functors) {
 
  // Index type and execution space are taken from the matrix type so the policy matches it.
  184   using local_ordinal_type = 
typename local_matrix_type::ordinal_type;
 
  185   using execution_space    = 
typename local_matrix_type::execution_space;
 
  186   using range_type         = Kokkos::RangePolicy<local_ordinal_type, execution_space>;
 
  // One iteration per entry of boundaryNodes: [0, boundaryNodes.extent(0)).
  187   auto range               = range_type(0, boundaryNodes.extent(0));
 
  // Kernel label as it appears in Kokkos profiling output.
  189   Kokkos::parallel_for(
"CoalesceDrop::BoundaryDetection", range, boundaries);
 
  192 template <
class magnitudeType>
 
  // Drop schemes that are already in new-style form and therefore bypass the
  // legacy-parameter translation below. The entries must match the values this
  // translation itself produces for droppingMethod ("point-wise", "cut-drop").
  // The former "piece-wise" entry matched no accepted scheme, so "point-wise"
  // was needlessly routed through the old-parameter translation path.
  194   std::set<std::string> validDroppingMethods = {
"point-wise", 
"cut-drop"};
 
  198   if (validDroppingMethods.find(droppingMethod) == validDroppingMethods.end()) {
 
  199     std::string algo                     = droppingMethod;
 
  200     std::string classicalAlgoStr         = pL.
get<std::string>(
"aggregation: classical algo");
 
  201     std::string distanceLaplacianAlgoStr = pL.
get<std::string>(
"aggregation: distance laplacian algo");
 
  204     if (algo.find(
"block diagonal") == 0) {
 
  206       algo        = algo.substr(14);
 
  208         algo = algo.substr(1);
 
  212     if ((algo == 
"classical") || (algo == 
"signed classical sa") || (algo == 
"signed classical") || (algo == 
"colored signed classical")) {
 
  215       if (algo == 
"classical") {
 
  216         socUsesMeasure = 
"smoothed aggregation";
 
  217       } 
else if (algo == 
"signed classical sa") {
 
  218         socUsesMeasure = 
"signed smoothed aggregation";
 
  219       } 
else if (algo == 
"signed classical") {
 
  220         socUsesMeasure = 
"signed ruge-stueben";
 
  221       } 
else if (algo == 
"colored signed classical") {
 
  222         socUsesMeasure        = 
"signed ruge-stueben";
 
  223         generateColoringGraph = 
true;
 
  226       if (classicalAlgoStr == 
"default")
 
  227         droppingMethod = 
"point-wise";
 
  228       else if (classicalAlgoStr == 
"unscaled cut") {
 
  229         socUsesMeasure = 
"unscaled";
 
  230         droppingMethod = 
"cut-drop";
 
  231       } 
else if (classicalAlgoStr == 
"scaled cut") {
 
  232         droppingMethod = 
"cut-drop";
 
  233       } 
else if (classicalAlgoStr == 
"scaled cut symmetric") {
 
  234         droppingMethod         = 
"cut-drop";
 
  235         symmetrizeDroppedGraph = 
true;
 
  237     } 
else if ((algo == 
"distance laplacian") || (algo == 
"signed classical sa distance laplacian") || (algo == 
"signed classical distance laplacian")) {
 
  238       socUsesMatrix = 
"distance laplacian";
 
  240       if (algo == 
"distance laplacian") {
 
  241         socUsesMeasure = 
"smoothed aggregation";
 
  242       } 
else if (algo == 
"signed classical sa distance laplacian") {
 
  243         socUsesMeasure = 
"signed smoothed aggregation";
 
  244       } 
else if (algo == 
"signed classical distance laplacian") {
 
  245         socUsesMeasure = 
"signed ruge-stueben";
 
  248       if (distanceLaplacianAlgoStr == 
"default")
 
  249         droppingMethod = 
"point-wise";
 
  250       else if (distanceLaplacianAlgoStr == 
"unscaled cut") {
 
  251         socUsesMeasure = 
"unscaled";
 
  252         droppingMethod = 
"cut-drop";
 
  253       } 
else if (distanceLaplacianAlgoStr == 
"scaled cut") {
 
  254         droppingMethod = 
"cut-drop";
 
  255       } 
else if (distanceLaplacianAlgoStr == 
"scaled cut symmetric") {
 
  256         droppingMethod         = 
"cut-drop";
 
  257         symmetrizeDroppedGraph = 
true;
 
  259     } 
else if (algo == 
"") {
 
  267 template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
  274   using local_matrix_type = 
typename MatrixType::local_matrix_type;
 
  275   using local_graph_type  = 
typename GraphType::local_graph_type;
 
  276   using rowptr_type       = 
typename local_graph_type::row_map_type::non_const_type;
 
  277   using entries_type      = 
typename local_graph_type::entries_type::non_const_type;
 
  278   using values_type       = 
typename local_matrix_type::values_type::non_const_type;
 
  279   using device_type       = 
typename Node::device_type;
 
  280   using memory_space      = 
typename device_type::memory_space;
 
  281   using results_view_type = Kokkos::View<DecisionType*, memory_space>;
 
  288   auto A = Get<RCP<Matrix>>(currentLevel, 
"A");
 
  295   const magnitudeType dirichletThreshold       = STS::magnitude(as<SC>(pL.get<
double>(
"aggregation: Dirichlet threshold")));
 
  296   const magnitudeType rowSumTol                = as<magnitudeType>(pL.get<
double>(
"aggregation: row sum drop tol"));
 
  300   bool useBlocking                    = pL.get<
bool>(
"aggregation: use blocking");
 
  301   std::string droppingMethod          = pL.get<std::string>(
"aggregation: drop scheme");
 
  302   std::string socUsesMatrix           = pL.get<std::string>(
"aggregation: strength-of-connection: matrix");
 
  303   std::string socUsesMeasure          = pL.get<std::string>(
"aggregation: strength-of-connection: measure");
 
  304   std::string distanceLaplacianMetric = pL.get<std::string>(
"aggregation: distance laplacian metric");
 
  305   bool symmetrizeDroppedGraph         = pL.get<
bool>(
"aggregation: symmetrize graph after dropping");
 
  306   magnitudeType threshold;
 
  308   if (pL.get<
bool>(
"aggregation: use ml scaling of drop tol"))
 
  309     threshold = pL.get<
double>(
"aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID());
 
  311     threshold = as<magnitudeType>(pL.get<
double>(
"aggregation: drop tol"));
 
  312   bool aggregationMayCreateDirichlet = pL.get<
bool>(
"aggregation: dropping may create Dirichlet");
 
  315   const bool reuseGraph      = pL.get<
bool>(
"filtered matrix: reuse graph");
 
  316   const bool reuseEigenvalue = pL.get<
bool>(
"filtered matrix: reuse eigenvalue");
 
  318   const bool useRootStencil                            = pL.get<
bool>(
"filtered matrix: use root stencil");
 
  319   const bool useSpreadLumping                          = pL.get<
bool>(
"filtered matrix: use spread lumping");
 
  320   const std::string lumpingChoiceString                = pL.get<std::string>(
"filtered matrix: lumping choice");
 
  322   if (lumpingChoiceString == 
"diag lumping")
 
  324   else if (lumpingChoiceString == 
"distributed lumping")
 
  327   const magnitudeType filteringDirichletThreshold = as<magnitudeType>(pL.get<
double>(
"filtered matrix: Dirichlet threshold"));
 
  330   bool generateColoringGraph         = pL.get<
bool>(
"aggregation: coloring: use color graph");
 
  331   const bool localizeColoringGraph   = pL.get<
bool>(
"aggregation: coloring: localize color graph");
 
  332   const bool symmetrizeColoringGraph = 
true;
 
  334 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS 
  335   translateOldAlgoParam(pL, droppingMethod, useBlocking, socUsesMatrix, socUsesMeasure, symmetrizeDroppedGraph, generateColoringGraph, threshold, lumpingChoice);
 
  339     std::stringstream ss;
 
  340     ss << 
"dropping scheme = \"" << droppingMethod << 
"\", strength-of-connection measure = \"" << socUsesMeasure << 
"\", strength-of-connection matrix = \"" << socUsesMatrix << 
"\", ";
 
  341     if (socUsesMatrix == 
"distance laplacian")
 
  342       ss << 
"distance laplacian metric = \"" << distanceLaplacianMetric << 
"\", ";
 
  343     ss << 
"threshold = " << threshold << 
", blocksize = " << A->GetFixedBlockSize() << 
", useBlocking = " << useBlocking;
 
  344     ss << 
", symmetrizeDroppedGraph = " << symmetrizeDroppedGraph << std::endl;
 
  352   if (droppingMethod == 
"cut-drop")
 
  366   auto crsA  = toCrsMatrix(A);
 
  367   auto lclA  = crsA->getLocalMatrixDevice();
 
  387     if (rowSumTol <= 0.) {
 
  420   auto filtered_rowptr = rowptr_type(
"filtered_rowptr", lclA.numRows() + 1);
 
  424   auto results = results_view_type(
"results", lclA.nnz());  
 
  428     if (threshold != zero) {
 
  429       if (socUsesMatrix == 
"A") {
 
  430         if (socUsesMeasure == 
"unscaled") {
 
  431           ScalarDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::UnscaledMeasure>::runDroppingFunctors_on_A(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold,
 
  432                                                                                                                               aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *
this);
 
  433         } 
else if (socUsesMeasure == 
"smoothed aggregation") {
 
  434           ScalarDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SmoothedAggregationMeasure>::runDroppingFunctors_on_A(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold,
 
  435                                                                                                                                          aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *
this);
 
  436         } 
else if (socUsesMeasure == 
"signed ruge-stueben") {
 
  437           ScalarDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedRugeStuebenMeasure>::runDroppingFunctors_on_A(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold,
 
  438                                                                                                                                        aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *
this);
 
  439         } 
else if (socUsesMeasure == 
"signed smoothed aggregation") {
 
  440           ScalarDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedSmoothedAggregationMeasure>::runDroppingFunctors_on_A(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold,
 
  441                                                                                                                                                aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *
this);
 
  443       } 
else if (socUsesMatrix == 
"distance laplacian") {
 
  444         auto coords = Get<RCP<doubleMultiVector>>(currentLevel, 
"Coordinates");
 
  445         if (socUsesMeasure == 
"unscaled") {
 
  446           ScalarDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::UnscaledMeasure>::runDroppingFunctors_on_dlap(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, currentLevel, *
this);
 
  447         } 
else if (socUsesMeasure == 
"smoothed aggregation") {
 
  448           ScalarDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SmoothedAggregationMeasure>::runDroppingFunctors_on_dlap(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, currentLevel, *
this);
 
  449         } 
else if (socUsesMeasure == 
"signed ruge-stueben") {
 
  450           ScalarDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedRugeStuebenMeasure>::runDroppingFunctors_on_dlap(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, currentLevel, *
this);
 
  451         } 
else if (socUsesMeasure == 
"signed smoothed aggregation") {
 
  452           ScalarDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedSmoothedAggregationMeasure>::runDroppingFunctors_on_dlap(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, currentLevel, *
this);
 
  456       Kokkos::deep_copy(results, 
KEEP);
 
  458       if (symmetrizeDroppedGraph) {
 
  460         ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *
this, drop_boundaries);
 
  463         ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *
this, no_op);
 
  467     if (symmetrizeDroppedGraph) {
 
  469       ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *
this, symmetrize);
 
  472   GO numDropped = lclA.nnz() - nnz_filtered;
 
  485     local_matrix_type lclFilteredA;
 
  486     local_graph_type lclGraph;
 
  488       filteredA    = MatrixFactory::BuildCopy(A);
 
  489       lclFilteredA = filteredA->getLocalMatrixDevice();
 
  491       auto colidx = entries_type(
"entries", nnz_filtered);
 
  492       lclGraph    = local_graph_type(colidx, filtered_rowptr);
 
  494       auto colidx  = entries_type(
"entries", nnz_filtered);
 
  495       auto values  = values_type(
"values", nnz_filtered);
 
  496       lclFilteredA = local_matrix_type(
"filteredA",
 
  497                                        lclA.numRows(), lclA.numCols(),
 
  499                                        values, filtered_rowptr, colidx);
 
  505         Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_lumped_reuse", range, fillFunctor);
 
  509           Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_lumped_noreuse", range, fillFunctor);
 
  512           Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_lumped_noreuse", range, fillFunctor);
 
  518         Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_unlumped_reuse", range, fillFunctor);
 
  521         Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_unlumped_noreuse", range, fillFunctor);
 
  526       filteredA = MatrixFactory::Build(lclFilteredA, A->getRowMap(), A->getColMap(), A->getDomainMap(), A->getRangeMap());
 
  527     filteredA->SetFixedBlockSize(A->GetFixedBlockSize());
 
  529     if (reuseEigenvalue) {
 
  534       filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate());
 
  541       lclGraph = filteredA->getCrsGraph()->getLocalGraphDevice();
 
  543     graph = 
rcp(
new LWGraph_kokkos(lclGraph, filteredA->getRowMap(), filteredA->getColMap(), 
"amalgamated graph of A"));
 
  548   if (generateColoringGraph) {
 
  549     SubFactoryMonitor mColoringGraph(*
this, 
"Construct coloring graph", currentLevel);
 
  551     filtered_rowptr = rowptr_type(
"rowptr_coloring_graph", lclA.numRows() + 1);
 
  552     if (localizeColoringGraph) {
 
  554       ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *
this, drop_offrank);
 
  556     if (symmetrizeColoringGraph) {
 
  558       ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *
this, symmetrize);
 
  560     auto colidx            = entries_type(
"entries_coloring_graph", nnz_filtered);
 
  561     auto lclGraph          = local_graph_type(colidx, filtered_rowptr);
 
  563     Kokkos::parallel_for(
"MueLu::CoalesceDrop::Construct_coloring_graph", range, graphConstruction);
 
  565     auto colorGraph = 
rcp(
new LWGraph_kokkos(lclGraph, filteredA->getRowMap(), filteredA->getColMap(), 
"coloring graph"));
 
  566     Set(currentLevel, 
"Coloring Graph", colorGraph);
 
  569   if (pL.get<
bool>(
"filtered matrix: count negative diagonals")) {
 
  572     GetOStream(
Runtime0) << 
"CoalesceDrop: Negative diagonals: " << neg_count << std::endl;
 
  576   Set(currentLevel, 
"DofsPerNode", dofsPerNode);
 
  577   Set(currentLevel, 
"Graph", graph);
 
  578   Set(currentLevel, 
"A", filteredA);
 
  580   return std::make_tuple(numDropped, boundaryNodes);
 
  583 template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
  590   using local_matrix_type = 
typename MatrixType::local_matrix_type;
 
  591   using local_graph_type  = 
typename GraphType::local_graph_type;
 
  592   using rowptr_type       = 
typename local_graph_type::row_map_type::non_const_type;
 
  593   using entries_type      = 
typename local_graph_type::entries_type::non_const_type;
 
  594   using values_type       = 
typename local_matrix_type::values_type::non_const_type;
 
  595   using device_type       = 
typename Node::device_type;
 
  596   using memory_space      = 
typename device_type::memory_space;
 
  597   using results_view_type = Kokkos::View<DecisionType*, memory_space>;
 
  604   auto A = Get<RCP<Matrix>>(currentLevel, 
"A");
 
  624   LO blkSize = A->GetFixedBlockSize() / A->GetStorageBlockSize();
 
  626   auto amalInfo = Get<RCP<AmalgamationInfo>>(currentLevel, 
"UnAmalgamationInfo");
 
  638   Array<LO> rowTranslationArray     = *(amalInfo->getRowTranslation());  
 
  639   Array<LO> colTranslationArray     = *(amalInfo->getColTranslation());
 
  641   Kokkos::View<LO*, Kokkos::MemoryUnmanaged>
 
  642       rowTranslationView(rowTranslationArray.
getRawPtr(), rowTranslationArray.
size());
 
  643   Kokkos::View<LO*, Kokkos::MemoryUnmanaged>
 
  644       colTranslationView(colTranslationArray.
getRawPtr(), colTranslationArray.
size());
 
  647   LO numNodes = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
 
  648   typedef typename Kokkos::View<LocalOrdinal*, typename Node::device_type> id_translation_type;
 
  649   id_translation_type rowTranslation(
"dofId2nodeId", rowTranslationArray.
size());
 
  650   id_translation_type colTranslation(
"ov_dofId2nodeId", colTranslationArray.
size());
 
  651   Kokkos::deep_copy(rowTranslation, rowTranslationView);
 
  652   Kokkos::deep_copy(colTranslation, colTranslationView);
 
  655   blkSize                  = A->GetFixedBlockSize();  
 
  658   if (A->IsView(
"stridedMaps") == 
true) {
 
  662     blkSize = Teuchos::as<const LocalOrdinal>(strMap->getFixedBlockSize());
 
  663     blkId   = strMap->getStridedBlockId();
 
  665       blkPartSize = Teuchos::as<LocalOrdinal>(strMap->getStridingData()[blkId]);
 
  675   const magnitudeType dirichletThreshold       = STS::magnitude(as<SC>(pL.get<
double>(
"aggregation: Dirichlet threshold")));
 
  676   const magnitudeType rowSumTol                = as<magnitudeType>(pL.get<
double>(
"aggregation: row sum drop tol"));
 
  678   const bool useGreedyDirichlet                = pL.get<
bool>(
"aggregation: greedy Dirichlet");
 
  682   bool useBlocking                    = pL.get<
bool>(
"aggregation: use blocking");
 
  683   std::string droppingMethod          = pL.get<std::string>(
"aggregation: drop scheme");
 
  684   std::string socUsesMatrix           = pL.get<std::string>(
"aggregation: strength-of-connection: matrix");
 
  685   std::string socUsesMeasure          = pL.get<std::string>(
"aggregation: strength-of-connection: measure");
 
  686   std::string distanceLaplacianMetric = pL.get<std::string>(
"aggregation: distance laplacian metric");
 
  687   bool symmetrizeDroppedGraph         = pL.get<
bool>(
"aggregation: symmetrize graph after dropping");
 
  688   magnitudeType threshold;
 
  690   if (pL.get<
bool>(
"aggregation: use ml scaling of drop tol"))
 
  691     threshold = pL.get<
double>(
"aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID());
 
  693     threshold = as<magnitudeType>(pL.get<
double>(
"aggregation: drop tol"));
 
  694   bool aggregationMayCreateDirichlet = pL.get<
bool>(
"aggregation: dropping may create Dirichlet");
 
  697   const bool reuseGraph      = pL.get<
bool>(
"filtered matrix: reuse graph");
 
  698   const bool reuseEigenvalue = pL.get<
bool>(
"filtered matrix: reuse eigenvalue");
 
  700   const bool useRootStencil                            = pL.get<
bool>(
"filtered matrix: use root stencil");
 
  701   const bool useSpreadLumping                          = pL.get<
bool>(
"filtered matrix: use spread lumping");
 
  702   const std::string lumpingChoiceString                = pL.get<std::string>(
"filtered matrix: lumping choice");
 
  704   if (lumpingChoiceString == 
"diag lumping")
 
  706   else if (lumpingChoiceString == 
"distributed lumping")
 
  709   const magnitudeType filteringDirichletThreshold = as<magnitudeType>(pL.get<
double>(
"filtered matrix: Dirichlet threshold"));
 
  712   bool generateColoringGraph         = pL.get<
bool>(
"aggregation: coloring: use color graph");
 
  713   const bool localizeColoringGraph   = pL.get<
bool>(
"aggregation: coloring: localize color graph");
 
  714   const bool symmetrizeColoringGraph = 
true;
 
  716 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS 
  717   translateOldAlgoParam(pL, droppingMethod, useBlocking, socUsesMatrix, socUsesMeasure, symmetrizeDroppedGraph, generateColoringGraph, threshold, lumpingChoice);
 
  720     std::stringstream ss;
 
  721     ss << 
"dropping scheme = \"" << droppingMethod << 
"\", strength-of-connection measure = \"" << socUsesMeasure << 
"\", strength-of-connection matrix = \"" << socUsesMatrix << 
"\", ";
 
  722     if (socUsesMatrix == 
"distance laplacian")
 
  723       ss << 
"distance laplacian metric = \"" << distanceLaplacianMetric << 
"\", ";
 
  724     ss << 
"threshold = " << threshold << 
", blocksize = " << A->GetFixedBlockSize() << 
", useBlocking = " << useBlocking;
 
  725     ss << 
", symmetrizeDroppedGraph = " << symmetrizeDroppedGraph << std::endl;
 
  733   if (droppingMethod == 
"cut-drop")
 
  734     TEUCHOS_TEST_FOR_EXCEPTION(threshold > 1.0, 
Exceptions::RuntimeError, 
"For cut-drop algorithms, \"aggregation: drop tol\" = " << threshold << 
", needs to be <= 1.0");
 
  747   auto crsA  = toCrsMatrix(A);
 
  748   auto lclA  = crsA->getLocalMatrixDevice();
 
  763     if (useGreedyDirichlet) {
 
  793   auto filtered_rowptr = rowptr_type(
"rowptr", lclA.numRows() + 1);
 
  794   auto graph_rowptr    = rowptr_type(
"rowptr", numNodes + 1);
 
  796   Kokkos::pair<LocalOrdinal, LocalOrdinal> nnz = {0, 0};
 
  799   auto results = results_view_type(
"results", lclA.nnz());  
 
  808       auto merged_rowptr      = rowptr_type(
"rowptr", numNodes + 1);
 
  812       Kokkos::parallel_scan(
"MergeCount", range, functor, nnz_merged);
 
  814       local_graph_type lclMergedGraph;
 
  815       auto colidx_merged = entries_type(
"entries", nnz_merged);
 
  816       auto values_merged = values_type(
"values", nnz_merged);
 
  818       local_matrix_type lclMergedA = local_matrix_type(
"mergedA",
 
  819                                                        numNodes, nonUniqueMap->getLocalNumElements(),
 
  821                                                        values_merged, merged_rowptr, colidx_merged);
 
  824       Kokkos::parallel_for(
"MueLu::CoalesceDrop::MergeFill", range, fillFunctor);
 
  829     if (threshold != zero) {
 
  830       if (socUsesMatrix == 
"A") {
 
  831         if (socUsesMeasure == 
"unscaled") {
 
  832           VectorDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::UnscaledMeasure>::runDroppingFunctors_on_A(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *
this);
 
  833         } 
else if (socUsesMeasure == 
"smoothed aggregation") {
 
  834           VectorDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SmoothedAggregationMeasure>::runDroppingFunctors_on_A(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *
this);
 
  835         } 
else if (socUsesMeasure == 
"signed ruge-stueben") {
 
  836           VectorDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedRugeStuebenMeasure>::runDroppingFunctors_on_A(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *
this);
 
  837         } 
else if (socUsesMeasure == 
"signed smoothed aggregation") {
 
  838           VectorDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedSmoothedAggregationMeasure>::runDroppingFunctors_on_A(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *
this);
 
  840       } 
else if (socUsesMatrix == 
"distance laplacian") {
 
  841         auto coords = Get<RCP<doubleMultiVector>>(currentLevel, 
"Coordinates");
 
  844         LocalOrdinal interleaved_blocksize = as<LocalOrdinal>(pL.get<
int>(
"aggregation: block diagonal: interleaved blocksize"));
 
  845         if (socUsesMeasure == 
"distance laplacian") {
 
  846           LO dim = (
LO)coords->getNumVectors();
 
  848           bool non_unity = 
false;
 
  849           for (
LO i = 0; !non_unity && i < (
LO)dlap_weights.size(); i++) {
 
  850             if (dlap_weights[i] != 1.0) {
 
  855             if ((
LO)dlap_weights.size() == dim) {
 
  856               distanceLaplacianMetric = 
"weighted";
 
  857             } 
else if ((
LO)dlap_weights.size() == interleaved_blocksize * dim)
 
  858               distanceLaplacianMetric = 
"block weighted";
 
  861                                          "length of 'aggregation: distance laplacian directional weights' must equal the coordinate dimension OR the coordinate dimension times the blocksize");
 
  864               GetOStream(
Statistics1) << 
"Using distance laplacian weights: " << dlap_weights << std::endl;
 
  868         if (socUsesMeasure == 
"unscaled") {
 
  869           VectorDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::UnscaledMeasure>::runDroppingFunctors_on_dlap(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, dlap_weights, interleaved_blocksize, currentLevel, *
this);
 
  870         } 
else if (socUsesMeasure == 
"smoothed aggregation") {
 
  871           VectorDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SmoothedAggregationMeasure>::runDroppingFunctors_on_dlap(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, dlap_weights, interleaved_blocksize, currentLevel, *
this);
 
  872         } 
else if (socUsesMeasure == 
"signed ruge-stueben") {
 
  873           VectorDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedRugeStuebenMeasure>::runDroppingFunctors_on_dlap(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, dlap_weights, interleaved_blocksize, currentLevel, *
this);
 
  874         } 
else if (socUsesMeasure == 
"signed smoothed aggregation") {
 
  875           VectorDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedSmoothedAggregationMeasure>::runDroppingFunctors_on_dlap(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, dlap_weights, interleaved_blocksize, currentLevel, *
this);
 
  879       Kokkos::deep_copy(results, 
KEEP);
 
  882       VectorDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, useBlocking, currentLevel, *
this, no_op);
 
  885     if (symmetrizeDroppedGraph) {
 
  887       VectorDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, useBlocking, currentLevel, *
this, symmetrize);
 
  892   GO numTotal               = lclA.nnz();
 
  893   GO numDropped             = numTotal - nnz_filtered;
 
  906     local_matrix_type lclFilteredA;
 
  908       lclFilteredA = local_matrix_type(
"filteredA", lclA.graph, lclA.numCols());
 
  910       auto colidx  = entries_type(
"entries", nnz_filtered);
 
  911       auto values  = values_type(
"values", nnz_filtered);
 
  912       lclFilteredA = local_matrix_type(
"filteredA",
 
  913                                        lclA.numRows(), lclA.numCols(),
 
  915                                        values, filtered_rowptr, colidx);
 
  918     local_graph_type lclGraph;
 
  920       auto colidx = entries_type(
"entries", nnz_graph);
 
  921       lclGraph    = local_graph_type(colidx, graph_rowptr);
 
  927         Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_lumped_reuse", range, fillFunctor);
 
  930         Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_lumped_noreuse", range, fillFunctor);
 
  935         Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_unlumped_reuse", range, fillFunctor);
 
  938         Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_unlumped_noreuse", range, fillFunctor);
 
  943     filteredA->SetFixedBlockSize(blkSize);
 
  945     if (reuseEigenvalue) {
 
  950       filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate());
 
  955     graph = 
rcp(
new LWGraph_kokkos(lclGraph, uniqueMap, nonUniqueMap, 
"amalgamated graph of A"));
 
  960   if (generateColoringGraph) {
 
  961     SubFactoryMonitor mColoringGraph(*
this, 
"Construct coloring graph", currentLevel);
 
  963     filtered_rowptr = rowptr_type(
"rowptr_coloring_graph", lclA.numRows() + 1);
 
  964     graph_rowptr    = rowptr_type(
"rowptr", numNodes + 1);
 
  965     if (localizeColoringGraph) {
 
  967       VectorDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, useBlocking, currentLevel, *
this, drop_offrank);
 
  969     if (symmetrizeColoringGraph) {
 
  971       VectorDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, useBlocking, currentLevel, *
this, symmetrize);
 
  973     auto colidx            = entries_type(
"entries_coloring_graph", nnz_filtered);
 
  974     auto lclGraph          = local_graph_type(colidx, filtered_rowptr);
 
  976     Kokkos::parallel_for(
"MueLu::CoalesceDrop::Construct_coloring_graph", range, graphConstruction);
 
  978     auto colorGraph = 
rcp(
new LWGraph_kokkos(lclGraph, filteredA->getRowMap(), filteredA->getColMap(), 
"coloring graph"));
 
  979     Set(currentLevel, 
"Coloring Graph", colorGraph);
 
  982   LO dofsPerNode = blkSize;
 
  984   Set(currentLevel, 
"DofsPerNode", dofsPerNode);
 
  985   Set(currentLevel, 
"Graph", graph);
 
  986   Set(currentLevel, 
"A", filteredA);
 
  988   return std::make_tuple(numDropped, boundaryNodes);
 
  992 #endif  // MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP 
MueLu::DefaultLocalOrdinal LocalOrdinal
 
Lightweight MueLu representation of a compressed row storage graph. 
 
KOKKOS_INLINE_FUNCTION void SetBoundaryNodeMap(const boundary_nodes_type bndry)
Set boolean array indicating which rows correspond to Dirichlet boundaries. 
 
void setValidator(RCP< const ParameterEntryValidator > const &validator)
 
static GlobalOrdinal CountNegativeDiagonalEntries(const Matrix &A)
Counts the number of negative diagonal entries. 
 
T & get(const std::string &name, T def_value)
 
void translateOldAlgoParam(const Teuchos::ParameterList &pL, std::string &droppingMethod, bool &useBlocking, std::string &socUsesMatrix, std::string &socUsesMeasure, bool &symmetrizeDroppedGraph, bool &generateColoringGraph, magnitudeType &threshold, MueLu::MatrixConstruction::lumpingType &lumpingChoice)
 
Timer to be used in factories. Similar to Monitor but with additional timers. 
 
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
 
One-liner description of what is happening. 
 
ParameterList & set(std::string const &name, T &&value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
 
Functor that drops boundary nodes for a blockSize == 1 problem. 
 
void runBoundaryFunctors(local_matrix_type &lclA, boundary_nodes_view &boundaryNodes, Functors &...functors)
 
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
 
Functor that symmetrizes the dropping decisions. 
 
MueLu::DefaultGlobalOrdinal GlobalOrdinal
 
Functor that drops off-rank entries. 
 
Class that holds all level-specific information. 
 
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level. 
 
static void runDroppingFunctors_on_A(matrix_type &A, matrix_type &mergedA, LocalOrdinal blkPartSize, block_indices_view_type &rowTranslation, block_indices_view_type &colTranslation, results_view &results, rowptr_type &filtered_rowptr, rowptr_type &graph_rowptr, nnz_count_type &nnz, boundary_nodes_type &boundaryNodes, const std::string &droppingMethod, const magnitudeType threshold, const bool aggregationMayCreateDirichlet, const bool symmetrizeDroppedGraph, const bool useBlocking, Level &level, const Factory &factory)
 
typename MueLu::LWGraph_kokkos< LocalOrdinal, GlobalOrdinal, Node >::boundary_nodes_type boundary_nodes_type
 
void DeclareInput(Level &currentLevel) const 
Input. 
 
Functor that fills the filtered matrix while reusing the graph of the matrix before dropping...
 
Functor for marking nodes as Dirichlet. 
 
static void runDroppingFunctors_on_dlap(matrix_type &A, matrix_type &mergedA, LocalOrdinal blkPartSize, block_indices_view_type &rowTranslation, block_indices_view_type &colTranslation, results_view &results, rowptr_type &filtered_rowptr, rowptr_type &graph_rowptr, nnz_count_type &nnz, boundary_nodes_type &boundaryNodes, const std::string &droppingMethod, const magnitudeType threshold, const bool aggregationMayCreateDirichlet, const bool symmetrizeDroppedGraph, const bool useBlocking, const std::string &distanceLaplacianMetric, Teuchos::Array< double > &dlap_weights, LocalOrdinal interleaved_blocksize, Level &level, const Factory &factory)
 
Kokkos::RangePolicy< local_ordinal_type, execution_space > range_type
 
std::tuple< GlobalOrdinal, boundary_nodes_type > BuildVector(Level &currentLevel) const 
 
static void runDroppingFunctors_on_dlap(matrix_type &A, results_view &results, rowptr_type &filtered_rowptr, LocalOrdinal &nnz_filtered, boundary_nodes_type &boundaryNodes, const std::string &droppingMethod, const magnitudeType threshold, const bool aggregationMayCreateDirichlet, const bool symmetrizeDroppedGraph, const bool useBlocking, const std::string &distanceLaplacianMetric, Level &level, const Factory &factory)
 
RCP< const ParameterList > GetValidParameterList() const 
Return a const parameter list of valid parameters that setParameterList() will accept. 
 
void Build(Level &currentLevel) const 
Build an object with this factory. 
 
Functor for marking nodes as Dirichlet based on rowsum. 
 
Functor that serially applies sub-functors to rows. 
 
static void runDroppingFunctors_on_A(matrix_type &A, results_view &results, rowptr_type &filtered_rowptr, LocalOrdinal &nnz_filtered, boundary_nodes_type &boundaryNodes, const std::string &droppingMethod, const magnitudeType threshold, const bool aggregationMayCreateDirichlet, const bool symmetrizeDroppedGraph, const bool useBlocking, Level &level, const Factory &factory)
 
#define SET_VALID_ENTRY(name)
 
Functor for marking nodes as Dirichlet in a block operator. 
 
std::tuple< GlobalOrdinal, boundary_nodes_type > BuildScalar(Level &currentLevel) const 
 
Functor does not reuse the graph of the matrix for a problem with blockSize == 1. ...
 
static RCP< Matrix > Build(const RCP< const Map > &rowMap, size_t maxNumEntriesPerRow, Xpetra::ProfileType pftype=Xpetra::DynamicProfile)
 
Exception thrown to report errors in the internal logic of the program. 
 
#define TEUCHOS_ASSERT(assertion_test)
 
ParameterEntry & getEntry(const std::string &name)