MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_CoalesceDropFactory_kokkos_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // MueLu: A package for multigrid based preconditioning
4 //
5 // Copyright 2012 NTESS and the MueLu contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
11 #define MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
12 
13 #include <Kokkos_Core.hpp>
14 #include <KokkosSparse_CrsMatrix.hpp>
15 #include <sstream>
16 #include <string>
17 #include <tuple>
18 
19 #include "Xpetra_Matrix.hpp"
20 
22 
23 #include "MueLu_AmalgamationInfo.hpp"
24 #include "MueLu_Exceptions.hpp"
25 #include "MueLu_Level.hpp"
26 #include "MueLu_LWGraph_kokkos.hpp"
27 #include "MueLu_MasterList.hpp"
28 #include "MueLu_Monitor.hpp"
29 #include "MueLu_Utilities.hpp"
30 
32 #include "MueLu_DroppingCommon.hpp"
34 
35 // The different dropping algorithms are split up over several translation units. This speeds up compilation and also avoids launch latencies on GPU.
37 #include "MueLu_ScalarDroppingClassical.hpp"
38 #include "MueLu_ScalarDroppingDistanceLaplacian.hpp"
40 #include "MueLu_VectorDroppingClassical.hpp"
41 #include "MueLu_VectorDroppingDistanceLaplacian.hpp"
42 
43 namespace MueLu {
44 
45 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
47  RCP<ParameterList> validParamList = rcp(new ParameterList());
48 
49 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
50  SET_VALID_ENTRY("aggregation: use blocking");
51  SET_VALID_ENTRY("aggregation: drop tol");
52  SET_VALID_ENTRY("aggregation: use ml scaling of drop tol");
53  SET_VALID_ENTRY("aggregation: Dirichlet threshold");
54  SET_VALID_ENTRY("aggregation: greedy Dirichlet");
55  SET_VALID_ENTRY("aggregation: row sum drop tol");
56  SET_VALID_ENTRY("aggregation: strength-of-connection: matrix");
57  SET_VALID_ENTRY("aggregation: strength-of-connection: measure");
58  SET_VALID_ENTRY("aggregation: drop scheme");
59  SET_VALID_ENTRY("aggregation: block diagonal: interleaved blocksize");
60  SET_VALID_ENTRY("aggregation: distance laplacian metric");
61  SET_VALID_ENTRY("aggregation: distance laplacian directional weights");
62  SET_VALID_ENTRY("aggregation: dropping may create Dirichlet");
63 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
64  SET_VALID_ENTRY("aggregation: distance laplacian algo");
65  SET_VALID_ENTRY("aggregation: classical algo");
66 #endif
67  SET_VALID_ENTRY("aggregation: symmetrize graph after dropping");
68  SET_VALID_ENTRY("aggregation: coloring: use color graph");
69  SET_VALID_ENTRY("aggregation: coloring: localize color graph");
70 
71  SET_VALID_ENTRY("filtered matrix: use lumping");
72  SET_VALID_ENTRY("filtered matrix: reuse graph");
73  SET_VALID_ENTRY("filtered matrix: reuse eigenvalue");
74 
75  SET_VALID_ENTRY("filtered matrix: use root stencil");
76  SET_VALID_ENTRY("filtered matrix: lumping choice");
77  SET_VALID_ENTRY("filtered matrix: use spread lumping");
78  SET_VALID_ENTRY("filtered matrix: spread lumping diag dom growth factor");
79  SET_VALID_ENTRY("filtered matrix: spread lumping diag dom cap");
80  SET_VALID_ENTRY("filtered matrix: Dirichlet threshold");
81  SET_VALID_ENTRY("filtered matrix: count negative diagonals");
82 
83 #undef SET_VALID_ENTRY
84  validParamList->set<bool>("lightweight wrap", true, "Experimental option for lightweight graph access");
85 #ifndef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
86  validParamList->getEntry("aggregation: drop scheme").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("point-wise", "cut-drop"))));
87 #else
88  validParamList->getEntry("aggregation: drop scheme").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("point-wise", "cut-drop", "signed classical sa", "classical", "distance laplacian", "signed classical", "block diagonal", "block diagonal classical", "block diagonal distance laplacian", "block diagonal signed classical", "block diagonal colored signed classical", "signed classical distance laplacian", "signed classical sa distance laplacian"))));
89  validParamList->getEntry("aggregation: classical algo").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("default", "unscaled cut", "scaled cut", "scaled cut symmetric"))));
90  validParamList->getEntry("aggregation: distance laplacian algo").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("default", "unscaled cut", "scaled cut", "scaled cut symmetric"))));
91 #endif
92  validParamList->getEntry("aggregation: strength-of-connection: matrix").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("A", "distance laplacian"))));
93  validParamList->getEntry("aggregation: strength-of-connection: measure").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("smoothed aggregation", "signed smoothed aggregation", "signed ruge-stueben", "unscaled"))));
94  validParamList->getEntry("aggregation: distance laplacian metric").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("unweighted", "material"))));
95 
96  validParamList->set<RCP<const FactoryBase>>("A", Teuchos::null, "Generating factory of the matrix A");
97  validParamList->set<RCP<const FactoryBase>>("UnAmalgamationInfo", Teuchos::null, "Generating factory for UnAmalgamationInfo");
98  validParamList->set<RCP<const FactoryBase>>("Coordinates", Teuchos::null, "Generating factory for Coordinates");
99  validParamList->set<RCP<const FactoryBase>>("BlockNumber", Teuchos::null, "Generating factory for BlockNumber");
100  validParamList->set<RCP<const FactoryBase>>("Material", Teuchos::null, "Generating factory for Material");
101 
102  return validParamList;
103 }
104 
105 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
107  Input(currentLevel, "A");
108  Input(currentLevel, "UnAmalgamationInfo");
109 
110  const ParameterList& pL = GetParameterList();
111 
112  std::string socUsesMatrix = pL.get<std::string>("aggregation: strength-of-connection: matrix");
113  bool needCoords = (socUsesMatrix == "distance laplacian");
114 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
115  std::string droppingMethod = pL.get<std::string>("aggregation: drop scheme");
116  needCoords |= (droppingMethod.find("distance laplacian") != std::string::npos);
117 #endif
118  if (needCoords) {
119  Input(currentLevel, "Coordinates");
120  std::string distLaplMetric = pL.get<std::string>("aggregation: distance laplacian metric");
121  if (distLaplMetric == "material")
122  Input(currentLevel, "Material");
123  }
124 
125  bool useBlocking = pL.get<bool>("aggregation: use blocking");
126 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
127  useBlocking |= (droppingMethod.find("block diagonal") != std::string::npos);
128 #endif
129  if (useBlocking) {
130  Input(currentLevel, "BlockNumber");
131  }
132 }
133 
134 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
136  Build(Level& currentLevel) const {
137  auto A = Get<RCP<Matrix>>(currentLevel, "A");
138  TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() % A->GetStorageBlockSize() != 0, Exceptions::RuntimeError, "A->GetFixedBlockSize() needs to be a multiple of A->GetStorageBlockSize()");
139  LO blkSize = A->GetFixedBlockSize() / A->GetStorageBlockSize();
140 
141  std::tuple<GlobalOrdinal, boundary_nodes_type> results;
142  if (blkSize == 1)
143  results = BuildScalar(currentLevel);
144  else
145  results = BuildVector(currentLevel);
146 
147  if (GetVerbLevel() & Statistics1) {
148  GlobalOrdinal numDropped = std::get<0>(results);
149  auto boundaryNodes = std::get<1>(results);
150 
151  GO numLocalBoundaryNodes = 0;
152 
153  Kokkos::parallel_reduce(
154  "MueLu:CoalesceDropF:Build:bnd", range_type(0, boundaryNodes.extent(0)),
155  KOKKOS_LAMBDA(const LO i, GO& n) {
156  if (boundaryNodes(i))
157  n++;
158  },
159  numLocalBoundaryNodes);
160 
161  if (IsPrint(Statistics1)) {
162  auto comm = A->getRowMap()->getComm();
163 
164  std::vector<GlobalOrdinal> localStats = {numLocalBoundaryNodes, numDropped};
165  std::vector<GlobalOrdinal> globalStats(2);
166  Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, 2, localStats.data(), globalStats.data());
167 
168  GO numGlobalTotal = A->getGlobalNumEntries();
169  GO numGlobalBoundaryNodes = globalStats[0];
170  GO numGlobalDropped = globalStats[1];
171 
172  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
173  if (numGlobalTotal != 0) {
174  GetOStream(Statistics1) << "Number of dropped entries: "
175  << numGlobalDropped << "/" << numGlobalTotal
176  << " (" << 100 * Teuchos::as<double>(numGlobalDropped) / Teuchos::as<double>(numGlobalTotal) << "%)" << std::endl;
177  }
178  }
179  }
180 }
181 
182 template <class local_matrix_type, class boundary_nodes_view, class... Functors>
183 void runBoundaryFunctors(local_matrix_type& lclA, boundary_nodes_view& boundaryNodes, Functors&... functors) {
184  using local_ordinal_type = typename local_matrix_type::ordinal_type;
185  using execution_space = typename local_matrix_type::execution_space;
186  using range_type = Kokkos::RangePolicy<local_ordinal_type, execution_space>;
187  auto range = range_type(0, boundaryNodes.extent(0));
188  auto boundaries = BoundaryDetection::BoundaryFunctor(lclA, functors...);
189  Kokkos::parallel_for("CoalesceDrop::BoundaryDetection", range, boundaries);
190 }
191 
192 template <class magnitudeType>
193 void translateOldAlgoParam(const Teuchos::ParameterList& pL, std::string& droppingMethod, bool& useBlocking, std::string& socUsesMatrix, std::string& socUsesMeasure, bool& symmetrizeDroppedGraph, bool& generateColoringGraph, magnitudeType& threshold, MueLu::MatrixConstruction::lumpingType& lumpingChoice) {
194  std::set<std::string> validDroppingMethods = {"piece-wise", "cut-drop"};
195 
196  if (!pL.get<bool>("filtered matrix: use lumping")) lumpingChoice = MueLu::MatrixConstruction::no_lumping;
197 
198  if (validDroppingMethods.find(droppingMethod) == validDroppingMethods.end()) {
199  std::string algo = droppingMethod;
200  std::string classicalAlgoStr = pL.get<std::string>("aggregation: classical algo");
201  std::string distanceLaplacianAlgoStr = pL.get<std::string>("aggregation: distance laplacian algo");
202 
203  // Remove prefix "block diagonal" from algo
204  if (algo.find("block diagonal") == 0) {
205  useBlocking = true;
206  algo = algo.substr(14);
207  if (algo != "") {
208  algo = algo.substr(1);
209  }
210  }
211 
212  if ((algo == "classical") || (algo == "signed classical sa") || (algo == "signed classical") || (algo == "colored signed classical")) {
213  socUsesMatrix = "A";
214 
215  if (algo == "classical") {
216  socUsesMeasure = "smoothed aggregation";
217  } else if (algo == "signed classical sa") {
218  socUsesMeasure = "signed smoothed aggregation";
219  } else if (algo == "signed classical") {
220  socUsesMeasure = "signed ruge-stueben";
221  } else if (algo == "colored signed classical") {
222  socUsesMeasure = "signed ruge-stueben";
223  generateColoringGraph = true;
224  }
225 
226  if (classicalAlgoStr == "default")
227  droppingMethod = "point-wise";
228  else if (classicalAlgoStr == "unscaled cut") {
229  socUsesMeasure = "unscaled";
230  droppingMethod = "cut-drop";
231  } else if (classicalAlgoStr == "scaled cut") {
232  droppingMethod = "cut-drop";
233  } else if (classicalAlgoStr == "scaled cut symmetric") {
234  droppingMethod = "cut-drop";
235  symmetrizeDroppedGraph = true;
236  }
237  } else if ((algo == "distance laplacian") || (algo == "signed classical sa distance laplacian") || (algo == "signed classical distance laplacian")) {
238  socUsesMatrix = "distance laplacian";
239 
240  if (algo == "distance laplacian") {
241  socUsesMeasure = "smoothed aggregation";
242  } else if (algo == "signed classical sa distance laplacian") {
243  socUsesMeasure = "signed smoothed aggregation";
244  } else if (algo == "signed classical distance laplacian") {
245  socUsesMeasure = "signed ruge-stueben";
246  }
247 
248  if (distanceLaplacianAlgoStr == "default")
249  droppingMethod = "point-wise";
250  else if (distanceLaplacianAlgoStr == "unscaled cut") {
251  socUsesMeasure = "unscaled";
252  droppingMethod = "cut-drop";
253  } else if (distanceLaplacianAlgoStr == "scaled cut") {
254  droppingMethod = "cut-drop";
255  } else if (distanceLaplacianAlgoStr == "scaled cut symmetric") {
256  droppingMethod = "cut-drop";
257  symmetrizeDroppedGraph = true;
258  }
259  } else if (algo == "") {
260  // algo was "block diagonal", but we process and remove the "block diagonal" part
261  socUsesMatrix = "A";
263  }
264  }
265 }
266 
267 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
268 std::tuple<GlobalOrdinal, typename MueLu::LWGraph_kokkos<LocalOrdinal, GlobalOrdinal, Node>::boundary_nodes_type> CoalesceDropFactory_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
269  BuildScalar(Level& currentLevel) const {
270  FactoryMonitor m(*this, "BuildScalar", currentLevel);
271 
274  using local_matrix_type = typename MatrixType::local_matrix_type;
275  using local_graph_type = typename GraphType::local_graph_type;
276  using rowptr_type = typename local_graph_type::row_map_type::non_const_type;
277  using entries_type = typename local_graph_type::entries_type::non_const_type;
278  using values_type = typename local_matrix_type::values_type::non_const_type;
279  using device_type = typename Node::device_type;
280  using memory_space = typename device_type::memory_space;
281  using results_view_type = Kokkos::View<DecisionType*, memory_space>;
282  using magnitudeType = typename Teuchos::ScalarTraits<Scalar>::magnitudeType;
283  using doubleMultiVector = Xpetra::MultiVector<magnitudeType, LO, GO, NO>;
284 
285  typedef Teuchos::ScalarTraits<Scalar> STS;
286  const magnitudeType zero = Teuchos::ScalarTraits<magnitudeType>::zero();
287 
288  auto A = Get<RCP<Matrix>>(currentLevel, "A");
289 
291  // Process parameterlist
292  const ParameterList& pL = GetParameterList();
293 
294  // Boundary detection
295  const magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
296  const magnitudeType rowSumTol = as<magnitudeType>(pL.get<double>("aggregation: row sum drop tol"));
297  const LocalOrdinal dirichletNonzeroThreshold = 1;
298 
299  // Dropping
300  bool useBlocking = pL.get<bool>("aggregation: use blocking");
301  std::string droppingMethod = pL.get<std::string>("aggregation: drop scheme");
302  std::string socUsesMatrix = pL.get<std::string>("aggregation: strength-of-connection: matrix");
303  std::string socUsesMeasure = pL.get<std::string>("aggregation: strength-of-connection: measure");
304  std::string distanceLaplacianMetric = pL.get<std::string>("aggregation: distance laplacian metric");
305  bool symmetrizeDroppedGraph = pL.get<bool>("aggregation: symmetrize graph after dropping");
306  magnitudeType threshold;
307  // If we're doing the ML-style halving of the drop tol at each level, we do that here.
308  if (pL.get<bool>("aggregation: use ml scaling of drop tol"))
309  threshold = pL.get<double>("aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID());
310  else
311  threshold = as<magnitudeType>(pL.get<double>("aggregation: drop tol"));
312  bool aggregationMayCreateDirichlet = pL.get<bool>("aggregation: dropping may create Dirichlet");
313 
314  // Fill
315  const bool reuseGraph = pL.get<bool>("filtered matrix: reuse graph");
316  const bool reuseEigenvalue = pL.get<bool>("filtered matrix: reuse eigenvalue");
317 
318  const bool useRootStencil = pL.get<bool>("filtered matrix: use root stencil");
319  const bool useSpreadLumping = pL.get<bool>("filtered matrix: use spread lumping");
320  const std::string lumpingChoiceString = pL.get<std::string>("filtered matrix: lumping choice");
322  if (lumpingChoiceString == "diag lumping")
324  else if (lumpingChoiceString == "distributed lumping")
326 
327  const magnitudeType filteringDirichletThreshold = as<magnitudeType>(pL.get<double>("filtered matrix: Dirichlet threshold"));
328 
329  // coloring graph
330  bool generateColoringGraph = pL.get<bool>("aggregation: coloring: use color graph");
331  const bool localizeColoringGraph = pL.get<bool>("aggregation: coloring: localize color graph");
332  const bool symmetrizeColoringGraph = true;
333 
334 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
335  translateOldAlgoParam(pL, droppingMethod, useBlocking, socUsesMatrix, socUsesMeasure, symmetrizeDroppedGraph, generateColoringGraph, threshold, lumpingChoice);
336 #endif
337 
338  {
339  std::stringstream ss;
340  ss << "dropping scheme = \"" << droppingMethod << "\", strength-of-connection measure = \"" << socUsesMeasure << "\", strength-of-connection matrix = \"" << socUsesMatrix << "\", ";
341  if (socUsesMatrix == "distance laplacian")
342  ss << "distance laplacian metric = \"" << distanceLaplacianMetric << "\", ";
343  ss << "threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << ", useBlocking = " << useBlocking;
344  ss << ", symmetrizeDroppedGraph = " << symmetrizeDroppedGraph << std::endl;
345 
346  GetOStream(Runtime0) << ss.str();
347  }
348 
349  TEUCHOS_ASSERT(!useRootStencil);
350  TEUCHOS_ASSERT(!useSpreadLumping);
351  TEUCHOS_ASSERT((lumpingChoice != MueLu::MatrixConstruction::distributed_lumping) || !reuseGraph);
352  if (droppingMethod == "cut-drop")
353  TEUCHOS_TEST_FOR_EXCEPTION(threshold > 1.0, Exceptions::RuntimeError, "For cut-drop algorithms, \"aggregation: drop tol\" = " << threshold << ", needs to be <= 1.0");
354 
356  // We perform four sweeps over the rows of A:
357  // Pass 1: detection of boundary nodes
358  // Pass 2: diagonal extraction
359  // Pass 3: drop decision for each entry and construction of the rowptr of the filtered matrix
360  // Pass 4: fill of the filtered matrix
361  //
362  // Pass 1 and 3 apply a sequence of criteria to each row of the matrix.
363 
364  // TODO: We could merge pass 1 and 2.
365 
366  auto crsA = toCrsMatrix(A);
367  auto lclA = crsA->getLocalMatrixDevice();
368  auto range = range_type(0, lclA.numRows());
369 
371  // Pass 1: Detect boundary nodes
372  //
373  // The following criteria are available:
374  // - BoundaryDetection::PointDirichletFunctor
375  // Marks rows as Dirichlet based on value threshold and number of off-diagonal entries
376  // - BoundaryDetection::RowSumFunctor
377  // Marks rows as Dirichlet bases on row-sum criterion
378 
379  // Dirichlet nodes
380  auto boundaryNodes = boundary_nodes_type("boundaryNodes", lclA.numRows()); // initialized to false
381  {
382  SubFactoryMonitor mBoundary(*this, "Boundary detection", currentLevel);
383 
384  // macro that applies boundary detection functors
385  auto dirichlet_detection = BoundaryDetection::PointDirichletFunctor(lclA, boundaryNodes, dirichletThreshold, dirichletNonzeroThreshold);
386 
387  if (rowSumTol <= 0.) {
388  runBoundaryFunctors(lclA, boundaryNodes, dirichlet_detection);
389  } else {
390  auto apply_rowsum = BoundaryDetection::RowSumFunctor(lclA, boundaryNodes, rowSumTol);
391  runBoundaryFunctors(lclA, boundaryNodes, dirichlet_detection, apply_rowsum);
392  }
393  }
394  // In what follows, boundaryNodes can still still get modified if aggregationMayCreateDirichlet == true.
395  // Otherwise we're now done with it now.
396 
398  // Pass 2 & 3: Diagonal extraction and determine dropping and construct
399  // rowptr of filtered matrix
400  //
401  // The following criteria are available:
402  // - Misc::PointwiseDropBoundaryFunctor
403  // Drop all rows that have been marked as Dirichlet
404  // - Misc::DropOffRankFunctor
405  // Drop all entries that are off-rank
406  // - ClassicalDropping::DropFunctor
407  // Classical dropping
408  // - DistanceLaplacian::DropFunctor
409  // Distance Laplacian dropping
410  // - Misc::KeepDiagonalFunctor
411  // Mark diagonal as KEEP
412  // - Misc::MarkSingletonFunctor
413  // Mark singletons after dropping as Dirichlet
414  // - Misc::BlockDiagonalizeFunctor
415  // Drop coupling between blocks
416  //
417  // For the block diagonal variants we first block diagonalized and then apply "blocksize = 1" algorithms.
418 
419  // rowptr of filtered A
420  auto filtered_rowptr = rowptr_type("filtered_rowptr", lclA.numRows() + 1);
421  // Number of nonzeros of filtered A
422  LocalOrdinal nnz_filtered = 0;
423  // dropping decisions for each entry
424  auto results = results_view_type("results", lclA.nnz()); // initialized to UNDECIDED
425  {
426  SubFactoryMonitor mDropping(*this, "Dropping decisions", currentLevel);
427 
428  if (threshold != zero) {
429  if (socUsesMatrix == "A") {
430  if (socUsesMeasure == "unscaled") {
431  ScalarDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::UnscaledMeasure>::runDroppingFunctors_on_A(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold,
432  aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
433  } else if (socUsesMeasure == "smoothed aggregation") {
434  ScalarDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SmoothedAggregationMeasure>::runDroppingFunctors_on_A(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold,
435  aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
436  } else if (socUsesMeasure == "signed ruge-stueben") {
437  ScalarDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedRugeStuebenMeasure>::runDroppingFunctors_on_A(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold,
438  aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
439  } else if (socUsesMeasure == "signed smoothed aggregation") {
440  ScalarDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedSmoothedAggregationMeasure>::runDroppingFunctors_on_A(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold,
441  aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
442  }
443  } else if (socUsesMatrix == "distance laplacian") {
444  auto coords = Get<RCP<doubleMultiVector>>(currentLevel, "Coordinates");
445  if (socUsesMeasure == "unscaled") {
446  ScalarDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::UnscaledMeasure>::runDroppingFunctors_on_dlap(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, currentLevel, *this);
447  } else if (socUsesMeasure == "smoothed aggregation") {
448  ScalarDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SmoothedAggregationMeasure>::runDroppingFunctors_on_dlap(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, currentLevel, *this);
449  } else if (socUsesMeasure == "signed ruge-stueben") {
450  ScalarDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedRugeStuebenMeasure>::runDroppingFunctors_on_dlap(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, currentLevel, *this);
451  } else if (socUsesMeasure == "signed smoothed aggregation") {
452  ScalarDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedSmoothedAggregationMeasure>::runDroppingFunctors_on_dlap(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, currentLevel, *this);
453  }
454  }
455  } else {
456  Kokkos::deep_copy(results, KEEP);
457 
458  if (symmetrizeDroppedGraph) {
459  auto drop_boundaries = Misc::PointwiseSymmetricDropBoundaryFunctor(*A, boundaryNodes, results);
460  ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *this, drop_boundaries);
461  } else {
462  auto no_op = Misc::NoOpFunctor<LocalOrdinal>();
463  ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *this, no_op);
464  }
465  }
466 
467  if (symmetrizeDroppedGraph) {
468  auto symmetrize = Misc::SymmetrizeFunctor(lclA, results);
469  ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *this, symmetrize);
470  }
471  }
472  GO numDropped = lclA.nnz() - nnz_filtered;
473  // We now know the number of entries of filtered A and have the final rowptr.
474 
476  // Pass 4: Create local matrix for filtered A
477  //
478  // Dropped entries are optionally lumped to the diagonal.
479 
480  RCP<Matrix> filteredA;
481  RCP<LWGraph_kokkos> graph;
482  {
483  SubFactoryMonitor mFill(*this, "Filtered matrix fill", currentLevel);
484 
485  local_matrix_type lclFilteredA;
486  local_graph_type lclGraph;
487  if (reuseGraph) {
488  filteredA = MatrixFactory::BuildCopy(A);
489  lclFilteredA = filteredA->getLocalMatrixDevice();
490 
491  auto colidx = entries_type("entries", nnz_filtered);
492  lclGraph = local_graph_type(colidx, filtered_rowptr);
493  } else {
494  auto colidx = entries_type("entries", nnz_filtered);
495  auto values = values_type("values", nnz_filtered);
496  lclFilteredA = local_matrix_type("filteredA",
497  lclA.numRows(), lclA.numCols(),
498  nnz_filtered,
499  values, filtered_rowptr, colidx);
500  }
501 
502  if (lumpingChoice != MueLu::MatrixConstruction::no_lumping) {
503  if (reuseGraph) {
504  auto fillFunctor = MatrixConstruction::PointwiseFillReuseFunctor<local_matrix_type, local_graph_type, true>(lclA, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
505  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_reuse", range, fillFunctor);
506  } else {
507  if (lumpingChoice == MueLu::MatrixConstruction::diag_lumping) {
508  auto fillFunctor = MatrixConstruction::PointwiseFillNoReuseFunctor<local_matrix_type, MueLu::MatrixConstruction::diag_lumping>(lclA, results, lclFilteredA, filteringDirichletThreshold);
509  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_noreuse", range, fillFunctor);
510  } else if (lumpingChoice == MueLu::MatrixConstruction::distributed_lumping) {
511  auto fillFunctor = MatrixConstruction::PointwiseFillNoReuseFunctor<local_matrix_type, MueLu::MatrixConstruction::distributed_lumping>(lclA, results, lclFilteredA, filteringDirichletThreshold);
512  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_noreuse", range, fillFunctor);
513  }
514  }
515  } else {
516  if (reuseGraph) {
517  auto fillFunctor = MatrixConstruction::PointwiseFillReuseFunctor<local_matrix_type, local_graph_type, false>(lclA, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
518  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_reuse", range, fillFunctor);
519  } else {
520  auto fillFunctor = MatrixConstruction::PointwiseFillNoReuseFunctor<local_matrix_type, MueLu::MatrixConstruction::no_lumping>(lclA, results, lclFilteredA, filteringDirichletThreshold);
521  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_noreuse", range, fillFunctor);
522  }
523  }
524 
525  if (!reuseGraph)
526  filteredA = MatrixFactory::Build(lclFilteredA, A->getRowMap(), A->getColMap(), A->getDomainMap(), A->getRangeMap());
527  filteredA->SetFixedBlockSize(A->GetFixedBlockSize());
528 
529  if (reuseEigenvalue) {
530  // Reuse max eigenvalue from A
531  // It is unclear what eigenvalue is the best for the smoothing, but we already may have
532  // the D^{-1}A estimate in A, may as well use it.
533  // NOTE: ML does that too
534  filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate());
535  } else {
536  filteredA->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits<SC>::one());
537  }
538 
539  if (!reuseGraph) {
540  // Use graph of filteredA as graph.
541  lclGraph = filteredA->getCrsGraph()->getLocalGraphDevice();
542  }
543  graph = rcp(new LWGraph_kokkos(lclGraph, filteredA->getRowMap(), filteredA->getColMap(), "amalgamated graph of A"));
544  graph->SetBoundaryNodeMap(boundaryNodes);
545  }
546 
547  // Construct a second graph for coloring
548  if (generateColoringGraph) {
549  SubFactoryMonitor mColoringGraph(*this, "Construct coloring graph", currentLevel);
550 
551  filtered_rowptr = rowptr_type("rowptr_coloring_graph", lclA.numRows() + 1);
552  if (localizeColoringGraph) {
553  auto drop_offrank = Misc::DropOffRankFunctor(lclA, results);
554  ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *this, drop_offrank);
555  }
556  if (symmetrizeColoringGraph) {
557  auto symmetrize = Misc::SymmetrizeFunctor(lclA, results);
558  ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *this, symmetrize);
559  }
560  auto colidx = entries_type("entries_coloring_graph", nnz_filtered);
561  auto lclGraph = local_graph_type(colidx, filtered_rowptr);
562  auto graphConstruction = MatrixConstruction::GraphConstruction<local_matrix_type, local_graph_type>(lclA, results, lclGraph);
563  Kokkos::parallel_for("MueLu::CoalesceDrop::Construct_coloring_graph", range, graphConstruction);
564 
565  auto colorGraph = rcp(new LWGraph_kokkos(lclGraph, filteredA->getRowMap(), filteredA->getColMap(), "coloring graph"));
566  Set(currentLevel, "Coloring Graph", colorGraph);
567  }
568 
569  if (pL.get<bool>("filtered matrix: count negative diagonals")) {
570  // Count the negative diagonals (and display that information)
572  GetOStream(Runtime0) << "CoalesceDrop: Negative diagonals: " << neg_count << std::endl;
573  }
574 
575  LO dofsPerNode = 1;
576  Set(currentLevel, "DofsPerNode", dofsPerNode);
577  Set(currentLevel, "Graph", graph);
578  Set(currentLevel, "A", filteredA);
579 
580  return std::make_tuple(numDropped, boundaryNodes);
581 }
582 
583 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
584 std::tuple<GlobalOrdinal, typename MueLu::LWGraph_kokkos<LocalOrdinal, GlobalOrdinal, Node>::boundary_nodes_type> CoalesceDropFactory_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
585  BuildVector(Level& currentLevel) const {
586  FactoryMonitor m(*this, "BuildVector", currentLevel);
587 
590  using local_matrix_type = typename MatrixType::local_matrix_type;
591  using local_graph_type = typename GraphType::local_graph_type;
592  using rowptr_type = typename local_graph_type::row_map_type::non_const_type;
593  using entries_type = typename local_graph_type::entries_type::non_const_type;
594  using values_type = typename local_matrix_type::values_type::non_const_type;
595  using device_type = typename Node::device_type;
596  using memory_space = typename device_type::memory_space;
597  using results_view_type = Kokkos::View<DecisionType*, memory_space>;
598  using magnitudeType = typename Teuchos::ScalarTraits<Scalar>::magnitudeType;
599  using doubleMultiVector = Xpetra::MultiVector<magnitudeType, LO, GO, NO>;
600 
601  typedef Teuchos::ScalarTraits<Scalar> STS;
602  const magnitudeType zero = Teuchos::ScalarTraits<magnitudeType>::zero();
603 
604  auto A = Get<RCP<Matrix>>(currentLevel, "A");
605 
606  /* NOTE: storageblocksize (from GetStorageBlockSize()) is the size of a block in the chosen storage scheme.
607  blkSize is the number of storage blocks that must kept together during the amalgamation process.
608 
609  Both of these quantities may be different than numPDEs (from GetFixedBlockSize()), but the following must always hold:
610 
611  numPDEs = blkSize * storageblocksize.
612 
613  If numPDEs==1
614  Matrix is point storage (classical CRS storage). storageblocksize=1 and blkSize=1
615  No other values makes sense.
616 
617  If numPDEs>1
618  If matrix uses point storage, then storageblocksize=1 and blkSize=numPDEs.
619  If matrix uses block storage, with block size of n, then storageblocksize=n, and blkSize=numPDEs/n.
620  Thus far, only storageblocksize=numPDEs and blkSize=1 has been tested.
621  */
622 
623  TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() % A->GetStorageBlockSize() != 0, Exceptions::RuntimeError, "A->GetFixedBlockSize() needs to be a multiple of A->GetStorageBlockSize()");
624  LO blkSize = A->GetFixedBlockSize() / A->GetStorageBlockSize();
625 
626  auto amalInfo = Get<RCP<AmalgamationInfo>>(currentLevel, "UnAmalgamationInfo");
627 
628  const RCP<const Map> rowMap = A->getRowMap();
629  const RCP<const Map> colMap = A->getColMap();
630 
631  // build a node row map (uniqueMap = non-overlapping) and a node column map
632  // (nonUniqueMap = overlapping). The arrays rowTranslation and colTranslation
633  // stored in the AmalgamationInfo class container contain the local node id
634  // given a local dof id. The data is calculated in the AmalgamationFactory and
635  // stored in the variable "UnAmalgamationInfo" (which is of type AmalagamationInfo)
636  const RCP<const Map> uniqueMap = amalInfo->getNodeRowMap();
637  const RCP<const Map> nonUniqueMap = amalInfo->getNodeColMap();
638  Array<LO> rowTranslationArray = *(amalInfo->getRowTranslation()); // TAW should be transform that into a View?
639  Array<LO> colTranslationArray = *(amalInfo->getColTranslation());
640 
641  Kokkos::View<LO*, Kokkos::MemoryUnmanaged>
642  rowTranslationView(rowTranslationArray.getRawPtr(), rowTranslationArray.size());
643  Kokkos::View<LO*, Kokkos::MemoryUnmanaged>
644  colTranslationView(colTranslationArray.getRawPtr(), colTranslationArray.size());
645 
646  // get number of local nodes
647  LO numNodes = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
648  typedef typename Kokkos::View<LocalOrdinal*, typename Node::device_type> id_translation_type;
649  id_translation_type rowTranslation("dofId2nodeId", rowTranslationArray.size());
650  id_translation_type colTranslation("ov_dofId2nodeId", colTranslationArray.size());
651  Kokkos::deep_copy(rowTranslation, rowTranslationView);
652  Kokkos::deep_copy(colTranslation, colTranslationView);
653 
654  // extract striding information
655  blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map)
656  LocalOrdinal blkId = -1; //< the block id within a strided map or -1 if it is a full block map
657  LocalOrdinal blkPartSize = A->GetFixedBlockSize(); //< stores block size of part blkId (or the full block size)
658  if (A->IsView("stridedMaps") == true) {
659  const RCP<const Map> myMap = A->getRowMap("stridedMaps");
660  const RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
661  TEUCHOS_TEST_FOR_EXCEPTION(strMap.is_null() == true, Exceptions::RuntimeError, "Map is not of type stridedMap");
662  blkSize = Teuchos::as<const LocalOrdinal>(strMap->getFixedBlockSize());
663  blkId = strMap->getStridedBlockId();
664  if (blkId > -1)
665  blkPartSize = Teuchos::as<LocalOrdinal>(strMap->getStridingData()[blkId]);
666  }
667 
668  TEUCHOS_TEST_FOR_EXCEPTION(A->getRowMap()->getLocalNumElements() % blkPartSize != 0, MueLu::Exceptions::RuntimeError, "MueLu::CoalesceDropFactory: Number of local elements is " << A->getRowMap()->getLocalNumElements() << " but should be a multiple of " << blkPartSize);
669 
671  // Process parameterlist
672  const ParameterList& pL = GetParameterList();
673 
674  // Boundary detection
675  const magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
676  const magnitudeType rowSumTol = as<magnitudeType>(pL.get<double>("aggregation: row sum drop tol"));
677  const LocalOrdinal dirichletNonzeroThreshold = 1;
678  const bool useGreedyDirichlet = pL.get<bool>("aggregation: greedy Dirichlet");
679  TEUCHOS_TEST_FOR_EXCEPTION(rowSumTol > zero, MueLu::Exceptions::RuntimeError, "MueLu::CoalesceDropFactory: RowSum is not implemented for vectorial problems.");
680 
681  // Dropping
682  bool useBlocking = pL.get<bool>("aggregation: use blocking");
683  std::string droppingMethod = pL.get<std::string>("aggregation: drop scheme");
684  std::string socUsesMatrix = pL.get<std::string>("aggregation: strength-of-connection: matrix");
685  std::string socUsesMeasure = pL.get<std::string>("aggregation: strength-of-connection: measure");
686  std::string distanceLaplacianMetric = pL.get<std::string>("aggregation: distance laplacian metric");
687  bool symmetrizeDroppedGraph = pL.get<bool>("aggregation: symmetrize graph after dropping");
688  magnitudeType threshold;
689  // If we're doing the ML-style halving of the drop tol at each level, we do that here.
690  if (pL.get<bool>("aggregation: use ml scaling of drop tol"))
691  threshold = pL.get<double>("aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID());
692  else
693  threshold = as<magnitudeType>(pL.get<double>("aggregation: drop tol"));
694  bool aggregationMayCreateDirichlet = pL.get<bool>("aggregation: dropping may create Dirichlet");
695 
696  // Fill
697  const bool reuseGraph = pL.get<bool>("filtered matrix: reuse graph");
698  const bool reuseEigenvalue = pL.get<bool>("filtered matrix: reuse eigenvalue");
699 
700  const bool useRootStencil = pL.get<bool>("filtered matrix: use root stencil");
701  const bool useSpreadLumping = pL.get<bool>("filtered matrix: use spread lumping");
702  const std::string lumpingChoiceString = pL.get<std::string>("filtered matrix: lumping choice");
704  if (lumpingChoiceString == "diag lumping")
706  else if (lumpingChoiceString == "distributed lumping")
708 
709  const magnitudeType filteringDirichletThreshold = as<magnitudeType>(pL.get<double>("filtered matrix: Dirichlet threshold"));
710 
711  // coloring graph
712  bool generateColoringGraph = pL.get<bool>("aggregation: coloring: use color graph");
713  const bool localizeColoringGraph = pL.get<bool>("aggregation: coloring: localize color graph");
714  const bool symmetrizeColoringGraph = true;
715 
716 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
717  translateOldAlgoParam(pL, droppingMethod, useBlocking, socUsesMatrix, socUsesMeasure, symmetrizeDroppedGraph, generateColoringGraph, threshold, lumpingChoice);
718 #endif
719  {
720  std::stringstream ss;
721  ss << "dropping scheme = \"" << droppingMethod << "\", strength-of-connection measure = \"" << socUsesMeasure << "\", strength-of-connection matrix = \"" << socUsesMatrix << "\", ";
722  if (socUsesMatrix == "distance laplacian")
723  ss << "distance laplacian metric = \"" << distanceLaplacianMetric << "\", ";
724  ss << "threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << ", useBlocking = " << useBlocking;
725  ss << ", symmetrizeDroppedGraph = " << symmetrizeDroppedGraph << std::endl;
726 
727  GetOStream(Runtime0) << ss.str();
728  }
729 
730  TEUCHOS_ASSERT(!useRootStencil);
731  TEUCHOS_ASSERT(!useSpreadLumping);
732  TEUCHOS_ASSERT((lumpingChoice != MueLu::MatrixConstruction::distributed_lumping) || !reuseGraph);
733  if (droppingMethod == "cut-drop")
734  TEUCHOS_TEST_FOR_EXCEPTION(threshold > 1.0, Exceptions::RuntimeError, "For cut-drop algorithms, \"aggregation: drop tol\" = " << threshold << ", needs to be <= 1.0");
735 
737  // We perform four sweeps over the rows of A:
738  // Pass 1: detection of boundary nodes
739  // Pass 2: diagonal extraction
740  // Pass 3: drop decision for each entry and construction of the rowptr of the filtered matrix
741  // Pass 4: fill of the filtered matrix
742  //
743  // Pass 1 and 3 apply a sequence of criteria to each row of the matrix.
744 
745  // TODO: We could merge pass 1 and 2.
746 
747  auto crsA = toCrsMatrix(A);
748  auto lclA = crsA->getLocalMatrixDevice();
749  auto range = range_type(0, numNodes);
750 
752  // Pass 1: Detect boundary nodes
753  //
754  // The following criteria are available:
755  // - BoundaryDetection::VectorDirichletFunctor
756  // Marks rows as Dirichlet based on value threshold and number of off-diagonal entries
757 
758  // Dirichlet nodes
759  auto boundaryNodes = boundary_nodes_type("boundaryNodes", numNodes); // initialized to false
760  {
761  SubFactoryMonitor mBoundary(*this, "Boundary detection", currentLevel);
762 
763  if (useGreedyDirichlet) {
764  auto dirichlet_detection = BoundaryDetection::VectorDirichletFunctor<local_matrix_type, true>(lclA, blkPartSize, boundaryNodes, dirichletThreshold, dirichletNonzeroThreshold);
765  runBoundaryFunctors(lclA, boundaryNodes, dirichlet_detection);
766  } else {
767  auto dirichlet_detection = BoundaryDetection::VectorDirichletFunctor<local_matrix_type, false>(lclA, blkPartSize, boundaryNodes, dirichletThreshold, dirichletNonzeroThreshold);
768  runBoundaryFunctors(lclA, boundaryNodes, dirichlet_detection);
769  }
770  }
771  // In what follows, boundaryNodes can still still get modified if aggregationMayCreateDirichlet == true.
772  // Otherwise we're now done with it now.
773 
775  // Pass 2 & 3: Diagonal extraction and determine dropping and construct
776  // rowptr of filtered matrix
777  //
778  // The following criteria are available:
779  // - Misc::VectorDropBoundaryFunctor
780  // Drop all rows that have been marked as Dirichlet
781  // - Misc::DropOffRankFunctor
782  // Drop all entries that are off-rank
783  // - ClassicalDropping::DropFunctor
784  // Classical dropping
785  // - DistanceLaplacian::VectorDropFunctor
786  // Distance Laplacian dropping
787  // - Misc::KeepDiagonalFunctor
788  // Mark diagonal as KEEP
789  // - Misc::MarkSingletonFunctor
790  // Mark singletons after dropping as Dirichlet
791 
792  // rowptr of filtered A
793  auto filtered_rowptr = rowptr_type("rowptr", lclA.numRows() + 1);
794  auto graph_rowptr = rowptr_type("rowptr", numNodes + 1);
795  // Number of nonzeros of filtered A and graph
796  Kokkos::pair<LocalOrdinal, LocalOrdinal> nnz = {0, 0};
797 
798  // dropping decisions for each entry
799  auto results = results_view_type("results", lclA.nnz()); // initialized to UNDECIDED
800 
801  RCP<Matrix> mergedA;
802  {
803  SubFactoryMonitor mDropping(*this, "Dropping decisions", currentLevel);
804 
805  {
806  // Construct merged A.
807 
808  auto merged_rowptr = rowptr_type("rowptr", numNodes + 1);
809  LocalOrdinal nnz_merged = 0;
810 
811  auto functor = MatrixConstruction::MergeCountFunctor(lclA, blkPartSize, colTranslation, merged_rowptr);
812  Kokkos::parallel_scan("MergeCount", range, functor, nnz_merged);
813 
814  local_graph_type lclMergedGraph;
815  auto colidx_merged = entries_type("entries", nnz_merged);
816  auto values_merged = values_type("values", nnz_merged);
817 
818  local_matrix_type lclMergedA = local_matrix_type("mergedA",
819  numNodes, nonUniqueMap->getLocalNumElements(),
820  nnz_merged,
821  values_merged, merged_rowptr, colidx_merged);
822 
823  auto fillFunctor = MatrixConstruction::MergeFillFunctor<local_matrix_type>(lclA, blkSize, colTranslation, lclMergedA);
824  Kokkos::parallel_for("MueLu::CoalesceDrop::MergeFill", range, fillFunctor);
825 
826  mergedA = Xpetra::MatrixFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(lclMergedA, uniqueMap, nonUniqueMap, uniqueMap, uniqueMap);
827  }
828 
829  if (threshold != zero) {
830  if (socUsesMatrix == "A") {
831  if (socUsesMeasure == "unscaled") {
832  VectorDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::UnscaledMeasure>::runDroppingFunctors_on_A(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
833  } else if (socUsesMeasure == "smoothed aggregation") {
834  VectorDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SmoothedAggregationMeasure>::runDroppingFunctors_on_A(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
835  } else if (socUsesMeasure == "signed ruge-stueben") {
836  VectorDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedRugeStuebenMeasure>::runDroppingFunctors_on_A(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
837  } else if (socUsesMeasure == "signed smoothed aggregation") {
838  VectorDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedSmoothedAggregationMeasure>::runDroppingFunctors_on_A(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
839  }
840  } else if (socUsesMatrix == "distance laplacian") {
841  auto coords = Get<RCP<doubleMultiVector>>(currentLevel, "Coordinates");
842 
843  Array<double> dlap_weights = pL.get<Array<double>>("aggregation: distance laplacian directional weights");
844  LocalOrdinal interleaved_blocksize = as<LocalOrdinal>(pL.get<int>("aggregation: block diagonal: interleaved blocksize"));
845  if (socUsesMeasure == "distance laplacian") {
846  LO dim = (LO)coords->getNumVectors();
847  // If anything isn't 1.0 we need to turn on the weighting
848  bool non_unity = false;
849  for (LO i = 0; !non_unity && i < (LO)dlap_weights.size(); i++) {
850  if (dlap_weights[i] != 1.0) {
851  non_unity = true;
852  }
853  }
854  if (non_unity) {
855  if ((LO)dlap_weights.size() == dim) {
856  distanceLaplacianMetric = "weighted";
857  } else if ((LO)dlap_weights.size() == interleaved_blocksize * dim)
858  distanceLaplacianMetric = "block weighted";
859  else {
860  TEUCHOS_TEST_FOR_EXCEPTION(1, Exceptions::RuntimeError,
861  "length of 'aggregation: distance laplacian directional weights' must equal the coordinate dimension OR the coordinate dimension times the blocksize");
862  }
863  if (GetVerbLevel() & Statistics1)
864  GetOStream(Statistics1) << "Using distance laplacian weights: " << dlap_weights << std::endl;
865  }
866  }
867 
868  if (socUsesMeasure == "unscaled") {
869  VectorDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::UnscaledMeasure>::runDroppingFunctors_on_dlap(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, dlap_weights, interleaved_blocksize, currentLevel, *this);
870  } else if (socUsesMeasure == "smoothed aggregation") {
871  VectorDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SmoothedAggregationMeasure>::runDroppingFunctors_on_dlap(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, dlap_weights, interleaved_blocksize, currentLevel, *this);
872  } else if (socUsesMeasure == "signed ruge-stueben") {
873  VectorDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedRugeStuebenMeasure>::runDroppingFunctors_on_dlap(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, dlap_weights, interleaved_blocksize, currentLevel, *this);
874  } else if (socUsesMeasure == "signed smoothed aggregation") {
875  VectorDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedSmoothedAggregationMeasure>::runDroppingFunctors_on_dlap(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, dlap_weights, interleaved_blocksize, currentLevel, *this);
876  }
877  }
878  } else {
879  Kokkos::deep_copy(results, KEEP);
880 
881  auto no_op = Misc::NoOpFunctor<LocalOrdinal>();
882  VectorDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, useBlocking, currentLevel, *this, no_op);
883  }
884 
885  if (symmetrizeDroppedGraph) {
886  auto symmetrize = Misc::SymmetrizeFunctor(lclA, results);
887  VectorDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, useBlocking, currentLevel, *this, symmetrize);
888  }
889  }
890  LocalOrdinal nnz_filtered = nnz.first;
891  LocalOrdinal nnz_graph = nnz.second;
892  GO numTotal = lclA.nnz();
893  GO numDropped = numTotal - nnz_filtered;
894  // We now know the number of entries of filtered A and have the final rowptr.
895 
897  // Pass 4: Create local matrix for filtered A
898  //
899  // Dropped entries are optionally lumped to the diagonal.
900 
901  RCP<Matrix> filteredA;
902  RCP<LWGraph_kokkos> graph;
903  {
904  SubFactoryMonitor mFill(*this, "Filtered matrix fill", currentLevel);
905 
906  local_matrix_type lclFilteredA;
907  if (reuseGraph) {
908  lclFilteredA = local_matrix_type("filteredA", lclA.graph, lclA.numCols());
909  } else {
910  auto colidx = entries_type("entries", nnz_filtered);
911  auto values = values_type("values", nnz_filtered);
912  lclFilteredA = local_matrix_type("filteredA",
913  lclA.numRows(), lclA.numCols(),
914  nnz_filtered,
915  values, filtered_rowptr, colidx);
916  }
917 
918  local_graph_type lclGraph;
919  {
920  auto colidx = entries_type("entries", nnz_graph);
921  lclGraph = local_graph_type(colidx, graph_rowptr);
922  }
923 
924  if (lumpingChoice != MueLu::MatrixConstruction::no_lumping) {
925  if (reuseGraph) {
926  auto fillFunctor = MatrixConstruction::VectorFillFunctor<local_matrix_type, true, true>(lclA, blkPartSize, colTranslation, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
927  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_reuse", range, fillFunctor);
928  } else {
929  auto fillFunctor = MatrixConstruction::VectorFillFunctor<local_matrix_type, true, false>(lclA, blkPartSize, colTranslation, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
930  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_noreuse", range, fillFunctor);
931  }
932  } else {
933  if (reuseGraph) {
934  auto fillFunctor = MatrixConstruction::VectorFillFunctor<local_matrix_type, false, true>(lclA, blkSize, colTranslation, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
935  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_reuse", range, fillFunctor);
936  } else {
937  auto fillFunctor = MatrixConstruction::VectorFillFunctor<local_matrix_type, false, false>(lclA, blkSize, colTranslation, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
938  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_noreuse", range, fillFunctor);
939  }
940  }
941 
942  filteredA = Xpetra::MatrixFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(lclFilteredA, A->getRowMap(), A->getColMap(), A->getDomainMap(), A->getRangeMap());
943  filteredA->SetFixedBlockSize(blkSize);
944 
945  if (reuseEigenvalue) {
946  // Reuse max eigenvalue from A
947  // It is unclear what eigenvalue is the best for the smoothing, but we already may have
948  // the D^{-1}A estimate in A, may as well use it.
949  // NOTE: ML does that too
950  filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate());
951  } else {
952  filteredA->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits<SC>::one());
953  }
954 
955  graph = rcp(new LWGraph_kokkos(lclGraph, uniqueMap, nonUniqueMap, "amalgamated graph of A"));
956  graph->SetBoundaryNodeMap(boundaryNodes);
957  }
958 
959  // Construct a second graph for coloring
960  if (generateColoringGraph) {
961  SubFactoryMonitor mColoringGraph(*this, "Construct coloring graph", currentLevel);
962 
963  filtered_rowptr = rowptr_type("rowptr_coloring_graph", lclA.numRows() + 1);
964  graph_rowptr = rowptr_type("rowptr", numNodes + 1);
965  if (localizeColoringGraph) {
966  auto drop_offrank = Misc::DropOffRankFunctor(lclA, results);
967  VectorDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, useBlocking, currentLevel, *this, drop_offrank);
968  }
969  if (symmetrizeColoringGraph) {
970  auto symmetrize = Misc::SymmetrizeFunctor(lclA, results);
971  VectorDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, useBlocking, currentLevel, *this, symmetrize);
972  }
973  auto colidx = entries_type("entries_coloring_graph", nnz_filtered);
974  auto lclGraph = local_graph_type(colidx, filtered_rowptr);
975  auto graphConstruction = MatrixConstruction::GraphConstruction<local_matrix_type, local_graph_type>(lclA, results, lclGraph);
976  Kokkos::parallel_for("MueLu::CoalesceDrop::Construct_coloring_graph", range, graphConstruction);
977 
978  auto colorGraph = rcp(new LWGraph_kokkos(lclGraph, filteredA->getRowMap(), filteredA->getColMap(), "coloring graph"));
979  Set(currentLevel, "Coloring Graph", colorGraph);
980  }
981 
982  LO dofsPerNode = blkSize;
983 
984  Set(currentLevel, "DofsPerNode", dofsPerNode);
985  Set(currentLevel, "Graph", graph);
986  Set(currentLevel, "A", filteredA);
987 
988  return std::make_tuple(numDropped, boundaryNodes);
989 }
990 
991 } // namespace MueLu
992 #endif // MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
MueLu::DefaultLocalOrdinal LocalOrdinal
Lightweight MueLu representation of a compressed row storage graph.
KOKKOS_INLINE_FUNCTION void SetBoundaryNodeMap(const boundary_nodes_type bndry)
Set boolean array indicating which rows correspond to Dirichlet boundaries.
void setValidator(RCP< const ParameterEntryValidator > const &validator)
static GlobalOrdinal CountNegativeDiagonalEntries(const Matrix &A)
Counts the number of negative diagonal entries.
GlobalOrdinal GO
T & get(const std::string &name, T def_value)
void translateOldAlgoParam(const Teuchos::ParameterList &pL, std::string &droppingMethod, bool &useBlocking, std::string &socUsesMatrix, std::string &socUsesMeasure, bool &symmetrizeDroppedGraph, bool &generateColoringGraph, magnitudeType &threshold, MueLu::MatrixConstruction::lumpingType &lumpingChoice)
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Print more statistics.
LocalOrdinal LO
One-liner description of what is happening.
ParameterList & set(std::string const &name, T &&value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
Functor that drops boundary nodes for a blockSize == 1 problem.
void runBoundaryFunctors(local_matrix_type &lclA, boundary_nodes_view &boundaryNodes, Functors &...functors)
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Functor that symmetrizes the dropping decisions.
MueLu::DefaultGlobalOrdinal GlobalOrdinal
Functor that drops off-rank entries.
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:63
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
static void runDroppingFunctors_on_A(matrix_type &A, matrix_type &mergedA, LocalOrdinal blkPartSize, block_indices_view_type &rowTranslation, block_indices_view_type &colTranslation, results_view &results, rowptr_type &filtered_rowptr, rowptr_type &graph_rowptr, nnz_count_type &nnz, boundary_nodes_type &boundaryNodes, const std::string &droppingMethod, const magnitudeType threshold, const bool aggregationMayCreateDirichlet, const bool symmetrizeDroppedGraph, const bool useBlocking, Level &level, const Factory &factory)
typename MueLu::LWGraph_kokkos< LocalOrdinal, GlobalOrdinal, Node >::boundary_nodes_type boundary_nodes_type
void DeclareInput(Level &currentLevel) const
Input.
Functor that fills the filtered matrix while reusing the graph of the matrix before dropping...
Functor for marking nodes as Dirichlet.
static void runDroppingFunctors_on_dlap(matrix_type &A, matrix_type &mergedA, LocalOrdinal blkPartSize, block_indices_view_type &rowTranslation, block_indices_view_type &colTranslation, results_view &results, rowptr_type &filtered_rowptr, rowptr_type &graph_rowptr, nnz_count_type &nnz, boundary_nodes_type &boundaryNodes, const std::string &droppingMethod, const magnitudeType threshold, const bool aggregationMayCreateDirichlet, const bool symmetrizeDroppedGraph, const bool useBlocking, const std::string &distanceLaplacianMetric, Teuchos::Array< double > &dlap_weights, LocalOrdinal interleaved_blocksize, Level &level, const Factory &factory)
Kokkos::RangePolicy< local_ordinal_type, execution_space > range_type
std::tuple< GlobalOrdinal, boundary_nodes_type > BuildVector(Level &currentLevel) const
static void runDroppingFunctors_on_dlap(matrix_type &A, results_view &results, rowptr_type &filtered_rowptr, LocalOrdinal &nnz_filtered, boundary_nodes_type &boundaryNodes, const std::string &droppingMethod, const magnitudeType threshold, const bool aggregationMayCreateDirichlet, const bool symmetrizeDroppedGraph, const bool useBlocking, const std::string &distanceLaplacianMetric, Level &level, const Factory &factory)
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void Build(Level &currentLevel) const
Build an object with this factory.
Functor for marking nodes as Dirichlet based on rowsum.
Functor that serially applies sub-functors to rows.
size_type size() const
static void runDroppingFunctors_on_A(matrix_type &A, results_view &results, rowptr_type &filtered_rowptr, LocalOrdinal &nnz_filtered, boundary_nodes_type &boundaryNodes, const std::string &droppingMethod, const magnitudeType threshold, const bool aggregationMayCreateDirichlet, const bool symmetrizeDroppedGraph, const bool useBlocking, Level &level, const Factory &factory)
#define SET_VALID_ENTRY(name)
Functor for marking nodes as Dirichlet in a block operator.
std::tuple< GlobalOrdinal, boundary_nodes_type > BuildScalar(Level &currentLevel) const
Functor does not reuse the graph of the matrix for a problem with blockSize == 1. ...
static RCP< Matrix > Build(const RCP< const Map > &rowMap, size_t maxNumEntriesPerRow, Xpetra::ProfileType pftype=Xpetra::DynamicProfile)
Exception throws to report errors in the internal logical of the program.
#define TEUCHOS_ASSERT(assertion_test)
ParameterEntry & getEntry(const std::string &name)
bool is_null() const