MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_CoalesceDropFactory_kokkos_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // MueLu: A package for multigrid based preconditioning
4 //
5 // Copyright 2012 NTESS and the MueLu contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
11 #define MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
12 
13 #include <Kokkos_Core.hpp>
14 #include <KokkosSparse_CrsMatrix.hpp>
15 #include <sstream>
16 #include <string>
17 #include <tuple>
18 
19 #include "Xpetra_Matrix.hpp"
20 
22 
23 #include "MueLu_AmalgamationInfo.hpp"
24 #include "MueLu_Exceptions.hpp"
25 #include "MueLu_Level.hpp"
26 #include "MueLu_LWGraph_kokkos.hpp"
27 #include "MueLu_MasterList.hpp"
28 #include "MueLu_Monitor.hpp"
29 #include "MueLu_Utilities.hpp"
30 
32 #include "MueLu_DroppingCommon.hpp"
34 
35 // The different dropping algorithms are split up over several translation units. This speeds up compilation and also avoids launch latencies on GPU.
37 #include "MueLu_ScalarDroppingClassical.hpp"
38 #include "MueLu_ScalarDroppingDistanceLaplacian.hpp"
40 #include "MueLu_VectorDroppingClassical.hpp"
41 #include "MueLu_VectorDroppingDistanceLaplacian.hpp"
42 
43 namespace MueLu {
44 
45 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
47  RCP<ParameterList> validParamList = rcp(new ParameterList());
48 
49 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
50  SET_VALID_ENTRY("aggregation: use blocking");
51  SET_VALID_ENTRY("aggregation: drop tol");
52  SET_VALID_ENTRY("aggregation: use ml scaling of drop tol");
53  SET_VALID_ENTRY("aggregation: Dirichlet threshold");
54  SET_VALID_ENTRY("aggregation: greedy Dirichlet");
55  SET_VALID_ENTRY("aggregation: row sum drop tol");
56  SET_VALID_ENTRY("aggregation: strength-of-connection: matrix");
57  SET_VALID_ENTRY("aggregation: strength-of-connection: measure");
58  SET_VALID_ENTRY("aggregation: drop scheme");
59  SET_VALID_ENTRY("aggregation: block diagonal: interleaved blocksize");
60  SET_VALID_ENTRY("aggregation: distance laplacian metric");
61  SET_VALID_ENTRY("aggregation: distance laplacian directional weights");
62  SET_VALID_ENTRY("aggregation: dropping may create Dirichlet");
63 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
64  SET_VALID_ENTRY("aggregation: distance laplacian algo");
65  SET_VALID_ENTRY("aggregation: classical algo");
66 #endif
67  SET_VALID_ENTRY("aggregation: symmetrize graph after dropping");
68  SET_VALID_ENTRY("aggregation: coloring: use color graph");
69  SET_VALID_ENTRY("aggregation: coloring: localize color graph");
70 
71  SET_VALID_ENTRY("filtered matrix: use lumping");
72  SET_VALID_ENTRY("filtered matrix: reuse graph");
73  SET_VALID_ENTRY("filtered matrix: reuse eigenvalue");
74 
75  SET_VALID_ENTRY("filtered matrix: use root stencil");
76  SET_VALID_ENTRY("filtered matrix: lumping choice");
77  SET_VALID_ENTRY("filtered matrix: use spread lumping");
78  SET_VALID_ENTRY("filtered matrix: spread lumping diag dom growth factor");
79  SET_VALID_ENTRY("filtered matrix: spread lumping diag dom cap");
80  SET_VALID_ENTRY("filtered matrix: Dirichlet threshold");
81 
82 #undef SET_VALID_ENTRY
83  validParamList->set<bool>("lightweight wrap", true, "Experimental option for lightweight graph access");
84 #ifndef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
85  validParamList->getEntry("aggregation: drop scheme").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("point-wise", "cut-drop"))));
86 #else
87  validParamList->getEntry("aggregation: drop scheme").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("point-wise", "cut-drop", "signed classical sa", "classical", "distance laplacian", "signed classical", "block diagonal", "block diagonal classical", "block diagonal distance laplacian", "block diagonal signed classical", "block diagonal colored signed classical", "signed classical distance laplacian", "signed classical sa distance laplacian"))));
88  validParamList->getEntry("aggregation: classical algo").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("default", "unscaled cut", "scaled cut", "scaled cut symmetric"))));
89  validParamList->getEntry("aggregation: distance laplacian algo").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("default", "unscaled cut", "scaled cut", "scaled cut symmetric"))));
90 #endif
91  validParamList->getEntry("aggregation: strength-of-connection: matrix").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("A", "distance laplacian"))));
92  validParamList->getEntry("aggregation: strength-of-connection: measure").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("smoothed aggregation", "signed smoothed aggregation", "signed ruge-stueben", "unscaled"))));
93  validParamList->getEntry("aggregation: distance laplacian metric").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("unweighted", "material"))));
94 
95  validParamList->set<RCP<const FactoryBase>>("A", Teuchos::null, "Generating factory of the matrix A");
96  validParamList->set<RCP<const FactoryBase>>("UnAmalgamationInfo", Teuchos::null, "Generating factory for UnAmalgamationInfo");
97  validParamList->set<RCP<const FactoryBase>>("Coordinates", Teuchos::null, "Generating factory for Coordinates");
98  validParamList->set<RCP<const FactoryBase>>("BlockNumber", Teuchos::null, "Generating factory for BlockNumber");
99  validParamList->set<RCP<const FactoryBase>>("Material", Teuchos::null, "Generating factory for Material");
100 
101  return validParamList;
102 }
103 
104 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
106  Input(currentLevel, "A");
107  Input(currentLevel, "UnAmalgamationInfo");
108 
109  const ParameterList& pL = GetParameterList();
110 
111  std::string socUsesMatrix = pL.get<std::string>("aggregation: strength-of-connection: matrix");
112  bool needCoords = (socUsesMatrix == "distance laplacian");
113 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
114  std::string droppingMethod = pL.get<std::string>("aggregation: drop scheme");
115  needCoords |= (droppingMethod.find("distance laplacian") != std::string::npos);
116 #endif
117  if (needCoords) {
118  Input(currentLevel, "Coordinates");
119  std::string distLaplMetric = pL.get<std::string>("aggregation: distance laplacian metric");
120  if (distLaplMetric == "material")
121  Input(currentLevel, "Material");
122  }
123 
124  bool useBlocking = pL.get<bool>("aggregation: use blocking");
125 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
126  useBlocking |= (droppingMethod.find("block diagonal") != std::string::npos);
127 #endif
128  if (useBlocking) {
129  Input(currentLevel, "BlockNumber");
130  }
131 }
132 
133 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
135  Build(Level& currentLevel) const {
136  auto A = Get<RCP<Matrix>>(currentLevel, "A");
137  TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() % A->GetStorageBlockSize() != 0, Exceptions::RuntimeError, "A->GetFixedBlockSize() needs to be a multiple of A->GetStorageBlockSize()");
138  LO blkSize = A->GetFixedBlockSize() / A->GetStorageBlockSize();
139 
140  std::tuple<GlobalOrdinal, boundary_nodes_type> results;
141  if (blkSize == 1)
142  results = BuildScalar(currentLevel);
143  else
144  results = BuildVector(currentLevel);
145 
146  if (GetVerbLevel() & Statistics1) {
147  GlobalOrdinal numDropped = std::get<0>(results);
148  auto boundaryNodes = std::get<1>(results);
149 
150  GO numLocalBoundaryNodes = 0;
151 
152  Kokkos::parallel_reduce(
153  "MueLu:CoalesceDropF:Build:bnd", range_type(0, boundaryNodes.extent(0)),
154  KOKKOS_LAMBDA(const LO i, GO& n) {
155  if (boundaryNodes(i))
156  n++;
157  },
158  numLocalBoundaryNodes);
159 
160  if (IsPrint(Statistics1)) {
161  auto comm = A->getRowMap()->getComm();
162 
163  std::vector<GlobalOrdinal> localStats = {numLocalBoundaryNodes, numDropped};
164  std::vector<GlobalOrdinal> globalStats(2);
165  Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, 2, localStats.data(), globalStats.data());
166 
167  GO numGlobalTotal = A->getGlobalNumEntries();
168  GO numGlobalBoundaryNodes = globalStats[0];
169  GO numGlobalDropped = globalStats[1];
170 
171  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
172  if (numGlobalTotal != 0) {
173  GetOStream(Statistics1) << "Number of dropped entries: "
174  << numGlobalDropped << "/" << numGlobalTotal
175  << " (" << 100 * Teuchos::as<double>(numGlobalDropped) / Teuchos::as<double>(numGlobalTotal) << "%)" << std::endl;
176  }
177  }
178  }
179 }
180 
181 template <class local_matrix_type, class boundary_nodes_view, class... Functors>
182 void runBoundaryFunctors(local_matrix_type& lclA, boundary_nodes_view& boundaryNodes, Functors&... functors) {
183  using local_ordinal_type = typename local_matrix_type::ordinal_type;
184  using execution_space = typename local_matrix_type::execution_space;
185  using range_type = Kokkos::RangePolicy<local_ordinal_type, execution_space>;
186  auto range = range_type(0, boundaryNodes.extent(0));
187  auto boundaries = BoundaryDetection::BoundaryFunctor(lclA, functors...);
188  Kokkos::parallel_for("CoalesceDrop::BoundaryDetection", range, boundaries);
189 }
190 
191 template <class magnitudeType>
192 void translateOldAlgoParam(const Teuchos::ParameterList& pL, std::string& droppingMethod, bool& useBlocking, std::string& socUsesMatrix, std::string& socUsesMeasure, bool& symmetrizeDroppedGraph, bool& generateColoringGraph, magnitudeType& threshold, MueLu::MatrixConstruction::lumpingType& lumpingChoice) {
193  std::set<std::string> validDroppingMethods = {"piece-wise", "cut-drop"};
194 
195  if (!pL.get<bool>("filtered matrix: use lumping")) lumpingChoice = MueLu::MatrixConstruction::no_lumping;
196 
197  if (validDroppingMethods.find(droppingMethod) == validDroppingMethods.end()) {
198  std::string algo = droppingMethod;
199  std::string classicalAlgoStr = pL.get<std::string>("aggregation: classical algo");
200  std::string distanceLaplacianAlgoStr = pL.get<std::string>("aggregation: distance laplacian algo");
201 
202  // Remove prefix "block diagonal" from algo
203  if (algo.find("block diagonal") == 0) {
204  useBlocking = true;
205  algo = algo.substr(14);
206  if (algo != "") {
207  algo = algo.substr(1);
208  }
209  }
210 
211  if ((algo == "classical") || (algo == "signed classical sa") || (algo == "signed classical") || (algo == "colored signed classical")) {
212  socUsesMatrix = "A";
213 
214  if (algo == "classical") {
215  socUsesMeasure = "smoothed aggregation";
216  } else if (algo == "signed classical sa") {
217  socUsesMeasure = "signed smoothed aggregation";
218  } else if (algo == "signed classical") {
219  socUsesMeasure = "signed ruge-stueben";
220  } else if (algo == "colored signed classical") {
221  socUsesMeasure = "signed ruge-stueben";
222  generateColoringGraph = true;
223  }
224 
225  if (classicalAlgoStr == "default")
226  droppingMethod = "point-wise";
227  else if (classicalAlgoStr == "unscaled cut") {
228  socUsesMeasure = "unscaled";
229  droppingMethod = "cut-drop";
230  } else if (classicalAlgoStr == "scaled cut") {
231  droppingMethod = "cut-drop";
232  } else if (classicalAlgoStr == "scaled cut symmetric") {
233  droppingMethod = "cut-drop";
234  symmetrizeDroppedGraph = true;
235  }
236  } else if ((algo == "distance laplacian") || (algo == "signed classical sa distance laplacian") || (algo == "signed classical distance laplacian")) {
237  socUsesMatrix = "distance laplacian";
238 
239  if (algo == "distance laplacian") {
240  socUsesMeasure = "smoothed aggregation";
241  } else if (algo == "signed classical sa distance laplacian") {
242  socUsesMeasure = "signed smoothed aggregation";
243  } else if (algo == "signed classical distance laplacian") {
244  socUsesMeasure = "signed ruge-stueben";
245  }
246 
247  if (distanceLaplacianAlgoStr == "default")
248  droppingMethod = "point-wise";
249  else if (distanceLaplacianAlgoStr == "unscaled cut") {
250  socUsesMeasure = "unscaled";
251  droppingMethod = "cut-drop";
252  } else if (distanceLaplacianAlgoStr == "scaled cut") {
253  droppingMethod = "cut-drop";
254  } else if (distanceLaplacianAlgoStr == "scaled cut symmetric") {
255  droppingMethod = "cut-drop";
256  symmetrizeDroppedGraph = true;
257  }
258  } else if (algo == "") {
259  // algo was "block diagonal", but we process and remove the "block diagonal" part
260  socUsesMatrix = "A";
262  }
263  }
264 }
265 
266 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
267 std::tuple<GlobalOrdinal, typename MueLu::LWGraph_kokkos<LocalOrdinal, GlobalOrdinal, Node>::boundary_nodes_type> CoalesceDropFactory_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
268  BuildScalar(Level& currentLevel) const {
269  FactoryMonitor m(*this, "BuildScalar", currentLevel);
270 
273  using local_matrix_type = typename MatrixType::local_matrix_type;
274  using local_graph_type = typename GraphType::local_graph_type;
275  using rowptr_type = typename local_graph_type::row_map_type::non_const_type;
276  using entries_type = typename local_graph_type::entries_type::non_const_type;
277  using values_type = typename local_matrix_type::values_type::non_const_type;
278  using device_type = typename Node::device_type;
279  using memory_space = typename device_type::memory_space;
280  using results_view_type = Kokkos::View<DecisionType*, memory_space>;
281  using magnitudeType = typename Teuchos::ScalarTraits<Scalar>::magnitudeType;
282  using doubleMultiVector = Xpetra::MultiVector<magnitudeType, LO, GO, NO>;
283 
284  typedef Teuchos::ScalarTraits<Scalar> STS;
285  const magnitudeType zero = Teuchos::ScalarTraits<magnitudeType>::zero();
286 
287  auto A = Get<RCP<Matrix>>(currentLevel, "A");
288 
290  // Process parameterlist
291  const ParameterList& pL = GetParameterList();
292 
293  // Boundary detection
294  const magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
295  const magnitudeType rowSumTol = as<magnitudeType>(pL.get<double>("aggregation: row sum drop tol"));
296  const LocalOrdinal dirichletNonzeroThreshold = 1;
297 
298  // Dropping
299  bool useBlocking = pL.get<bool>("aggregation: use blocking");
300  std::string droppingMethod = pL.get<std::string>("aggregation: drop scheme");
301  std::string socUsesMatrix = pL.get<std::string>("aggregation: strength-of-connection: matrix");
302  std::string socUsesMeasure = pL.get<std::string>("aggregation: strength-of-connection: measure");
303  std::string distanceLaplacianMetric = pL.get<std::string>("aggregation: distance laplacian metric");
304  bool symmetrizeDroppedGraph = pL.get<bool>("aggregation: symmetrize graph after dropping");
305  magnitudeType threshold;
306  // If we're doing the ML-style halving of the drop tol at each level, we do that here.
307  if (pL.get<bool>("aggregation: use ml scaling of drop tol"))
308  threshold = pL.get<double>("aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID());
309  else
310  threshold = as<magnitudeType>(pL.get<double>("aggregation: drop tol"));
311  bool aggregationMayCreateDirichlet = pL.get<bool>("aggregation: dropping may create Dirichlet");
312 
313  // Fill
314  const bool reuseGraph = pL.get<bool>("filtered matrix: reuse graph");
315  const bool reuseEigenvalue = pL.get<bool>("filtered matrix: reuse eigenvalue");
316 
317  const bool useRootStencil = pL.get<bool>("filtered matrix: use root stencil");
318  const bool useSpreadLumping = pL.get<bool>("filtered matrix: use spread lumping");
319  const std::string lumpingChoiceString = pL.get<std::string>("filtered matrix: lumping choice");
321  if (lumpingChoiceString == "diag lumping")
323  else if (lumpingChoiceString == "distributed lumping")
325 
326  const magnitudeType filteringDirichletThreshold = as<magnitudeType>(pL.get<double>("filtered matrix: Dirichlet threshold"));
327 
328  // coloring graph
329  bool generateColoringGraph = pL.get<bool>("aggregation: coloring: use color graph");
330  const bool localizeColoringGraph = pL.get<bool>("aggregation: coloring: localize color graph");
331  const bool symmetrizeColoringGraph = true;
332 
333 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
334  translateOldAlgoParam(pL, droppingMethod, useBlocking, socUsesMatrix, socUsesMeasure, symmetrizeDroppedGraph, generateColoringGraph, threshold, lumpingChoice);
335 #endif
336 
337  {
338  std::stringstream ss;
339  ss << "dropping scheme = \"" << droppingMethod << "\", strength-of-connection measure = \"" << socUsesMeasure << "\", strength-of-connection matrix = \"" << socUsesMatrix << "\", ";
340  if (socUsesMatrix == "distance laplacian")
341  ss << "distance laplacian metric = \"" << distanceLaplacianMetric << "\", ";
342  ss << "threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << ", useBlocking = " << useBlocking;
343  ss << ", symmetrizeDroppedGraph = " << symmetrizeDroppedGraph << std::endl;
344 
345  GetOStream(Runtime0) << ss.str();
346  }
347 
348  TEUCHOS_ASSERT(!useRootStencil);
349  TEUCHOS_ASSERT(!useSpreadLumping);
350  TEUCHOS_ASSERT((lumpingChoice != MueLu::MatrixConstruction::distributed_lumping) || !reuseGraph);
351  if (droppingMethod == "cut-drop")
352  TEUCHOS_TEST_FOR_EXCEPTION(threshold > 1.0, Exceptions::RuntimeError, "For cut-drop algorithms, \"aggregation: drop tol\" = " << threshold << ", needs to be <= 1.0");
353 
355  // We perform four sweeps over the rows of A:
356  // Pass 1: detection of boundary nodes
357  // Pass 2: diagonal extraction
358  // Pass 3: drop decision for each entry and construction of the rowptr of the filtered matrix
359  // Pass 4: fill of the filtered matrix
360  //
361  // Pass 1 and 3 apply a sequence of criteria to each row of the matrix.
362 
363  // TODO: We could merge pass 1 and 2.
364 
365  auto crsA = toCrsMatrix(A);
366  auto lclA = crsA->getLocalMatrixDevice();
367  auto range = range_type(0, lclA.numRows());
368 
370  // Pass 1: Detect boundary nodes
371  //
372  // The following criteria are available:
373  // - BoundaryDetection::PointDirichletFunctor
374  // Marks rows as Dirichlet based on value threshold and number of off-diagonal entries
375  // - BoundaryDetection::RowSumFunctor
376  // Marks rows as Dirichlet bases on row-sum criterion
377 
378  // Dirichlet nodes
379  auto boundaryNodes = boundary_nodes_type("boundaryNodes", lclA.numRows()); // initialized to false
380  {
381  SubFactoryMonitor mBoundary(*this, "Boundary detection", currentLevel);
382 
383  // macro that applies boundary detection functors
384  auto dirichlet_detection = BoundaryDetection::PointDirichletFunctor(lclA, boundaryNodes, dirichletThreshold, dirichletNonzeroThreshold);
385 
386  if (rowSumTol <= 0.) {
387  runBoundaryFunctors(lclA, boundaryNodes, dirichlet_detection);
388  } else {
389  auto apply_rowsum = BoundaryDetection::RowSumFunctor(lclA, boundaryNodes, rowSumTol);
390  runBoundaryFunctors(lclA, boundaryNodes, dirichlet_detection, apply_rowsum);
391  }
392  }
393  // In what follows, boundaryNodes can still still get modified if aggregationMayCreateDirichlet == true.
394  // Otherwise we're now done with it now.
395 
397  // Pass 2 & 3: Diagonal extraction and determine dropping and construct
398  // rowptr of filtered matrix
399  //
400  // The following criteria are available:
401  // - Misc::PointwiseDropBoundaryFunctor
402  // Drop all rows that have been marked as Dirichlet
403  // - Misc::DropOffRankFunctor
404  // Drop all entries that are off-rank
405  // - ClassicalDropping::DropFunctor
406  // Classical dropping
407  // - DistanceLaplacian::DropFunctor
408  // Distance Laplacian dropping
409  // - Misc::KeepDiagonalFunctor
410  // Mark diagonal as KEEP
411  // - Misc::MarkSingletonFunctor
412  // Mark singletons after dropping as Dirichlet
413  // - Misc::BlockDiagonalizeFunctor
414  // Drop coupling between blocks
415  //
416  // For the block diagonal variants we first block diagonalized and then apply "blocksize = 1" algorithms.
417 
418  // rowptr of filtered A
419  auto filtered_rowptr = rowptr_type("filtered_rowptr", lclA.numRows() + 1);
420  // Number of nonzeros of filtered A
421  LocalOrdinal nnz_filtered = 0;
422  // dropping decisions for each entry
423  auto results = results_view_type("results", lclA.nnz()); // initialized to UNDECIDED
424  {
425  SubFactoryMonitor mDropping(*this, "Dropping decisions", currentLevel);
426 
427  if (threshold != zero) {
428  if (socUsesMatrix == "A") {
429  if (socUsesMeasure == "unscaled") {
430  ScalarDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::UnscaledMeasure>::runDroppingFunctors_on_A(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold,
431  aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
432  } else if (socUsesMeasure == "smoothed aggregation") {
433  ScalarDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SmoothedAggregationMeasure>::runDroppingFunctors_on_A(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold,
434  aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
435  } else if (socUsesMeasure == "signed ruge-stueben") {
436  ScalarDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedRugeStuebenMeasure>::runDroppingFunctors_on_A(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold,
437  aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
438  } else if (socUsesMeasure == "signed smoothed aggregation") {
439  ScalarDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedSmoothedAggregationMeasure>::runDroppingFunctors_on_A(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold,
440  aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
441  }
442  } else if (socUsesMatrix == "distance laplacian") {
443  auto coords = Get<RCP<doubleMultiVector>>(currentLevel, "Coordinates");
444  if (socUsesMeasure == "unscaled") {
445  ScalarDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::UnscaledMeasure>::runDroppingFunctors_on_dlap(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, currentLevel, *this);
446  } else if (socUsesMeasure == "smoothed aggregation") {
447  ScalarDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SmoothedAggregationMeasure>::runDroppingFunctors_on_dlap(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, currentLevel, *this);
448  } else if (socUsesMeasure == "signed ruge-stueben") {
449  ScalarDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedRugeStuebenMeasure>::runDroppingFunctors_on_dlap(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, currentLevel, *this);
450  } else if (socUsesMeasure == "signed smoothed aggregation") {
451  ScalarDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedSmoothedAggregationMeasure>::runDroppingFunctors_on_dlap(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, currentLevel, *this);
452  }
453  }
454  } else {
455  Kokkos::deep_copy(results, KEEP);
456 
457  if (symmetrizeDroppedGraph) {
458  auto drop_boundaries = Misc::PointwiseSymmetricDropBoundaryFunctor(*A, boundaryNodes, results);
459  ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *this, drop_boundaries);
460  } else {
461  auto no_op = Misc::NoOpFunctor<LocalOrdinal>();
462  ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *this, no_op);
463  }
464  }
465 
466  if (symmetrizeDroppedGraph) {
467  auto symmetrize = Misc::SymmetrizeFunctor(lclA, results);
468  ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *this, symmetrize);
469  }
470  }
471  GO numDropped = lclA.nnz() - nnz_filtered;
472  // We now know the number of entries of filtered A and have the final rowptr.
473 
475  // Pass 4: Create local matrix for filtered A
476  //
477  // Dropped entries are optionally lumped to the diagonal.
478 
479  RCP<Matrix> filteredA;
480  RCP<LWGraph_kokkos> graph;
481  {
482  SubFactoryMonitor mFill(*this, "Filtered matrix fill", currentLevel);
483 
484  local_matrix_type lclFilteredA;
485  local_graph_type lclGraph;
486  if (reuseGraph) {
487  filteredA = MatrixFactory::BuildCopy(A);
488  lclFilteredA = filteredA->getLocalMatrixDevice();
489 
490  auto colidx = entries_type("entries", nnz_filtered);
491  lclGraph = local_graph_type(colidx, filtered_rowptr);
492  } else {
493  auto colidx = entries_type("entries", nnz_filtered);
494  auto values = values_type("values", nnz_filtered);
495  lclFilteredA = local_matrix_type("filteredA",
496  lclA.numRows(), lclA.numCols(),
497  nnz_filtered,
498  values, filtered_rowptr, colidx);
499  }
500 
501  if (lumpingChoice != MueLu::MatrixConstruction::no_lumping) {
502  if (reuseGraph) {
503  auto fillFunctor = MatrixConstruction::PointwiseFillReuseFunctor<local_matrix_type, local_graph_type, true>(lclA, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
504  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_reuse", range, fillFunctor);
505  } else {
506  if (lumpingChoice == MueLu::MatrixConstruction::diag_lumping) {
507  auto fillFunctor = MatrixConstruction::PointwiseFillNoReuseFunctor<local_matrix_type, MueLu::MatrixConstruction::diag_lumping>(lclA, results, lclFilteredA, filteringDirichletThreshold);
508  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_noreuse", range, fillFunctor);
509  } else if (lumpingChoice == MueLu::MatrixConstruction::distributed_lumping) {
510  auto fillFunctor = MatrixConstruction::PointwiseFillNoReuseFunctor<local_matrix_type, MueLu::MatrixConstruction::distributed_lumping>(lclA, results, lclFilteredA, filteringDirichletThreshold);
511  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_noreuse", range, fillFunctor);
512  }
513  }
514  } else {
515  if (reuseGraph) {
516  auto fillFunctor = MatrixConstruction::PointwiseFillReuseFunctor<local_matrix_type, local_graph_type, false>(lclA, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
517  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_reuse", range, fillFunctor);
518  } else {
519  auto fillFunctor = MatrixConstruction::PointwiseFillNoReuseFunctor<local_matrix_type, MueLu::MatrixConstruction::no_lumping>(lclA, results, lclFilteredA, filteringDirichletThreshold);
520  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_noreuse", range, fillFunctor);
521  }
522  }
523 
524  if (!reuseGraph)
525  filteredA = MatrixFactory::Build(lclFilteredA, A->getRowMap(), A->getColMap(), A->getDomainMap(), A->getRangeMap());
526  filteredA->SetFixedBlockSize(A->GetFixedBlockSize());
527 
528  if (reuseEigenvalue) {
529  // Reuse max eigenvalue from A
530  // It is unclear what eigenvalue is the best for the smoothing, but we already may have
531  // the D^{-1}A estimate in A, may as well use it.
532  // NOTE: ML does that too
533  filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate());
534  } else {
535  filteredA->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits<SC>::one());
536  }
537 
538  if (!reuseGraph) {
539  // Use graph of filteredA as graph.
540  lclGraph = filteredA->getCrsGraph()->getLocalGraphDevice();
541  }
542  graph = rcp(new LWGraph_kokkos(lclGraph, filteredA->getRowMap(), filteredA->getColMap(), "amalgamated graph of A"));
543  graph->SetBoundaryNodeMap(boundaryNodes);
544  }
545 
546  // Construct a second graph for coloring
547  if (generateColoringGraph) {
548  SubFactoryMonitor mColoringGraph(*this, "Construct coloring graph", currentLevel);
549 
550  filtered_rowptr = rowptr_type("rowptr_coloring_graph", lclA.numRows() + 1);
551  if (localizeColoringGraph) {
552  auto drop_offrank = Misc::DropOffRankFunctor(lclA, results);
553  ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *this, drop_offrank);
554  }
555  if (symmetrizeColoringGraph) {
556  auto symmetrize = Misc::SymmetrizeFunctor(lclA, results);
557  ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *this, symmetrize);
558  }
559  auto colidx = entries_type("entries_coloring_graph", nnz_filtered);
560  auto lclGraph = local_graph_type(colidx, filtered_rowptr);
561  auto graphConstruction = MatrixConstruction::GraphConstruction<local_matrix_type, local_graph_type>(lclA, results, lclGraph);
562  Kokkos::parallel_for("MueLu::CoalesceDrop::Construct_coloring_graph", range, graphConstruction);
563 
564  auto colorGraph = rcp(new LWGraph_kokkos(lclGraph, filteredA->getRowMap(), filteredA->getColMap(), "coloring graph"));
565  Set(currentLevel, "Coloring Graph", colorGraph);
566  }
567 
568  LO dofsPerNode = 1;
569  Set(currentLevel, "DofsPerNode", dofsPerNode);
570  Set(currentLevel, "Graph", graph);
571  Set(currentLevel, "A", filteredA);
572 
573  return std::make_tuple(numDropped, boundaryNodes);
574 }
575 
576 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
577 std::tuple<GlobalOrdinal, typename MueLu::LWGraph_kokkos<LocalOrdinal, GlobalOrdinal, Node>::boundary_nodes_type> CoalesceDropFactory_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
578  BuildVector(Level& currentLevel) const {
579  FactoryMonitor m(*this, "BuildVector", currentLevel);
580 
583  using local_matrix_type = typename MatrixType::local_matrix_type;
584  using local_graph_type = typename GraphType::local_graph_type;
585  using rowptr_type = typename local_graph_type::row_map_type::non_const_type;
586  using entries_type = typename local_graph_type::entries_type::non_const_type;
587  using values_type = typename local_matrix_type::values_type::non_const_type;
588  using device_type = typename Node::device_type;
589  using memory_space = typename device_type::memory_space;
590  using results_view_type = Kokkos::View<DecisionType*, memory_space>;
591  using magnitudeType = typename Teuchos::ScalarTraits<Scalar>::magnitudeType;
592  using doubleMultiVector = Xpetra::MultiVector<magnitudeType, LO, GO, NO>;
593 
594  typedef Teuchos::ScalarTraits<Scalar> STS;
595  const magnitudeType zero = Teuchos::ScalarTraits<magnitudeType>::zero();
596 
597  auto A = Get<RCP<Matrix>>(currentLevel, "A");
598 
599  /* NOTE: storageblocksize (from GetStorageBlockSize()) is the size of a block in the chosen storage scheme.
600  blkSize is the number of storage blocks that must kept together during the amalgamation process.
601 
602  Both of these quantities may be different than numPDEs (from GetFixedBlockSize()), but the following must always hold:
603 
604  numPDEs = blkSize * storageblocksize.
605 
606  If numPDEs==1
607  Matrix is point storage (classical CRS storage). storageblocksize=1 and blkSize=1
608  No other values makes sense.
609 
610  If numPDEs>1
611  If matrix uses point storage, then storageblocksize=1 and blkSize=numPDEs.
612  If matrix uses block storage, with block size of n, then storageblocksize=n, and blkSize=numPDEs/n.
613  Thus far, only storageblocksize=numPDEs and blkSize=1 has been tested.
614  */
615 
616  TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() % A->GetStorageBlockSize() != 0, Exceptions::RuntimeError, "A->GetFixedBlockSize() needs to be a multiple of A->GetStorageBlockSize()");
617  LO blkSize = A->GetFixedBlockSize() / A->GetStorageBlockSize();
618 
619  auto amalInfo = Get<RCP<AmalgamationInfo>>(currentLevel, "UnAmalgamationInfo");
620 
621  const RCP<const Map> rowMap = A->getRowMap();
622  const RCP<const Map> colMap = A->getColMap();
623 
624  // build a node row map (uniqueMap = non-overlapping) and a node column map
625  // (nonUniqueMap = overlapping). The arrays rowTranslation and colTranslation
626  // stored in the AmalgamationInfo class container contain the local node id
627  // given a local dof id. The data is calculated in the AmalgamationFactory and
628  // stored in the variable "UnAmalgamationInfo" (which is of type AmalagamationInfo)
629  const RCP<const Map> uniqueMap = amalInfo->getNodeRowMap();
630  const RCP<const Map> nonUniqueMap = amalInfo->getNodeColMap();
631  Array<LO> rowTranslationArray = *(amalInfo->getRowTranslation()); // TAW should be transform that into a View?
632  Array<LO> colTranslationArray = *(amalInfo->getColTranslation());
633 
634  Kokkos::View<LO*, Kokkos::MemoryUnmanaged>
635  rowTranslationView(rowTranslationArray.getRawPtr(), rowTranslationArray.size());
636  Kokkos::View<LO*, Kokkos::MemoryUnmanaged>
637  colTranslationView(colTranslationArray.getRawPtr(), colTranslationArray.size());
638 
639  // get number of local nodes
640  LO numNodes = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
641  typedef typename Kokkos::View<LocalOrdinal*, typename Node::device_type> id_translation_type;
642  id_translation_type rowTranslation("dofId2nodeId", rowTranslationArray.size());
643  id_translation_type colTranslation("ov_dofId2nodeId", colTranslationArray.size());
644  Kokkos::deep_copy(rowTranslation, rowTranslationView);
645  Kokkos::deep_copy(colTranslation, colTranslationView);
646 
647  // extract striding information
648  blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map)
649  LocalOrdinal blkId = -1; //< the block id within a strided map or -1 if it is a full block map
650  LocalOrdinal blkPartSize = A->GetFixedBlockSize(); //< stores block size of part blkId (or the full block size)
651  if (A->IsView("stridedMaps") == true) {
652  const RCP<const Map> myMap = A->getRowMap("stridedMaps");
653  const RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
654  TEUCHOS_TEST_FOR_EXCEPTION(strMap.is_null() == true, Exceptions::RuntimeError, "Map is not of type stridedMap");
655  blkSize = Teuchos::as<const LocalOrdinal>(strMap->getFixedBlockSize());
656  blkId = strMap->getStridedBlockId();
657  if (blkId > -1)
658  blkPartSize = Teuchos::as<LocalOrdinal>(strMap->getStridingData()[blkId]);
659  }
660 
661  TEUCHOS_TEST_FOR_EXCEPTION(A->getRowMap()->getLocalNumElements() % blkPartSize != 0, MueLu::Exceptions::RuntimeError, "MueLu::CoalesceDropFactory: Number of local elements is " << A->getRowMap()->getLocalNumElements() << " but should be a multiple of " << blkPartSize);
662 
664  // Process parameterlist
665  const ParameterList& pL = GetParameterList();
666 
667  // Boundary detection
668  const magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
669  const magnitudeType rowSumTol = as<magnitudeType>(pL.get<double>("aggregation: row sum drop tol"));
670  const LocalOrdinal dirichletNonzeroThreshold = 1;
671  const bool useGreedyDirichlet = pL.get<bool>("aggregation: greedy Dirichlet");
672  TEUCHOS_TEST_FOR_EXCEPTION(rowSumTol > zero, MueLu::Exceptions::RuntimeError, "MueLu::CoalesceDropFactory: RowSum is not implemented for vectorial problems.");
673 
674  // Dropping
675  bool useBlocking = pL.get<bool>("aggregation: use blocking");
676  std::string droppingMethod = pL.get<std::string>("aggregation: drop scheme");
677  std::string socUsesMatrix = pL.get<std::string>("aggregation: strength-of-connection: matrix");
678  std::string socUsesMeasure = pL.get<std::string>("aggregation: strength-of-connection: measure");
679  std::string distanceLaplacianMetric = pL.get<std::string>("aggregation: distance laplacian metric");
680  bool symmetrizeDroppedGraph = pL.get<bool>("aggregation: symmetrize graph after dropping");
681  magnitudeType threshold;
682  // If we're doing the ML-style halving of the drop tol at each level, we do that here.
683  if (pL.get<bool>("aggregation: use ml scaling of drop tol"))
684  threshold = pL.get<double>("aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID());
685  else
686  threshold = as<magnitudeType>(pL.get<double>("aggregation: drop tol"));
687  bool aggregationMayCreateDirichlet = pL.get<bool>("aggregation: dropping may create Dirichlet");
688 
689  // Fill
690  const bool reuseGraph = pL.get<bool>("filtered matrix: reuse graph");
691  const bool reuseEigenvalue = pL.get<bool>("filtered matrix: reuse eigenvalue");
692 
693  const bool useRootStencil = pL.get<bool>("filtered matrix: use root stencil");
694  const bool useSpreadLumping = pL.get<bool>("filtered matrix: use spread lumping");
695  const std::string lumpingChoiceString = pL.get<std::string>("filtered matrix: lumping choice");
697  if (lumpingChoiceString == "diag lumping")
699  else if (lumpingChoiceString == "distributed lumping")
701 
702  const magnitudeType filteringDirichletThreshold = as<magnitudeType>(pL.get<double>("filtered matrix: Dirichlet threshold"));
703 
704  // coloring graph
705  bool generateColoringGraph = pL.get<bool>("aggregation: coloring: use color graph");
706  const bool localizeColoringGraph = pL.get<bool>("aggregation: coloring: localize color graph");
707  const bool symmetrizeColoringGraph = true;
708 
709 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
710  translateOldAlgoParam(pL, droppingMethod, useBlocking, socUsesMatrix, socUsesMeasure, symmetrizeDroppedGraph, generateColoringGraph, threshold, lumpingChoice);
711 #endif
712  {
713  std::stringstream ss;
714  ss << "dropping scheme = \"" << droppingMethod << "\", strength-of-connection measure = \"" << socUsesMeasure << "\", strength-of-connection matrix = \"" << socUsesMatrix << "\", ";
715  if (socUsesMatrix == "distance laplacian")
716  ss << "distance laplacian metric = \"" << distanceLaplacianMetric << "\", ";
717  ss << "threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << ", useBlocking = " << useBlocking;
718  ss << ", symmetrizeDroppedGraph = " << symmetrizeDroppedGraph << std::endl;
719 
720  GetOStream(Runtime0) << ss.str();
721  }
722 
723  TEUCHOS_ASSERT(!useRootStencil);
724  TEUCHOS_ASSERT(!useSpreadLumping);
725  TEUCHOS_ASSERT((lumpingChoice != MueLu::MatrixConstruction::distributed_lumping) || !reuseGraph);
726  if (droppingMethod == "cut-drop")
727  TEUCHOS_TEST_FOR_EXCEPTION(threshold > 1.0, Exceptions::RuntimeError, "For cut-drop algorithms, \"aggregation: drop tol\" = " << threshold << ", needs to be <= 1.0");
728 
730  // We perform four sweeps over the rows of A:
731  // Pass 1: detection of boundary nodes
732  // Pass 2: diagonal extraction
733  // Pass 3: drop decision for each entry and construction of the rowptr of the filtered matrix
734  // Pass 4: fill of the filtered matrix
735  //
736  // Pass 1 and 3 apply a sequence of criteria to each row of the matrix.
737 
738  // TODO: We could merge pass 1 and 2.
739 
740  auto crsA = toCrsMatrix(A);
741  auto lclA = crsA->getLocalMatrixDevice();
742  auto range = range_type(0, numNodes);
743 
745  // Pass 1: Detect boundary nodes
746  //
747  // The following criteria are available:
748  // - BoundaryDetection::VectorDirichletFunctor
749  // Marks rows as Dirichlet based on value threshold and number of off-diagonal entries
750 
751  // Dirichlet nodes
752  auto boundaryNodes = boundary_nodes_type("boundaryNodes", numNodes); // initialized to false
753  {
754  SubFactoryMonitor mBoundary(*this, "Boundary detection", currentLevel);
755 
756  if (useGreedyDirichlet) {
757  auto dirichlet_detection = BoundaryDetection::VectorDirichletFunctor<local_matrix_type, true>(lclA, blkPartSize, boundaryNodes, dirichletThreshold, dirichletNonzeroThreshold);
758  runBoundaryFunctors(lclA, boundaryNodes, dirichlet_detection);
759  } else {
760  auto dirichlet_detection = BoundaryDetection::VectorDirichletFunctor<local_matrix_type, false>(lclA, blkPartSize, boundaryNodes, dirichletThreshold, dirichletNonzeroThreshold);
761  runBoundaryFunctors(lclA, boundaryNodes, dirichlet_detection);
762  }
763  }
764  // In what follows, boundaryNodes can still still get modified if aggregationMayCreateDirichlet == true.
765  // Otherwise we're now done with it now.
766 
768  // Pass 2 & 3: Diagonal extraction and determine dropping and construct
769  // rowptr of filtered matrix
770  //
771  // The following criteria are available:
772  // - Misc::VectorDropBoundaryFunctor
773  // Drop all rows that have been marked as Dirichlet
774  // - Misc::DropOffRankFunctor
775  // Drop all entries that are off-rank
776  // - ClassicalDropping::DropFunctor
777  // Classical dropping
778  // - DistanceLaplacian::VectorDropFunctor
779  // Distance Laplacian dropping
780  // - Misc::KeepDiagonalFunctor
781  // Mark diagonal as KEEP
782  // - Misc::MarkSingletonFunctor
783  // Mark singletons after dropping as Dirichlet
784 
785  // rowptr of filtered A
786  auto filtered_rowptr = rowptr_type("rowptr", lclA.numRows() + 1);
787  auto graph_rowptr = rowptr_type("rowptr", numNodes + 1);
788  // Number of nonzeros of filtered A and graph
789  Kokkos::pair<LocalOrdinal, LocalOrdinal> nnz = {0, 0};
790 
791  // dropping decisions for each entry
792  auto results = results_view_type("results", lclA.nnz()); // initialized to UNDECIDED
793 
794  RCP<Matrix> mergedA;
795  {
796  SubFactoryMonitor mDropping(*this, "Dropping decisions", currentLevel);
797 
798  {
799  // Construct merged A.
800 
801  auto merged_rowptr = rowptr_type("rowptr", numNodes + 1);
802  LocalOrdinal nnz_merged = 0;
803 
804  auto functor = MatrixConstruction::MergeCountFunctor(lclA, blkPartSize, colTranslation, merged_rowptr);
805  Kokkos::parallel_scan("MergeCount", range, functor, nnz_merged);
806 
807  local_graph_type lclMergedGraph;
808  auto colidx_merged = entries_type("entries", nnz_merged);
809  auto values_merged = values_type("values", nnz_merged);
810 
811  local_matrix_type lclMergedA = local_matrix_type("mergedA",
812  numNodes, nonUniqueMap->getLocalNumElements(),
813  nnz_merged,
814  values_merged, merged_rowptr, colidx_merged);
815 
816  auto fillFunctor = MatrixConstruction::MergeFillFunctor<local_matrix_type>(lclA, blkSize, colTranslation, lclMergedA);
817  Kokkos::parallel_for("MueLu::CoalesceDrop::MergeFill", range, fillFunctor);
818 
819  mergedA = Xpetra::MatrixFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(lclMergedA, uniqueMap, nonUniqueMap, uniqueMap, uniqueMap);
820  }
821 
822  if (threshold != zero) {
823  if (socUsesMatrix == "A") {
824  if (socUsesMeasure == "unscaled") {
825  VectorDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::UnscaledMeasure>::runDroppingFunctors_on_A(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
826  } else if (socUsesMeasure == "smoothed aggregation") {
827  VectorDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SmoothedAggregationMeasure>::runDroppingFunctors_on_A(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
828  } else if (socUsesMeasure == "signed ruge-stueben") {
829  VectorDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedRugeStuebenMeasure>::runDroppingFunctors_on_A(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
830  } else if (socUsesMeasure == "signed smoothed aggregation") {
831  VectorDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedSmoothedAggregationMeasure>::runDroppingFunctors_on_A(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
832  }
833  } else if (socUsesMatrix == "distance laplacian") {
834  auto coords = Get<RCP<doubleMultiVector>>(currentLevel, "Coordinates");
835 
836  Array<double> dlap_weights = pL.get<Array<double>>("aggregation: distance laplacian directional weights");
837  LocalOrdinal interleaved_blocksize = as<LocalOrdinal>(pL.get<int>("aggregation: block diagonal: interleaved blocksize"));
838  if (socUsesMeasure == "distance laplacian") {
839  LO dim = (LO)coords->getNumVectors();
840  // If anything isn't 1.0 we need to turn on the weighting
841  bool non_unity = false;
842  for (LO i = 0; !non_unity && i < (LO)dlap_weights.size(); i++) {
843  if (dlap_weights[i] != 1.0) {
844  non_unity = true;
845  }
846  }
847  if (non_unity) {
848  if ((LO)dlap_weights.size() == dim) {
849  distanceLaplacianMetric = "weighted";
850  } else if ((LO)dlap_weights.size() == interleaved_blocksize * dim)
851  distanceLaplacianMetric = "block weighted";
852  else {
853  TEUCHOS_TEST_FOR_EXCEPTION(1, Exceptions::RuntimeError,
854  "length of 'aggregation: distance laplacian directional weights' must equal the coordinate dimension OR the coordinate dimension times the blocksize");
855  }
856  if (GetVerbLevel() & Statistics1)
857  GetOStream(Statistics1) << "Using distance laplacian weights: " << dlap_weights << std::endl;
858  }
859  }
860 
861  if (socUsesMeasure == "unscaled") {
862  VectorDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::UnscaledMeasure>::runDroppingFunctors_on_dlap(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, dlap_weights, interleaved_blocksize, currentLevel, *this);
863  } else if (socUsesMeasure == "smoothed aggregation") {
864  VectorDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SmoothedAggregationMeasure>::runDroppingFunctors_on_dlap(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, dlap_weights, interleaved_blocksize, currentLevel, *this);
865  } else if (socUsesMeasure == "signed ruge-stueben") {
866  VectorDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedRugeStuebenMeasure>::runDroppingFunctors_on_dlap(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, dlap_weights, interleaved_blocksize, currentLevel, *this);
867  } else if (socUsesMeasure == "signed smoothed aggregation") {
868  VectorDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedSmoothedAggregationMeasure>::runDroppingFunctors_on_dlap(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, dlap_weights, interleaved_blocksize, currentLevel, *this);
869  }
870  }
871  } else {
872  Kokkos::deep_copy(results, KEEP);
873 
874  auto no_op = Misc::NoOpFunctor<LocalOrdinal>();
875  VectorDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, useBlocking, currentLevel, *this, no_op);
876  }
877 
878  if (symmetrizeDroppedGraph) {
879  auto symmetrize = Misc::SymmetrizeFunctor(lclA, results);
880  VectorDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, useBlocking, currentLevel, *this, symmetrize);
881  }
882  }
883  LocalOrdinal nnz_filtered = nnz.first;
884  LocalOrdinal nnz_graph = nnz.second;
885  GO numTotal = lclA.nnz();
886  GO numDropped = numTotal - nnz_filtered;
887  // We now know the number of entries of filtered A and have the final rowptr.
888 
890  // Pass 4: Create local matrix for filtered A
891  //
892  // Dropped entries are optionally lumped to the diagonal.
893 
894  RCP<Matrix> filteredA;
895  RCP<LWGraph_kokkos> graph;
896  {
897  SubFactoryMonitor mFill(*this, "Filtered matrix fill", currentLevel);
898 
899  local_matrix_type lclFilteredA;
900  if (reuseGraph) {
901  lclFilteredA = local_matrix_type("filteredA", lclA.graph, lclA.numCols());
902  } else {
903  auto colidx = entries_type("entries", nnz_filtered);
904  auto values = values_type("values", nnz_filtered);
905  lclFilteredA = local_matrix_type("filteredA",
906  lclA.numRows(), lclA.numCols(),
907  nnz_filtered,
908  values, filtered_rowptr, colidx);
909  }
910 
911  local_graph_type lclGraph;
912  {
913  auto colidx = entries_type("entries", nnz_graph);
914  lclGraph = local_graph_type(colidx, graph_rowptr);
915  }
916 
917  if (lumpingChoice != MueLu::MatrixConstruction::no_lumping) {
918  if (reuseGraph) {
919  auto fillFunctor = MatrixConstruction::VectorFillFunctor<local_matrix_type, true, true>(lclA, blkPartSize, colTranslation, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
920  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_reuse", range, fillFunctor);
921  } else {
922  auto fillFunctor = MatrixConstruction::VectorFillFunctor<local_matrix_type, true, false>(lclA, blkPartSize, colTranslation, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
923  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_noreuse", range, fillFunctor);
924  }
925  } else {
926  if (reuseGraph) {
927  auto fillFunctor = MatrixConstruction::VectorFillFunctor<local_matrix_type, false, true>(lclA, blkSize, colTranslation, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
928  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_reuse", range, fillFunctor);
929  } else {
930  auto fillFunctor = MatrixConstruction::VectorFillFunctor<local_matrix_type, false, false>(lclA, blkSize, colTranslation, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
931  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_noreuse", range, fillFunctor);
932  }
933  }
934 
935  filteredA = Xpetra::MatrixFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(lclFilteredA, A->getRowMap(), A->getColMap(), A->getDomainMap(), A->getRangeMap());
936  filteredA->SetFixedBlockSize(blkSize);
937 
938  if (reuseEigenvalue) {
939  // Reuse max eigenvalue from A
940  // It is unclear what eigenvalue is the best for the smoothing, but we already may have
941  // the D^{-1}A estimate in A, may as well use it.
942  // NOTE: ML does that too
943  filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate());
944  } else {
945  filteredA->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits<SC>::one());
946  }
947 
948  graph = rcp(new LWGraph_kokkos(lclGraph, uniqueMap, nonUniqueMap, "amalgamated graph of A"));
949  graph->SetBoundaryNodeMap(boundaryNodes);
950  }
951 
952  // Construct a second graph for coloring
953  if (generateColoringGraph) {
954  SubFactoryMonitor mColoringGraph(*this, "Construct coloring graph", currentLevel);
955 
956  filtered_rowptr = rowptr_type("rowptr_coloring_graph", lclA.numRows() + 1);
957  graph_rowptr = rowptr_type("rowptr", numNodes + 1);
958  if (localizeColoringGraph) {
959  auto drop_offrank = Misc::DropOffRankFunctor(lclA, results);
960  VectorDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, useBlocking, currentLevel, *this, drop_offrank);
961  }
962  if (symmetrizeColoringGraph) {
963  auto symmetrize = Misc::SymmetrizeFunctor(lclA, results);
964  VectorDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, useBlocking, currentLevel, *this, symmetrize);
965  }
966  auto colidx = entries_type("entries_coloring_graph", nnz_filtered);
967  auto lclGraph = local_graph_type(colidx, filtered_rowptr);
968  auto graphConstruction = MatrixConstruction::GraphConstruction<local_matrix_type, local_graph_type>(lclA, results, lclGraph);
969  Kokkos::parallel_for("MueLu::CoalesceDrop::Construct_coloring_graph", range, graphConstruction);
970 
971  auto colorGraph = rcp(new LWGraph_kokkos(lclGraph, filteredA->getRowMap(), filteredA->getColMap(), "coloring graph"));
972  Set(currentLevel, "Coloring Graph", colorGraph);
973  }
974 
975  LO dofsPerNode = blkSize;
976 
977  Set(currentLevel, "DofsPerNode", dofsPerNode);
978  Set(currentLevel, "Graph", graph);
979  Set(currentLevel, "A", filteredA);
980 
981  return std::make_tuple(numDropped, boundaryNodes);
982 }
983 
984 } // namespace MueLu
985 #endif // MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
MueLu::DefaultLocalOrdinal LocalOrdinal
Lightweight MueLu representation of a compressed row storage graph.
KOKKOS_INLINE_FUNCTION void SetBoundaryNodeMap(const boundary_nodes_type bndry)
Set boolean array indicating which rows correspond to Dirichlet boundaries.
void setValidator(RCP< const ParameterEntryValidator > const &validator)
GlobalOrdinal GO
T & get(const std::string &name, T def_value)
void translateOldAlgoParam(const Teuchos::ParameterList &pL, std::string &droppingMethod, bool &useBlocking, std::string &socUsesMatrix, std::string &socUsesMeasure, bool &symmetrizeDroppedGraph, bool &generateColoringGraph, magnitudeType &threshold, MueLu::MatrixConstruction::lumpingType &lumpingChoice)
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Print more statistics.
LocalOrdinal LO
One-liner description of what is happening.
ParameterList & set(std::string const &name, T &&value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
Functor that drops boundary nodes for a blockSize == 1 problem.
void runBoundaryFunctors(local_matrix_type &lclA, boundary_nodes_view &boundaryNodes, Functors &...functors)
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Functor that symmetrizes the dropping decisions.
MueLu::DefaultGlobalOrdinal GlobalOrdinal
Functor that drops off-rank entries.
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:63
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
static void runDroppingFunctors_on_A(matrix_type &A, matrix_type &mergedA, LocalOrdinal blkPartSize, block_indices_view_type &rowTranslation, block_indices_view_type &colTranslation, results_view &results, rowptr_type &filtered_rowptr, rowptr_type &graph_rowptr, nnz_count_type &nnz, boundary_nodes_type &boundaryNodes, const std::string &droppingMethod, const magnitudeType threshold, const bool aggregationMayCreateDirichlet, const bool symmetrizeDroppedGraph, const bool useBlocking, Level &level, const Factory &factory)
typename MueLu::LWGraph_kokkos< LocalOrdinal, GlobalOrdinal, Node >::boundary_nodes_type boundary_nodes_type
void DeclareInput(Level &currentLevel) const
Input.
Functor that fills the filtered matrix while reusing the graph of the matrix before dropping...
Functor for marking nodes as Dirichlet.
static void runDroppingFunctors_on_dlap(matrix_type &A, matrix_type &mergedA, LocalOrdinal blkPartSize, block_indices_view_type &rowTranslation, block_indices_view_type &colTranslation, results_view &results, rowptr_type &filtered_rowptr, rowptr_type &graph_rowptr, nnz_count_type &nnz, boundary_nodes_type &boundaryNodes, const std::string &droppingMethod, const magnitudeType threshold, const bool aggregationMayCreateDirichlet, const bool symmetrizeDroppedGraph, const bool useBlocking, const std::string &distanceLaplacianMetric, Teuchos::Array< double > &dlap_weights, LocalOrdinal interleaved_blocksize, Level &level, const Factory &factory)
Kokkos::RangePolicy< local_ordinal_type, execution_space > range_type
std::tuple< GlobalOrdinal, boundary_nodes_type > BuildVector(Level &currentLevel) const
static void runDroppingFunctors_on_dlap(matrix_type &A, results_view &results, rowptr_type &filtered_rowptr, LocalOrdinal &nnz_filtered, boundary_nodes_type &boundaryNodes, const std::string &droppingMethod, const magnitudeType threshold, const bool aggregationMayCreateDirichlet, const bool symmetrizeDroppedGraph, const bool useBlocking, const std::string &distanceLaplacianMetric, Level &level, const Factory &factory)
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void Build(Level &currentLevel) const
Build an object with this factory.
Functor for marking nodes as Dirichlet based on rowsum.
Functor that serially applies sub-functors to rows.
size_type size() const
static void runDroppingFunctors_on_A(matrix_type &A, results_view &results, rowptr_type &filtered_rowptr, LocalOrdinal &nnz_filtered, boundary_nodes_type &boundaryNodes, const std::string &droppingMethod, const magnitudeType threshold, const bool aggregationMayCreateDirichlet, const bool symmetrizeDroppedGraph, const bool useBlocking, Level &level, const Factory &factory)
#define SET_VALID_ENTRY(name)
Functor for marking nodes as Dirichlet in a block operator.
std::tuple< GlobalOrdinal, boundary_nodes_type > BuildScalar(Level &currentLevel) const
Functor does not reuse the graph of the matrix for a problem with blockSize == 1. ...
static RCP< Matrix > Build(const RCP< const Map > &rowMap, size_t maxNumEntriesPerRow, Xpetra::ProfileType pftype=Xpetra::DynamicProfile)
Exception throws to report errors in the internal logical of the program.
#define TEUCHOS_ASSERT(assertion_test)
ParameterEntry & getEntry(const std::string &name)
bool is_null() const