MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_CoalesceDropFactory_kokkos_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // MueLu: A package for multigrid based preconditioning
4 //
5 // Copyright 2012 NTESS and the MueLu contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
11 #define MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
12 
13 #include <Kokkos_Core.hpp>
14 #include <KokkosSparse_CrsMatrix.hpp>
15 #include <sstream>
16 #include <string>
17 #include <tuple>
18 
19 #include "Xpetra_Matrix.hpp"
20 
22 
23 #include "MueLu_AmalgamationInfo.hpp"
24 #include "MueLu_Exceptions.hpp"
25 #include "MueLu_Level.hpp"
26 #include "MueLu_LWGraph_kokkos.hpp"
27 #include "MueLu_MasterList.hpp"
28 #include "MueLu_Monitor.hpp"
29 #include "MueLu_Utilities.hpp"
30 
31 // #define MUELU_COALESCE_DROP_DEBUG 1
32 
34 #include "MueLu_DroppingCommon.hpp"
36 
37 // The different dropping algorithms are split up over several translation units. This speeds up compilation and also avoids launch latencies on GPU.
39 #include "MueLu_ScalarDroppingClassical.hpp"
40 #include "MueLu_ScalarDroppingDistanceLaplacian.hpp"
42 #include "MueLu_VectorDroppingClassical.hpp"
43 #include "MueLu_VectorDroppingDistanceLaplacian.hpp"
44 
45 namespace MueLu {
46 
47 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
49  RCP<ParameterList> validParamList = rcp(new ParameterList());
50 
51 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
52  SET_VALID_ENTRY("aggregation: use blocking");
53  SET_VALID_ENTRY("aggregation: drop tol");
54  SET_VALID_ENTRY("aggregation: use ml scaling of drop tol");
55  SET_VALID_ENTRY("aggregation: Dirichlet threshold");
56  SET_VALID_ENTRY("aggregation: greedy Dirichlet");
57  SET_VALID_ENTRY("aggregation: row sum drop tol");
58  SET_VALID_ENTRY("aggregation: strength-of-connection: matrix");
59  SET_VALID_ENTRY("aggregation: strength-of-connection: measure");
60  SET_VALID_ENTRY("aggregation: drop scheme");
61  SET_VALID_ENTRY("aggregation: block diagonal: interleaved blocksize");
62  SET_VALID_ENTRY("aggregation: distance laplacian metric");
63  SET_VALID_ENTRY("aggregation: distance laplacian directional weights");
64  SET_VALID_ENTRY("aggregation: dropping may create Dirichlet");
65 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
66  SET_VALID_ENTRY("aggregation: distance laplacian algo");
67  SET_VALID_ENTRY("aggregation: classical algo");
68 #endif
69  SET_VALID_ENTRY("aggregation: symmetrize graph after dropping");
70  SET_VALID_ENTRY("aggregation: coloring: use color graph");
71  SET_VALID_ENTRY("aggregation: coloring: localize color graph");
72 
73  SET_VALID_ENTRY("filtered matrix: use lumping");
74  SET_VALID_ENTRY("filtered matrix: reuse graph");
75  SET_VALID_ENTRY("filtered matrix: reuse eigenvalue");
76 
77  SET_VALID_ENTRY("filtered matrix: use root stencil");
78  SET_VALID_ENTRY("filtered matrix: lumping choice");
79  SET_VALID_ENTRY("filtered matrix: use spread lumping");
80  SET_VALID_ENTRY("filtered matrix: spread lumping diag dom growth factor");
81  SET_VALID_ENTRY("filtered matrix: spread lumping diag dom cap");
82  SET_VALID_ENTRY("filtered matrix: Dirichlet threshold");
83 
84 #undef SET_VALID_ENTRY
85  validParamList->set<bool>("lightweight wrap", true, "Experimental option for lightweight graph access");
86 #ifndef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
87  validParamList->getEntry("aggregation: drop scheme").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("point-wise", "cut-drop"))));
88 #else
89  validParamList->getEntry("aggregation: drop scheme").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("point-wise", "cut-drop", "signed classical sa", "classical", "distance laplacian", "signed classical", "block diagonal", "block diagonal classical", "block diagonal distance laplacian", "block diagonal signed classical", "block diagonal colored signed classical", "signed classical distance laplacian", "signed classical sa distance laplacian"))));
90  validParamList->getEntry("aggregation: classical algo").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("default", "unscaled cut", "scaled cut", "scaled cut symmetric"))));
91  validParamList->getEntry("aggregation: distance laplacian algo").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("default", "unscaled cut", "scaled cut", "scaled cut symmetric"))));
92 #endif
93  validParamList->getEntry("aggregation: strength-of-connection: matrix").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("A", "distance laplacian"))));
94  validParamList->getEntry("aggregation: strength-of-connection: measure").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("smoothed aggregation", "signed smoothed aggregation", "signed ruge-stueben", "unscaled"))));
95  validParamList->getEntry("aggregation: distance laplacian metric").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("unweighted", "material"))));
96 
97  validParamList->set<RCP<const FactoryBase>>("A", Teuchos::null, "Generating factory of the matrix A");
98  validParamList->set<RCP<const FactoryBase>>("UnAmalgamationInfo", Teuchos::null, "Generating factory for UnAmalgamationInfo");
99  validParamList->set<RCP<const FactoryBase>>("Coordinates", Teuchos::null, "Generating factory for Coordinates");
100  validParamList->set<RCP<const FactoryBase>>("BlockNumber", Teuchos::null, "Generating factory for BlockNumber");
101  validParamList->set<RCP<const FactoryBase>>("Material", Teuchos::null, "Generating factory for Material");
102 
103  return validParamList;
104 }
105 
106 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
108  Input(currentLevel, "A");
109  Input(currentLevel, "UnAmalgamationInfo");
110 
111  const ParameterList& pL = GetParameterList();
112 
113  std::string socUsesMatrix = pL.get<std::string>("aggregation: strength-of-connection: matrix");
114  bool needCoords = (socUsesMatrix == "distance laplacian");
115 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
116  std::string droppingMethod = pL.get<std::string>("aggregation: drop scheme");
117  needCoords |= (droppingMethod.find("distance laplacian") != std::string::npos);
118 #endif
119  if (needCoords) {
120  Input(currentLevel, "Coordinates");
121  std::string distLaplMetric = pL.get<std::string>("aggregation: distance laplacian metric");
122  if (distLaplMetric == "material")
123  Input(currentLevel, "Material");
124  }
125 
126  bool useBlocking = pL.get<bool>("aggregation: use blocking");
127 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
128  useBlocking |= (droppingMethod.find("block diagonal") != std::string::npos);
129 #endif
130  if (useBlocking) {
131  Input(currentLevel, "BlockNumber");
132  }
133 }
134 
135 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
137  Build(Level& currentLevel) const {
138  auto A = Get<RCP<Matrix>>(currentLevel, "A");
139  TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() % A->GetStorageBlockSize() != 0, Exceptions::RuntimeError, "A->GetFixedBlockSize() needs to be a multiple of A->GetStorageBlockSize()");
140  LO blkSize = A->GetFixedBlockSize() / A->GetStorageBlockSize();
141 
142  std::tuple<GlobalOrdinal, boundary_nodes_type> results;
143  if (blkSize == 1)
144  results = BuildScalar(currentLevel);
145  else
146  results = BuildVector(currentLevel);
147 
148  if (GetVerbLevel() & Statistics1) {
149  GlobalOrdinal numDropped = std::get<0>(results);
150  auto boundaryNodes = std::get<1>(results);
151 
152  GO numLocalBoundaryNodes = 0;
153 
154  Kokkos::parallel_reduce(
155  "MueLu:CoalesceDropF:Build:bnd", range_type(0, boundaryNodes.extent(0)),
156  KOKKOS_LAMBDA(const LO i, GO& n) {
157  if (boundaryNodes(i))
158  n++;
159  },
160  numLocalBoundaryNodes);
161 
162  if (IsPrint(Statistics1)) {
163  auto comm = A->getRowMap()->getComm();
164 
165  std::vector<GlobalOrdinal> localStats = {numLocalBoundaryNodes, numDropped};
166  std::vector<GlobalOrdinal> globalStats(2);
167  Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, 2, localStats.data(), globalStats.data());
168 
169  GO numGlobalTotal = A->getGlobalNumEntries();
170  GO numGlobalBoundaryNodes = globalStats[0];
171  GO numGlobalDropped = globalStats[1];
172 
173  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
174  if (numGlobalTotal != 0) {
175  GetOStream(Statistics1) << "Number of dropped entries: "
176  << numGlobalDropped << "/" << numGlobalTotal
177  << " (" << 100 * Teuchos::as<double>(numGlobalDropped) / Teuchos::as<double>(numGlobalTotal) << "%)" << std::endl;
178  }
179  }
180  }
181 }
182 
183 template <class local_matrix_type, class boundary_nodes_view, class... Functors>
184 void runBoundaryFunctors(local_matrix_type& lclA, boundary_nodes_view& boundaryNodes, Functors&... functors) {
185  using local_ordinal_type = typename local_matrix_type::ordinal_type;
186  using execution_space = typename local_matrix_type::execution_space;
187  using range_type = Kokkos::RangePolicy<local_ordinal_type, execution_space>;
188  auto range = range_type(0, boundaryNodes.extent(0));
189  auto boundaries = BoundaryDetection::BoundaryFunctor(lclA, functors...);
190  Kokkos::parallel_for("CoalesceDrop::BoundaryDetection", range, boundaries);
191 }
192 
193 template <class magnitudeType>
194 void translateOldAlgoParam(const Teuchos::ParameterList& pL, std::string& droppingMethod, bool& useBlocking, std::string& socUsesMatrix, std::string& socUsesMeasure, bool& symmetrizeDroppedGraph, bool& generateColoringGraph, magnitudeType& threshold, MueLu::MatrixConstruction::lumpingType& lumpingChoice) {
195  std::set<std::string> validDroppingMethods = {"piece-wise", "cut-drop"};
196 
197  if (!pL.get<bool>("filtered matrix: use lumping")) lumpingChoice = MueLu::MatrixConstruction::no_lumping;
198 
199  if (validDroppingMethods.find(droppingMethod) == validDroppingMethods.end()) {
200  std::string algo = droppingMethod;
201  std::string classicalAlgoStr = pL.get<std::string>("aggregation: classical algo");
202  std::string distanceLaplacianAlgoStr = pL.get<std::string>("aggregation: distance laplacian algo");
203 
204  // Remove prefix "block diagonal" from algo
205  if (algo.find("block diagonal") == 0) {
206  useBlocking = true;
207  algo = algo.substr(14);
208  if (algo != "") {
209  algo = algo.substr(1);
210  }
211  }
212 
213  if ((algo == "classical") || (algo == "signed classical sa") || (algo == "signed classical") || (algo == "colored signed classical")) {
214  socUsesMatrix = "A";
215 
216  if (algo == "classical") {
217  socUsesMeasure = "smoothed aggregation";
218  } else if (algo == "signed classical sa") {
219  socUsesMeasure = "signed smoothed aggregation";
220  } else if (algo == "signed classical") {
221  socUsesMeasure = "signed ruge-stueben";
222  } else if (algo == "colored signed classical") {
223  socUsesMeasure = "signed ruge-stueben";
224  generateColoringGraph = true;
225  }
226 
227  if (classicalAlgoStr == "default")
228  droppingMethod = "point-wise";
229  else if (classicalAlgoStr == "unscaled cut") {
230  socUsesMeasure = "unscaled";
231  droppingMethod = "cut-drop";
232  } else if (classicalAlgoStr == "scaled cut") {
233  droppingMethod = "cut-drop";
234  } else if (classicalAlgoStr == "scaled cut symmetric") {
235  droppingMethod = "cut-drop";
236  symmetrizeDroppedGraph = true;
237  }
238  } else if ((algo == "distance laplacian") || (algo == "signed classical sa distance laplacian") || (algo == "signed classical distance laplacian")) {
239  socUsesMatrix = "distance laplacian";
240 
241  if (algo == "distance laplacian") {
242  socUsesMeasure = "smoothed aggregation";
243  } else if (algo == "signed classical sa distance laplacian") {
244  socUsesMeasure = "signed smoothed aggregation";
245  } else if (algo == "signed classical distance laplacian") {
246  socUsesMeasure = "signed ruge-stueben";
247  }
248 
249  if (distanceLaplacianAlgoStr == "default")
250  droppingMethod = "point-wise";
251  else if (distanceLaplacianAlgoStr == "unscaled cut") {
252  socUsesMeasure = "unscaled";
253  droppingMethod = "cut-drop";
254  } else if (distanceLaplacianAlgoStr == "scaled cut") {
255  droppingMethod = "cut-drop";
256  } else if (distanceLaplacianAlgoStr == "scaled cut symmetric") {
257  droppingMethod = "cut-drop";
258  symmetrizeDroppedGraph = true;
259  }
260  } else if (algo == "") {
261  // algo was "block diagonal", but we process and remove the "block diagonal" part
262  socUsesMatrix = "A";
264  }
265  }
266 }
267 
268 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
269 std::tuple<GlobalOrdinal, typename MueLu::LWGraph_kokkos<LocalOrdinal, GlobalOrdinal, Node>::boundary_nodes_type> CoalesceDropFactory_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
270  BuildScalar(Level& currentLevel) const {
271  FactoryMonitor m(*this, "BuildScalar", currentLevel);
272 
275  using local_matrix_type = typename MatrixType::local_matrix_type;
276  using local_graph_type = typename GraphType::local_graph_type;
277  using rowptr_type = typename local_graph_type::row_map_type::non_const_type;
278  using entries_type = typename local_graph_type::entries_type::non_const_type;
279  using values_type = typename local_matrix_type::values_type::non_const_type;
280  using device_type = typename Node::device_type;
281  using memory_space = typename device_type::memory_space;
282  using results_view_type = Kokkos::View<DecisionType*, memory_space>;
283  using magnitudeType = typename Teuchos::ScalarTraits<Scalar>::magnitudeType;
284  using doubleMultiVector = Xpetra::MultiVector<magnitudeType, LO, GO, NO>;
285 
286  typedef Teuchos::ScalarTraits<Scalar> STS;
287  const magnitudeType zero = Teuchos::ScalarTraits<magnitudeType>::zero();
288 
289  auto A = Get<RCP<Matrix>>(currentLevel, "A");
290 
292  // Process parameterlist
293  const ParameterList& pL = GetParameterList();
294 
295  // Boundary detection
296  const magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
297  const magnitudeType rowSumTol = as<magnitudeType>(pL.get<double>("aggregation: row sum drop tol"));
298  const LocalOrdinal dirichletNonzeroThreshold = 1;
299 
300  // Dropping
301  bool useBlocking = pL.get<bool>("aggregation: use blocking");
302  std::string droppingMethod = pL.get<std::string>("aggregation: drop scheme");
303  std::string socUsesMatrix = pL.get<std::string>("aggregation: strength-of-connection: matrix");
304  std::string socUsesMeasure = pL.get<std::string>("aggregation: strength-of-connection: measure");
305  std::string distanceLaplacianMetric = pL.get<std::string>("aggregation: distance laplacian metric");
306  bool symmetrizeDroppedGraph = pL.get<bool>("aggregation: symmetrize graph after dropping");
307  magnitudeType threshold;
308  // If we're doing the ML-style halving of the drop tol at each level, we do that here.
309  if (pL.get<bool>("aggregation: use ml scaling of drop tol"))
310  threshold = pL.get<double>("aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID());
311  else
312  threshold = as<magnitudeType>(pL.get<double>("aggregation: drop tol"));
313  bool aggregationMayCreateDirichlet = pL.get<bool>("aggregation: dropping may create Dirichlet");
314 
315  // Fill
316  const bool reuseGraph = pL.get<bool>("filtered matrix: reuse graph");
317  const bool reuseEigenvalue = pL.get<bool>("filtered matrix: reuse eigenvalue");
318 
319  const bool useRootStencil = pL.get<bool>("filtered matrix: use root stencil");
320  const bool useSpreadLumping = pL.get<bool>("filtered matrix: use spread lumping");
321  const std::string lumpingChoiceString = pL.get<std::string>("filtered matrix: lumping choice");
323  if (lumpingChoiceString == "diag lumping")
325  else if (lumpingChoiceString == "distributed lumping")
327 
328  const magnitudeType filteringDirichletThreshold = as<magnitudeType>(pL.get<double>("filtered matrix: Dirichlet threshold"));
329 
330  // coloring graph
331  bool generateColoringGraph = pL.get<bool>("aggregation: coloring: use color graph");
332  const bool localizeColoringGraph = pL.get<bool>("aggregation: coloring: localize color graph");
333  const bool symmetrizeColoringGraph = true;
334 
335 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
336  translateOldAlgoParam(pL, droppingMethod, useBlocking, socUsesMatrix, socUsesMeasure, symmetrizeDroppedGraph, generateColoringGraph, threshold, lumpingChoice);
337 #endif
338 
339  {
340  std::stringstream ss;
341  ss << "dropping scheme = \"" << droppingMethod << "\", strength-of-connection measure = \"" << socUsesMeasure << "\", strength-of-connection matrix = \"" << socUsesMatrix << "\", ";
342  if (socUsesMatrix == "distance laplacian")
343  ss << "distance laplacian metric = \"" << distanceLaplacianMetric << "\", ";
344  ss << "threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << ", useBlocking = " << useBlocking;
345  ss << ", symmetrizeDroppedGraph = " << symmetrizeDroppedGraph << std::endl;
346 
347  GetOStream(Runtime0) << ss.str();
348  }
349 
350  TEUCHOS_ASSERT(!useRootStencil);
351  TEUCHOS_ASSERT(!useSpreadLumping);
352  TEUCHOS_ASSERT((lumpingChoice != MueLu::MatrixConstruction::distributed_lumping) || !reuseGraph);
353  if (droppingMethod == "cut-drop")
354  TEUCHOS_TEST_FOR_EXCEPTION(threshold > 1.0, Exceptions::RuntimeError, "For cut-drop algorithms, \"aggregation: drop tol\" = " << threshold << ", needs to be <= 1.0");
355 
357  // We perform four sweeps over the rows of A:
358  // Pass 1: detection of boundary nodes
359  // Pass 2: diagonal extraction
360  // Pass 3: drop decision for each entry and construction of the rowptr of the filtered matrix
361  // Pass 4: fill of the filtered matrix
362  //
363  // Pass 1 and 3 apply a sequence of criteria to each row of the matrix.
364 
365  // TODO: We could merge pass 1 and 2.
366 
367  auto crsA = toCrsMatrix(A);
368  auto lclA = crsA->getLocalMatrixDevice();
369  auto range = range_type(0, lclA.numRows());
370 
372  // Pass 1: Detect boundary nodes
373  //
374  // The following criteria are available:
375  // - BoundaryDetection::PointDirichletFunctor
376  // Marks rows as Dirichlet based on value threshold and number of off-diagonal entries
377  // - BoundaryDetection::RowSumFunctor
378  // Marks rows as Dirichlet bases on row-sum criterion
379 
380  // Dirichlet nodes
381  auto boundaryNodes = boundary_nodes_type("boundaryNodes", lclA.numRows()); // initialized to false
382  {
383  SubFactoryMonitor mBoundary(*this, "Boundary detection", currentLevel);
384 
385  // macro that applies boundary detection functors
386  auto dirichlet_detection = BoundaryDetection::PointDirichletFunctor(lclA, boundaryNodes, dirichletThreshold, dirichletNonzeroThreshold);
387 
388  if (rowSumTol <= 0.) {
389  runBoundaryFunctors(lclA, boundaryNodes, dirichlet_detection);
390  } else {
391  auto apply_rowsum = BoundaryDetection::RowSumFunctor(lclA, boundaryNodes, rowSumTol);
392  runBoundaryFunctors(lclA, boundaryNodes, dirichlet_detection, apply_rowsum);
393  }
394  }
395  // In what follows, boundaryNodes can still still get modified if aggregationMayCreateDirichlet == true.
396  // Otherwise we're now done with it now.
397 
399  // Pass 2 & 3: Diagonal extraction and determine dropping and construct
400  // rowptr of filtered matrix
401  //
402  // The following criteria are available:
403  // - Misc::PointwiseDropBoundaryFunctor
404  // Drop all rows that have been marked as Dirichlet
405  // - Misc::DropOffRankFunctor
406  // Drop all entries that are off-rank
407  // - ClassicalDropping::DropFunctor
408  // Classical dropping
409  // - DistanceLaplacian::DropFunctor
410  // Distance Laplacian dropping
411  // - Misc::KeepDiagonalFunctor
412  // Mark diagonal as KEEP
413  // - Misc::MarkSingletonFunctor
414  // Mark singletons after dropping as Dirichlet
415  // - Misc::BlockDiagonalizeFunctor
416  // Drop coupling between blocks
417  //
418  // For the block diagonal variants we first block diagonalized and then apply "blocksize = 1" algorithms.
419 
420  // rowptr of filtered A
421  auto filtered_rowptr = rowptr_type("filtered_rowptr", lclA.numRows() + 1);
422  // Number of nonzeros of filtered A
423  LocalOrdinal nnz_filtered = 0;
424  // dropping decisions for each entry
425  auto results = results_view_type("results", lclA.nnz()); // initialized to UNDECIDED
426  {
427  SubFactoryMonitor mDropping(*this, "Dropping decisions", currentLevel);
428 
429  if (threshold != zero) {
430  if (socUsesMatrix == "A") {
431  if (socUsesMeasure == "unscaled") {
432  ScalarDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::UnscaledMeasure>::runDroppingFunctors_on_A(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold,
433  aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
434  } else if (socUsesMeasure == "smoothed aggregation") {
435  ScalarDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SmoothedAggregationMeasure>::runDroppingFunctors_on_A(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold,
436  aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
437  } else if (socUsesMeasure == "signed ruge-stueben") {
438  ScalarDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedRugeStuebenMeasure>::runDroppingFunctors_on_A(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold,
439  aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
440  } else if (socUsesMeasure == "signed smoothed aggregation") {
441  ScalarDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedSmoothedAggregationMeasure>::runDroppingFunctors_on_A(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold,
442  aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
443  }
444  } else if (socUsesMatrix == "distance laplacian") {
445  auto coords = Get<RCP<doubleMultiVector>>(currentLevel, "Coordinates");
446  if (socUsesMeasure == "unscaled") {
447  ScalarDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::UnscaledMeasure>::runDroppingFunctors_on_dlap(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, currentLevel, *this);
448  } else if (socUsesMeasure == "smoothed aggregation") {
449  ScalarDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SmoothedAggregationMeasure>::runDroppingFunctors_on_dlap(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, currentLevel, *this);
450  } else if (socUsesMeasure == "signed ruge-stueben") {
451  ScalarDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedRugeStuebenMeasure>::runDroppingFunctors_on_dlap(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, currentLevel, *this);
452  } else if (socUsesMeasure == "signed smoothed aggregation") {
453  ScalarDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedSmoothedAggregationMeasure>::runDroppingFunctors_on_dlap(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, currentLevel, *this);
454  }
455  }
456  } else {
457  Kokkos::deep_copy(results, KEEP);
458 
459  if (symmetrizeDroppedGraph) {
460  auto drop_boundaries = Misc::PointwiseSymmetricDropBoundaryFunctor(*A, boundaryNodes, results);
461  ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *this, drop_boundaries);
462  } else {
463  auto no_op = Misc::NoOpFunctor<LocalOrdinal>();
464  ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *this, no_op);
465  }
466  }
467 
468  if (symmetrizeDroppedGraph) {
469  auto symmetrize = Misc::SymmetrizeFunctor(lclA, results);
470  ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *this, symmetrize);
471  }
472  }
473  GO numDropped = lclA.nnz() - nnz_filtered;
474  // We now know the number of entries of filtered A and have the final rowptr.
475 
477  // Pass 4: Create local matrix for filtered A
478  //
479  // Dropped entries are optionally lumped to the diagonal.
480 
481  RCP<Matrix> filteredA;
482  RCP<LWGraph_kokkos> graph;
483  {
484  SubFactoryMonitor mFill(*this, "Filtered matrix fill", currentLevel);
485 
486  local_matrix_type lclFilteredA;
487  local_graph_type lclGraph;
488  if (reuseGraph) {
489  filteredA = MatrixFactory::BuildCopy(A);
490  lclFilteredA = filteredA->getLocalMatrixDevice();
491 
492  auto colidx = entries_type("entries", nnz_filtered);
493  lclGraph = local_graph_type(colidx, filtered_rowptr);
494  } else {
495  auto colidx = entries_type("entries", nnz_filtered);
496  auto values = values_type("values", nnz_filtered);
497  lclFilteredA = local_matrix_type("filteredA",
498  lclA.numRows(), lclA.numCols(),
499  nnz_filtered,
500  values, filtered_rowptr, colidx);
501  }
502 
503  if (lumpingChoice != MueLu::MatrixConstruction::no_lumping) {
504  if (reuseGraph) {
505  auto fillFunctor = MatrixConstruction::PointwiseFillReuseFunctor<local_matrix_type, local_graph_type, true>(lclA, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
506  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_reuse", range, fillFunctor);
507  } else {
508  if (lumpingChoice == MueLu::MatrixConstruction::diag_lumping) {
509  auto fillFunctor = MatrixConstruction::PointwiseFillNoReuseFunctor<local_matrix_type, MueLu::MatrixConstruction::diag_lumping>(lclA, results, lclFilteredA, filteringDirichletThreshold);
510  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_noreuse", range, fillFunctor);
511  } else if (lumpingChoice == MueLu::MatrixConstruction::distributed_lumping) {
512  auto fillFunctor = MatrixConstruction::PointwiseFillNoReuseFunctor<local_matrix_type, MueLu::MatrixConstruction::distributed_lumping>(lclA, results, lclFilteredA, filteringDirichletThreshold);
513  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_noreuse", range, fillFunctor);
514  }
515  }
516  } else {
517  if (reuseGraph) {
518  auto fillFunctor = MatrixConstruction::PointwiseFillReuseFunctor<local_matrix_type, local_graph_type, false>(lclA, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
519  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_reuse", range, fillFunctor);
520  } else {
521  auto fillFunctor = MatrixConstruction::PointwiseFillNoReuseFunctor<local_matrix_type, MueLu::MatrixConstruction::no_lumping>(lclA, results, lclFilteredA, filteringDirichletThreshold);
522  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_noreuse", range, fillFunctor);
523  }
524  }
525 
526  if (!reuseGraph)
527  filteredA = MatrixFactory::Build(lclFilteredA, A->getRowMap(), A->getColMap(), A->getDomainMap(), A->getRangeMap());
528  filteredA->SetFixedBlockSize(A->GetFixedBlockSize());
529 
530  if (reuseEigenvalue) {
531  // Reuse max eigenvalue from A
532  // It is unclear what eigenvalue is the best for the smoothing, but we already may have
533  // the D^{-1}A estimate in A, may as well use it.
534  // NOTE: ML does that too
535  filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate());
536  } else {
537  filteredA->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits<SC>::one());
538  }
539 
540  if (!reuseGraph) {
541  // Use graph of filteredA as graph.
542  lclGraph = filteredA->getCrsGraph()->getLocalGraphDevice();
543  }
544  graph = rcp(new LWGraph_kokkos(lclGraph, filteredA->getRowMap(), filteredA->getColMap(), "amalgamated graph of A"));
545  graph->SetBoundaryNodeMap(boundaryNodes);
546  }
547 
548  // Construct a second graph for coloring
549  if (generateColoringGraph) {
550  SubFactoryMonitor mColoringGraph(*this, "Construct coloring graph", currentLevel);
551 
552  filtered_rowptr = rowptr_type("rowptr_coloring_graph", lclA.numRows() + 1);
553  if (localizeColoringGraph) {
554  auto drop_offrank = Misc::DropOffRankFunctor(lclA, results);
555  ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *this, drop_offrank);
556  }
557  if (symmetrizeColoringGraph) {
558  auto symmetrize = Misc::SymmetrizeFunctor(lclA, results);
559  ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *this, symmetrize);
560  }
561  auto colidx = entries_type("entries_coloring_graph", nnz_filtered);
562  auto lclGraph = local_graph_type(colidx, filtered_rowptr);
563  auto graphConstruction = MatrixConstruction::GraphConstruction<local_matrix_type, local_graph_type>(lclA, results, lclGraph);
564  Kokkos::parallel_for("MueLu::CoalesceDrop::Construct_coloring_graph", range, graphConstruction);
565 
566  auto colorGraph = rcp(new LWGraph_kokkos(lclGraph, filteredA->getRowMap(), filteredA->getColMap(), "coloring graph"));
567  Set(currentLevel, "Coloring Graph", colorGraph);
568  }
569 
570  LO dofsPerNode = 1;
571  Set(currentLevel, "DofsPerNode", dofsPerNode);
572  Set(currentLevel, "Graph", graph);
573  Set(currentLevel, "A", filteredA);
574 
575  return std::make_tuple(numDropped, boundaryNodes);
576 }
577 
578 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
579 std::tuple<GlobalOrdinal, typename MueLu::LWGraph_kokkos<LocalOrdinal, GlobalOrdinal, Node>::boundary_nodes_type> CoalesceDropFactory_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
580  BuildVector(Level& currentLevel) const {
581  FactoryMonitor m(*this, "BuildVector", currentLevel);
582 
585  using local_matrix_type = typename MatrixType::local_matrix_type;
586  using local_graph_type = typename GraphType::local_graph_type;
587  using rowptr_type = typename local_graph_type::row_map_type::non_const_type;
588  using entries_type = typename local_graph_type::entries_type::non_const_type;
589  using values_type = typename local_matrix_type::values_type::non_const_type;
590  using device_type = typename Node::device_type;
591  using memory_space = typename device_type::memory_space;
592  using results_view_type = Kokkos::View<DecisionType*, memory_space>;
593  using magnitudeType = typename Teuchos::ScalarTraits<Scalar>::magnitudeType;
594  using doubleMultiVector = Xpetra::MultiVector<magnitudeType, LO, GO, NO>;
595 
596  typedef Teuchos::ScalarTraits<Scalar> STS;
597  const magnitudeType zero = Teuchos::ScalarTraits<magnitudeType>::zero();
598 
599  auto A = Get<RCP<Matrix>>(currentLevel, "A");
600 
601  /* NOTE: storageblocksize (from GetStorageBlockSize()) is the size of a block in the chosen storage scheme.
602  blkSize is the number of storage blocks that must kept together during the amalgamation process.
603 
604  Both of these quantities may be different than numPDEs (from GetFixedBlockSize()), but the following must always hold:
605 
606  numPDEs = blkSize * storageblocksize.
607 
608  If numPDEs==1
609  Matrix is point storage (classical CRS storage). storageblocksize=1 and blkSize=1
610  No other values makes sense.
611 
612  If numPDEs>1
613  If matrix uses point storage, then storageblocksize=1 and blkSize=numPDEs.
614  If matrix uses block storage, with block size of n, then storageblocksize=n, and blkSize=numPDEs/n.
615  Thus far, only storageblocksize=numPDEs and blkSize=1 has been tested.
616  */
617 
618  TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() % A->GetStorageBlockSize() != 0, Exceptions::RuntimeError, "A->GetFixedBlockSize() needs to be a multiple of A->GetStorageBlockSize()");
619  LO blkSize = A->GetFixedBlockSize() / A->GetStorageBlockSize();
620 
621  auto amalInfo = Get<RCP<AmalgamationInfo>>(currentLevel, "UnAmalgamationInfo");
622 
623  const RCP<const Map> rowMap = A->getRowMap();
624  const RCP<const Map> colMap = A->getColMap();
625 
626  // build a node row map (uniqueMap = non-overlapping) and a node column map
627  // (nonUniqueMap = overlapping). The arrays rowTranslation and colTranslation
628  // stored in the AmalgamationInfo class container contain the local node id
629  // given a local dof id. The data is calculated in the AmalgamationFactory and
630  // stored in the variable "UnAmalgamationInfo" (which is of type AmalagamationInfo)
631  const RCP<const Map> uniqueMap = amalInfo->getNodeRowMap();
632  const RCP<const Map> nonUniqueMap = amalInfo->getNodeColMap();
633  Array<LO> rowTranslationArray = *(amalInfo->getRowTranslation()); // TAW should be transform that into a View?
634  Array<LO> colTranslationArray = *(amalInfo->getColTranslation());
635 
636  Kokkos::View<LO*, Kokkos::MemoryUnmanaged>
637  rowTranslationView(rowTranslationArray.getRawPtr(), rowTranslationArray.size());
638  Kokkos::View<LO*, Kokkos::MemoryUnmanaged>
639  colTranslationView(colTranslationArray.getRawPtr(), colTranslationArray.size());
640 
641  // get number of local nodes
642  LO numNodes = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
643  typedef typename Kokkos::View<LocalOrdinal*, typename Node::device_type> id_translation_type;
644  id_translation_type rowTranslation("dofId2nodeId", rowTranslationArray.size());
645  id_translation_type colTranslation("ov_dofId2nodeId", colTranslationArray.size());
646  Kokkos::deep_copy(rowTranslation, rowTranslationView);
647  Kokkos::deep_copy(colTranslation, colTranslationView);
648 
649  // extract striding information
650  blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map)
651  LocalOrdinal blkId = -1; //< the block id within a strided map or -1 if it is a full block map
652  LocalOrdinal blkPartSize = A->GetFixedBlockSize(); //< stores block size of part blkId (or the full block size)
653  if (A->IsView("stridedMaps") == true) {
654  const RCP<const Map> myMap = A->getRowMap("stridedMaps");
655  const RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
656  TEUCHOS_TEST_FOR_EXCEPTION(strMap.is_null() == true, Exceptions::RuntimeError, "Map is not of type stridedMap");
657  blkSize = Teuchos::as<const LocalOrdinal>(strMap->getFixedBlockSize());
658  blkId = strMap->getStridedBlockId();
659  if (blkId > -1)
660  blkPartSize = Teuchos::as<LocalOrdinal>(strMap->getStridingData()[blkId]);
661  }
662 
663  TEUCHOS_TEST_FOR_EXCEPTION(A->getRowMap()->getLocalNumElements() % blkPartSize != 0, MueLu::Exceptions::RuntimeError, "MueLu::CoalesceDropFactory: Number of local elements is " << A->getRowMap()->getLocalNumElements() << " but should be a multiple of " << blkPartSize);
664 
666  // Process parameterlist
667  const ParameterList& pL = GetParameterList();
668 
669  // Boundary detection
670  const magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
671  const magnitudeType rowSumTol = as<magnitudeType>(pL.get<double>("aggregation: row sum drop tol"));
672  const LocalOrdinal dirichletNonzeroThreshold = 1;
673  const bool useGreedyDirichlet = pL.get<bool>("aggregation: greedy Dirichlet");
674  TEUCHOS_TEST_FOR_EXCEPTION(rowSumTol > zero, MueLu::Exceptions::RuntimeError, "MueLu::CoalesceDropFactory: RowSum is not implemented for vectorial problems.");
675 
676  // Dropping
677  bool useBlocking = pL.get<bool>("aggregation: use blocking");
678  std::string droppingMethod = pL.get<std::string>("aggregation: drop scheme");
679  std::string socUsesMatrix = pL.get<std::string>("aggregation: strength-of-connection: matrix");
680  std::string socUsesMeasure = pL.get<std::string>("aggregation: strength-of-connection: measure");
681  std::string distanceLaplacianMetric = pL.get<std::string>("aggregation: distance laplacian metric");
682  bool symmetrizeDroppedGraph = pL.get<bool>("aggregation: symmetrize graph after dropping");
683  magnitudeType threshold;
684  // If we're doing the ML-style halving of the drop tol at each level, we do that here.
685  if (pL.get<bool>("aggregation: use ml scaling of drop tol"))
686  threshold = pL.get<double>("aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID());
687  else
688  threshold = as<magnitudeType>(pL.get<double>("aggregation: drop tol"));
689  bool aggregationMayCreateDirichlet = pL.get<bool>("aggregation: dropping may create Dirichlet");
690 
691  // Fill
692  const bool reuseGraph = pL.get<bool>("filtered matrix: reuse graph");
693  const bool reuseEigenvalue = pL.get<bool>("filtered matrix: reuse eigenvalue");
694 
695  const bool useRootStencil = pL.get<bool>("filtered matrix: use root stencil");
696  const bool useSpreadLumping = pL.get<bool>("filtered matrix: use spread lumping");
697  const std::string lumpingChoiceString = pL.get<std::string>("filtered matrix: lumping choice");
699  if (lumpingChoiceString == "diag lumping")
701  else if (lumpingChoiceString == "distributed lumping")
703 
704  const magnitudeType filteringDirichletThreshold = as<magnitudeType>(pL.get<double>("filtered matrix: Dirichlet threshold"));
705 
706  // coloring graph
707  bool generateColoringGraph = pL.get<bool>("aggregation: coloring: use color graph");
708  const bool localizeColoringGraph = pL.get<bool>("aggregation: coloring: localize color graph");
709  const bool symmetrizeColoringGraph = true;
710 
711 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
712  translateOldAlgoParam(pL, droppingMethod, useBlocking, socUsesMatrix, socUsesMeasure, symmetrizeDroppedGraph, generateColoringGraph, threshold, lumpingChoice);
713 #endif
714  {
715  std::stringstream ss;
716  ss << "dropping scheme = \"" << droppingMethod << "\", strength-of-connection measure = \"" << socUsesMeasure << "\", strength-of-connection matrix = \"" << socUsesMatrix << "\", ";
717  if (socUsesMatrix == "distance laplacian")
718  ss << "distance laplacian metric = \"" << distanceLaplacianMetric << "\", ";
719  ss << "threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << ", useBlocking = " << useBlocking;
720  ss << ", symmetrizeDroppedGraph = " << symmetrizeDroppedGraph << std::endl;
721 
722  GetOStream(Runtime0) << ss.str();
723  }
724 
725  TEUCHOS_ASSERT(!useRootStencil);
726  TEUCHOS_ASSERT(!useSpreadLumping);
727  TEUCHOS_ASSERT((lumpingChoice != MueLu::MatrixConstruction::distributed_lumping) || !reuseGraph);
728  if (droppingMethod == "cut-drop")
729  TEUCHOS_TEST_FOR_EXCEPTION(threshold > 1.0, Exceptions::RuntimeError, "For cut-drop algorithms, \"aggregation: drop tol\" = " << threshold << ", needs to be <= 1.0");
730 
732  // We perform four sweeps over the rows of A:
733  // Pass 1: detection of boundary nodes
734  // Pass 2: diagonal extraction
735  // Pass 3: drop decision for each entry and construction of the rowptr of the filtered matrix
736  // Pass 4: fill of the filtered matrix
737  //
738  // Pass 1 and 3 apply a sequence of criteria to each row of the matrix.
739 
740  // TODO: We could merge pass 1 and 2.
741 
742  auto crsA = toCrsMatrix(A);
743  auto lclA = crsA->getLocalMatrixDevice();
744  auto range = range_type(0, numNodes);
745 
747  // Pass 1: Detect boundary nodes
748  //
749  // The following criteria are available:
750  // - BoundaryDetection::VectorDirichletFunctor
751  // Marks rows as Dirichlet based on value threshold and number of off-diagonal entries
752 
753  // Dirichlet nodes
754  auto boundaryNodes = boundary_nodes_type("boundaryNodes", numNodes); // initialized to false
755  {
756  SubFactoryMonitor mBoundary(*this, "Boundary detection", currentLevel);
757 
758  if (useGreedyDirichlet) {
759  auto dirichlet_detection = BoundaryDetection::VectorDirichletFunctor<local_matrix_type, true>(lclA, blkPartSize, boundaryNodes, dirichletThreshold, dirichletNonzeroThreshold);
760  runBoundaryFunctors(lclA, boundaryNodes, dirichlet_detection);
761  } else {
762  auto dirichlet_detection = BoundaryDetection::VectorDirichletFunctor<local_matrix_type, false>(lclA, blkPartSize, boundaryNodes, dirichletThreshold, dirichletNonzeroThreshold);
763  runBoundaryFunctors(lclA, boundaryNodes, dirichlet_detection);
764  }
765  }
766  // In what follows, boundaryNodes can still still get modified if aggregationMayCreateDirichlet == true.
767  // Otherwise we're now done with it now.
768 
770  // Pass 2 & 3: Diagonal extraction and determine dropping and construct
771  // rowptr of filtered matrix
772  //
773  // The following criteria are available:
774  // - Misc::VectorDropBoundaryFunctor
775  // Drop all rows that have been marked as Dirichlet
776  // - Misc::DropOffRankFunctor
777  // Drop all entries that are off-rank
778  // - ClassicalDropping::DropFunctor
779  // Classical dropping
780  // - DistanceLaplacian::VectorDropFunctor
781  // Distance Laplacian dropping
782  // - Misc::KeepDiagonalFunctor
783  // Mark diagonal as KEEP
784  // - Misc::MarkSingletonFunctor
785  // Mark singletons after dropping as Dirichlet
786 
787  // rowptr of filtered A
788  auto filtered_rowptr = rowptr_type("rowptr", lclA.numRows() + 1);
789  auto graph_rowptr = rowptr_type("rowptr", numNodes + 1);
790  // Number of nonzeros of filtered A and graph
791  Kokkos::pair<LocalOrdinal, LocalOrdinal> nnz = {0, 0};
792 
793  // dropping decisions for each entry
794  auto results = results_view_type("results", lclA.nnz()); // initialized to UNDECIDED
795 
796  RCP<Matrix> mergedA;
797  {
798  SubFactoryMonitor mDropping(*this, "Dropping decisions", currentLevel);
799 
800  {
801  // Construct merged A.
802 
803  auto merged_rowptr = rowptr_type("rowptr", numNodes + 1);
804  LocalOrdinal nnz_merged = 0;
805 
806  auto functor = MatrixConstruction::MergeCountFunctor(lclA, blkPartSize, colTranslation, merged_rowptr);
807  Kokkos::parallel_scan("MergeCount", range, functor, nnz_merged);
808 
809  local_graph_type lclMergedGraph;
810  auto colidx_merged = entries_type("entries", nnz_merged);
811  auto values_merged = values_type("values", nnz_merged);
812 
813  local_matrix_type lclMergedA = local_matrix_type("mergedA",
814  numNodes, nonUniqueMap->getLocalNumElements(),
815  nnz_merged,
816  values_merged, merged_rowptr, colidx_merged);
817 
818  auto fillFunctor = MatrixConstruction::MergeFillFunctor<local_matrix_type>(lclA, blkSize, colTranslation, lclMergedA);
819  Kokkos::parallel_for("MueLu::CoalesceDrop::MergeFill", range, fillFunctor);
820 
821  mergedA = Xpetra::MatrixFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(lclMergedA, uniqueMap, nonUniqueMap, uniqueMap, uniqueMap);
822  }
823 
824  if (threshold != zero) {
825  if (socUsesMatrix == "A") {
826  if (socUsesMeasure == "unscaled") {
827  VectorDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::UnscaledMeasure>::runDroppingFunctors_on_A(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
828  } else if (socUsesMeasure == "smoothed aggregation") {
829  VectorDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SmoothedAggregationMeasure>::runDroppingFunctors_on_A(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
830  } else if (socUsesMeasure == "signed ruge-stueben") {
831  VectorDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedRugeStuebenMeasure>::runDroppingFunctors_on_A(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
832  } else if (socUsesMeasure == "signed smoothed aggregation") {
833  VectorDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedSmoothedAggregationMeasure>::runDroppingFunctors_on_A(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
834  }
835  } else if (socUsesMatrix == "distance laplacian") {
836  auto coords = Get<RCP<doubleMultiVector>>(currentLevel, "Coordinates");
837 
838  Array<double> dlap_weights = pL.get<Array<double>>("aggregation: distance laplacian directional weights");
839  LocalOrdinal interleaved_blocksize = as<LocalOrdinal>(pL.get<int>("aggregation: block diagonal: interleaved blocksize"));
840  if (socUsesMeasure == "distance laplacian") {
841  LO dim = (LO)coords->getNumVectors();
842  // If anything isn't 1.0 we need to turn on the weighting
843  bool non_unity = false;
844  for (LO i = 0; !non_unity && i < (LO)dlap_weights.size(); i++) {
845  if (dlap_weights[i] != 1.0) {
846  non_unity = true;
847  }
848  }
849  if (non_unity) {
850  if ((LO)dlap_weights.size() == dim) {
851  distanceLaplacianMetric = "weighted";
852  } else if ((LO)dlap_weights.size() == interleaved_blocksize * dim)
853  distanceLaplacianMetric = "block weighted";
854  else {
855  TEUCHOS_TEST_FOR_EXCEPTION(1, Exceptions::RuntimeError,
856  "length of 'aggregation: distance laplacian directional weights' must equal the coordinate dimension OR the coordinate dimension times the blocksize");
857  }
858  if (GetVerbLevel() & Statistics1)
859  GetOStream(Statistics1) << "Using distance laplacian weights: " << dlap_weights << std::endl;
860  }
861  }
862 
863  if (socUsesMeasure == "unscaled") {
864  VectorDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::UnscaledMeasure>::runDroppingFunctors_on_dlap(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, dlap_weights, interleaved_blocksize, currentLevel, *this);
865  } else if (socUsesMeasure == "smoothed aggregation") {
866  VectorDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SmoothedAggregationMeasure>::runDroppingFunctors_on_dlap(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, dlap_weights, interleaved_blocksize, currentLevel, *this);
867  } else if (socUsesMeasure == "signed ruge-stueben") {
868  VectorDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedRugeStuebenMeasure>::runDroppingFunctors_on_dlap(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, dlap_weights, interleaved_blocksize, currentLevel, *this);
869  } else if (socUsesMeasure == "signed smoothed aggregation") {
870  VectorDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedSmoothedAggregationMeasure>::runDroppingFunctors_on_dlap(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, dlap_weights, interleaved_blocksize, currentLevel, *this);
871  }
872  }
873  } else {
874  Kokkos::deep_copy(results, KEEP);
875 
876  auto no_op = Misc::NoOpFunctor<LocalOrdinal>();
877  VectorDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, useBlocking, currentLevel, *this, no_op);
878  }
879 
880  if (symmetrizeDroppedGraph) {
881  auto symmetrize = Misc::SymmetrizeFunctor(lclA, results);
882  VectorDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, useBlocking, currentLevel, *this, symmetrize);
883  }
884  }
885  LocalOrdinal nnz_filtered = nnz.first;
886  LocalOrdinal nnz_graph = nnz.second;
887  GO numTotal = lclA.nnz();
888  GO numDropped = numTotal - nnz_filtered;
889  // We now know the number of entries of filtered A and have the final rowptr.
890 
892  // Pass 4: Create local matrix for filtered A
893  //
894  // Dropped entries are optionally lumped to the diagonal.
895 
896  RCP<Matrix> filteredA;
897  RCP<LWGraph_kokkos> graph;
898  {
899  SubFactoryMonitor mFill(*this, "Filtered matrix fill", currentLevel);
900 
901  local_matrix_type lclFilteredA;
902  if (reuseGraph) {
903  lclFilteredA = local_matrix_type("filteredA", lclA.graph, lclA.numCols());
904  } else {
905  auto colidx = entries_type("entries", nnz_filtered);
906  auto values = values_type("values", nnz_filtered);
907  lclFilteredA = local_matrix_type("filteredA",
908  lclA.numRows(), lclA.numCols(),
909  nnz_filtered,
910  values, filtered_rowptr, colidx);
911  }
912 
913  local_graph_type lclGraph;
914  {
915  auto colidx = entries_type("entries", nnz_graph);
916  lclGraph = local_graph_type(colidx, graph_rowptr);
917  }
918 
919  if (lumpingChoice != MueLu::MatrixConstruction::no_lumping) {
920  if (reuseGraph) {
921  auto fillFunctor = MatrixConstruction::VectorFillFunctor<local_matrix_type, true, true>(lclA, blkPartSize, colTranslation, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
922  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_reuse", range, fillFunctor);
923  } else {
924  auto fillFunctor = MatrixConstruction::VectorFillFunctor<local_matrix_type, true, false>(lclA, blkPartSize, colTranslation, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
925  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_noreuse", range, fillFunctor);
926  }
927  } else {
928  if (reuseGraph) {
929  auto fillFunctor = MatrixConstruction::VectorFillFunctor<local_matrix_type, false, true>(lclA, blkSize, colTranslation, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
930  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_reuse", range, fillFunctor);
931  } else {
932  auto fillFunctor = MatrixConstruction::VectorFillFunctor<local_matrix_type, false, false>(lclA, blkSize, colTranslation, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
933  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_noreuse", range, fillFunctor);
934  }
935  }
936 
937  filteredA = Xpetra::MatrixFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(lclFilteredA, A->getRowMap(), A->getColMap(), A->getDomainMap(), A->getRangeMap());
938  filteredA->SetFixedBlockSize(blkSize);
939 
940  if (reuseEigenvalue) {
941  // Reuse max eigenvalue from A
942  // It is unclear what eigenvalue is the best for the smoothing, but we already may have
943  // the D^{-1}A estimate in A, may as well use it.
944  // NOTE: ML does that too
945  filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate());
946  } else {
947  filteredA->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits<SC>::one());
948  }
949 
950  graph = rcp(new LWGraph_kokkos(lclGraph, uniqueMap, nonUniqueMap, "amalgamated graph of A"));
951  graph->SetBoundaryNodeMap(boundaryNodes);
952  }
953 
954  // Construct a second graph for coloring
955  if (generateColoringGraph) {
956  SubFactoryMonitor mColoringGraph(*this, "Construct coloring graph", currentLevel);
957 
958  filtered_rowptr = rowptr_type("rowptr_coloring_graph", lclA.numRows() + 1);
959  graph_rowptr = rowptr_type("rowptr", numNodes + 1);
960  if (localizeColoringGraph) {
961  auto drop_offrank = Misc::DropOffRankFunctor(lclA, results);
962  VectorDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, useBlocking, currentLevel, *this, drop_offrank);
963  }
964  if (symmetrizeColoringGraph) {
965  auto symmetrize = Misc::SymmetrizeFunctor(lclA, results);
966  VectorDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, useBlocking, currentLevel, *this, symmetrize);
967  }
968  auto colidx = entries_type("entries_coloring_graph", nnz_filtered);
969  auto lclGraph = local_graph_type(colidx, filtered_rowptr);
970  auto graphConstruction = MatrixConstruction::GraphConstruction<local_matrix_type, local_graph_type>(lclA, results, lclGraph);
971  Kokkos::parallel_for("MueLu::CoalesceDrop::Construct_coloring_graph", range, graphConstruction);
972 
973  auto colorGraph = rcp(new LWGraph_kokkos(lclGraph, filteredA->getRowMap(), filteredA->getColMap(), "coloring graph"));
974  Set(currentLevel, "Coloring Graph", colorGraph);
975  }
976 
977  LO dofsPerNode = blkSize;
978 
979  Set(currentLevel, "DofsPerNode", dofsPerNode);
980  Set(currentLevel, "Graph", graph);
981  Set(currentLevel, "A", filteredA);
982 
983  return std::make_tuple(numDropped, boundaryNodes);
984 }
985 
986 } // namespace MueLu
987 #endif // MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
MueLu::DefaultLocalOrdinal LocalOrdinal
Lightweight MueLu representation of a compressed row storage graph.
KOKKOS_INLINE_FUNCTION void SetBoundaryNodeMap(const boundary_nodes_type bndry)
Set boolean array indicating which rows correspond to Dirichlet boundaries.
void setValidator(RCP< const ParameterEntryValidator > const &validator)
GlobalOrdinal GO
T & get(const std::string &name, T def_value)
void translateOldAlgoParam(const Teuchos::ParameterList &pL, std::string &droppingMethod, bool &useBlocking, std::string &socUsesMatrix, std::string &socUsesMeasure, bool &symmetrizeDroppedGraph, bool &generateColoringGraph, magnitudeType &threshold, MueLu::MatrixConstruction::lumpingType &lumpingChoice)
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Print more statistics.
LocalOrdinal LO
One-liner description of what is happening.
ParameterList & set(std::string const &name, T &&value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
Functor that drops boundary nodes for a blockSize == 1 problem.
void runBoundaryFunctors(local_matrix_type &lclA, boundary_nodes_view &boundaryNodes, Functors &...functors)
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Functor that symmetrizes the dropping decisions.
MueLu::DefaultGlobalOrdinal GlobalOrdinal
Functor that drops off-rank entries.
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:63
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
static void runDroppingFunctors_on_A(matrix_type &A, matrix_type &mergedA, LocalOrdinal blkPartSize, block_indices_view_type &rowTranslation, block_indices_view_type &colTranslation, results_view &results, rowptr_type &filtered_rowptr, rowptr_type &graph_rowptr, nnz_count_type &nnz, boundary_nodes_type &boundaryNodes, const std::string &droppingMethod, const magnitudeType threshold, const bool aggregationMayCreateDirichlet, const bool symmetrizeDroppedGraph, const bool useBlocking, Level &level, const Factory &factory)
typename MueLu::LWGraph_kokkos< LocalOrdinal, GlobalOrdinal, Node >::boundary_nodes_type boundary_nodes_type
void DeclareInput(Level &currentLevel) const
Input.
Functor that fills the filtered matrix while reusing the graph of the matrix before dropping...
Functor for marking nodes as Dirichlet.
static void runDroppingFunctors_on_dlap(matrix_type &A, matrix_type &mergedA, LocalOrdinal blkPartSize, block_indices_view_type &rowTranslation, block_indices_view_type &colTranslation, results_view &results, rowptr_type &filtered_rowptr, rowptr_type &graph_rowptr, nnz_count_type &nnz, boundary_nodes_type &boundaryNodes, const std::string &droppingMethod, const magnitudeType threshold, const bool aggregationMayCreateDirichlet, const bool symmetrizeDroppedGraph, const bool useBlocking, const std::string &distanceLaplacianMetric, Teuchos::Array< double > &dlap_weights, LocalOrdinal interleaved_blocksize, Level &level, const Factory &factory)
Kokkos::RangePolicy< local_ordinal_type, execution_space > range_type
std::tuple< GlobalOrdinal, boundary_nodes_type > BuildVector(Level &currentLevel) const
static void runDroppingFunctors_on_dlap(matrix_type &A, results_view &results, rowptr_type &filtered_rowptr, LocalOrdinal &nnz_filtered, boundary_nodes_type &boundaryNodes, const std::string &droppingMethod, const magnitudeType threshold, const bool aggregationMayCreateDirichlet, const bool symmetrizeDroppedGraph, const bool useBlocking, const std::string &distanceLaplacianMetric, Level &level, const Factory &factory)
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void Build(Level &currentLevel) const
Build an object with this factory.
Functor for marking nodes as Dirichlet based on rowsum.
Functor that serially applies sub-functors to rows.
size_type size() const
static void runDroppingFunctors_on_A(matrix_type &A, results_view &results, rowptr_type &filtered_rowptr, LocalOrdinal &nnz_filtered, boundary_nodes_type &boundaryNodes, const std::string &droppingMethod, const magnitudeType threshold, const bool aggregationMayCreateDirichlet, const bool symmetrizeDroppedGraph, const bool useBlocking, Level &level, const Factory &factory)
#define SET_VALID_ENTRY(name)
Functor for marking nodes as Dirichlet in a block operator.
std::tuple< GlobalOrdinal, boundary_nodes_type > BuildScalar(Level &currentLevel) const
Functor does not reuse the graph of the matrix for a problem with blockSize == 1. ...
static RCP< Matrix > Build(const RCP< const Map > &rowMap, size_t maxNumEntriesPerRow, Xpetra::ProfileType pftype=Xpetra::DynamicProfile)
Exception throws to report errors in the internal logical of the program.
#define TEUCHOS_ASSERT(assertion_test)
ParameterEntry & getEntry(const std::string &name)
bool is_null() const