MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_CoalesceDropFactory_kokkos_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // MueLu: A package for multigrid based preconditioning
4 //
5 // Copyright 2012 NTESS and the MueLu contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
11 #define MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
12 
13 #include <Kokkos_Core.hpp>
14 #include <KokkosSparse_CrsMatrix.hpp>
15 #include <sstream>
16 #include <string>
17 #include <tuple>
18 
19 #include "Xpetra_Matrix.hpp"
20 
22 
23 #include "MueLu_AmalgamationInfo.hpp"
24 #include "MueLu_Exceptions.hpp"
25 #include "MueLu_Level.hpp"
26 #include "MueLu_LWGraph_kokkos.hpp"
27 #include "MueLu_MasterList.hpp"
28 #include "MueLu_Monitor.hpp"
29 #include "MueLu_Utilities.hpp"
30 
31 // #define MUELU_COALESCE_DROP_DEBUG 1
32 
34 #include "MueLu_DroppingCommon.hpp"
36 
37 // The different dropping algorithms are split up over several translation units. This speeds up compilation and also avoids launch latencies on GPU.
39 #include "MueLu_ScalarDroppingClassical.hpp"
40 #include "MueLu_ScalarDroppingDistanceLaplacian.hpp"
42 #include "MueLu_VectorDroppingClassical.hpp"
43 #include "MueLu_VectorDroppingDistanceLaplacian.hpp"
44 
45 namespace MueLu {
46 
47 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
49  RCP<ParameterList> validParamList = rcp(new ParameterList());
50 
51 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
52  SET_VALID_ENTRY("aggregation: use blocking");
53  SET_VALID_ENTRY("aggregation: drop tol");
54  SET_VALID_ENTRY("aggregation: use ml scaling of drop tol");
55  SET_VALID_ENTRY("aggregation: Dirichlet threshold");
56  SET_VALID_ENTRY("aggregation: greedy Dirichlet");
57  SET_VALID_ENTRY("aggregation: row sum drop tol");
58  SET_VALID_ENTRY("aggregation: strength-of-connection: matrix");
59  SET_VALID_ENTRY("aggregation: strength-of-connection: measure");
60  SET_VALID_ENTRY("aggregation: drop scheme");
61  SET_VALID_ENTRY("aggregation: block diagonal: interleaved blocksize");
62  SET_VALID_ENTRY("aggregation: distance laplacian metric");
63  SET_VALID_ENTRY("aggregation: distance laplacian directional weights");
64  SET_VALID_ENTRY("aggregation: dropping may create Dirichlet");
65 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
66  SET_VALID_ENTRY("aggregation: distance laplacian algo");
67  SET_VALID_ENTRY("aggregation: classical algo");
68 #endif
69  SET_VALID_ENTRY("aggregation: symmetrize graph after dropping");
70  SET_VALID_ENTRY("aggregation: coloring: use color graph");
71  SET_VALID_ENTRY("aggregation: coloring: localize color graph");
72 
73  SET_VALID_ENTRY("filtered matrix: use lumping");
74  SET_VALID_ENTRY("filtered matrix: reuse graph");
75  SET_VALID_ENTRY("filtered matrix: reuse eigenvalue");
76 
77  SET_VALID_ENTRY("filtered matrix: use root stencil");
78  SET_VALID_ENTRY("filtered matrix: use spread lumping");
79  SET_VALID_ENTRY("filtered matrix: spread lumping diag dom growth factor");
80  SET_VALID_ENTRY("filtered matrix: spread lumping diag dom cap");
81  SET_VALID_ENTRY("filtered matrix: Dirichlet threshold");
82 
83 #undef SET_VALID_ENTRY
84  validParamList->set<bool>("lightweight wrap", true, "Experimental option for lightweight graph access");
85 #ifndef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
86  validParamList->getEntry("aggregation: drop scheme").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("point-wise", "cut-drop"))));
87 #else
88  validParamList->getEntry("aggregation: drop scheme").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("point-wise", "cut-drop", "signed classical sa", "classical", "distance laplacian", "signed classical", "block diagonal", "block diagonal classical", "block diagonal distance laplacian", "block diagonal signed classical", "block diagonal colored signed classical", "signed classical distance laplacian", "signed classical sa distance laplacian"))));
89  validParamList->getEntry("aggregation: classical algo").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("default", "unscaled cut", "scaled cut", "scaled cut symmetric"))));
90  validParamList->getEntry("aggregation: distance laplacian algo").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("default", "unscaled cut", "scaled cut", "scaled cut symmetric"))));
91 #endif
92  validParamList->getEntry("aggregation: strength-of-connection: matrix").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("A", "distance laplacian"))));
93  validParamList->getEntry("aggregation: strength-of-connection: measure").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("smoothed aggregation", "signed smoothed aggregation", "signed ruge-stueben", "unscaled"))));
94  validParamList->getEntry("aggregation: distance laplacian metric").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("unweighted", "material"))));
95 
96  validParamList->set<RCP<const FactoryBase>>("A", Teuchos::null, "Generating factory of the matrix A");
97  validParamList->set<RCP<const FactoryBase>>("UnAmalgamationInfo", Teuchos::null, "Generating factory for UnAmalgamationInfo");
98  validParamList->set<RCP<const FactoryBase>>("Coordinates", Teuchos::null, "Generating factory for Coordinates");
99  validParamList->set<RCP<const FactoryBase>>("BlockNumber", Teuchos::null, "Generating factory for BlockNumber");
100  validParamList->set<RCP<const FactoryBase>>("Material", Teuchos::null, "Generating factory for Material");
101 
102  return validParamList;
103 }
104 
105 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
107  Input(currentLevel, "A");
108  Input(currentLevel, "UnAmalgamationInfo");
109 
110  const ParameterList& pL = GetParameterList();
111 
112  std::string socUsesMatrix = pL.get<std::string>("aggregation: strength-of-connection: matrix");
113  bool needCoords = (socUsesMatrix == "distance laplacian");
114 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
115  std::string droppingMethod = pL.get<std::string>("aggregation: drop scheme");
116  needCoords |= (droppingMethod.find("distance laplacian") != std::string::npos);
117 #endif
118  if (needCoords) {
119  Input(currentLevel, "Coordinates");
120  std::string distLaplMetric = pL.get<std::string>("aggregation: distance laplacian metric");
121  if (distLaplMetric == "material")
122  Input(currentLevel, "Material");
123  }
124 
125  bool useBlocking = pL.get<bool>("aggregation: use blocking");
126 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
127  useBlocking |= (droppingMethod.find("block diagonal") != std::string::npos);
128 #endif
129  if (useBlocking) {
130  Input(currentLevel, "BlockNumber");
131  }
132 }
133 
134 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
136  Build(Level& currentLevel) const {
137  auto A = Get<RCP<Matrix>>(currentLevel, "A");
138  TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() % A->GetStorageBlockSize() != 0, Exceptions::RuntimeError, "A->GetFixedBlockSize() needs to be a multiple of A->GetStorageBlockSize()");
139  LO blkSize = A->GetFixedBlockSize() / A->GetStorageBlockSize();
140 
141  std::tuple<GlobalOrdinal, boundary_nodes_type> results;
142  if (blkSize == 1)
143  results = BuildScalar(currentLevel);
144  else
145  results = BuildVector(currentLevel);
146 
147  if (GetVerbLevel() & Statistics1) {
148  GlobalOrdinal numDropped = std::get<0>(results);
149  auto boundaryNodes = std::get<1>(results);
150 
151  GO numLocalBoundaryNodes = 0;
152 
153  Kokkos::parallel_reduce(
154  "MueLu:CoalesceDropF:Build:bnd", range_type(0, boundaryNodes.extent(0)),
155  KOKKOS_LAMBDA(const LO i, GO& n) {
156  if (boundaryNodes(i))
157  n++;
158  },
159  numLocalBoundaryNodes);
160 
161  if (IsPrint(Statistics1)) {
162  auto comm = A->getRowMap()->getComm();
163 
164  std::vector<GlobalOrdinal> localStats = {numLocalBoundaryNodes, numDropped};
165  std::vector<GlobalOrdinal> globalStats(2);
166  Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, 2, localStats.data(), globalStats.data());
167 
168  GO numGlobalTotal = A->getGlobalNumEntries();
169  GO numGlobalBoundaryNodes = globalStats[0];
170  GO numGlobalDropped = globalStats[1];
171 
172  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
173  if (numGlobalTotal != 0) {
174  GetOStream(Statistics1) << "Number of dropped entries: "
175  << numGlobalDropped << "/" << numGlobalTotal
176  << " (" << 100 * Teuchos::as<double>(numGlobalDropped) / Teuchos::as<double>(numGlobalTotal) << "%)" << std::endl;
177  }
178  }
179  }
180 }
181 
182 template <class local_matrix_type, class boundary_nodes_view, class... Functors>
183 void runBoundaryFunctors(local_matrix_type& lclA, boundary_nodes_view& boundaryNodes, Functors&... functors) {
184  using local_ordinal_type = typename local_matrix_type::ordinal_type;
185  using execution_space = typename local_matrix_type::execution_space;
186  using range_type = Kokkos::RangePolicy<local_ordinal_type, execution_space>;
187  auto range = range_type(0, boundaryNodes.extent(0));
188  auto boundaries = BoundaryDetection::BoundaryFunctor(lclA, functors...);
189  Kokkos::parallel_for("CoalesceDrop::BoundaryDetection", range, boundaries);
190 }
191 
192 template <class magnitudeType>
193 void translateOldAlgoParam(const Teuchos::ParameterList& pL, std::string& droppingMethod, bool& useBlocking, std::string& socUsesMatrix, std::string& socUsesMeasure, bool& symmetrizeDroppedGraph, bool& generateColoringGraph, magnitudeType& threshold) {
194  std::set<std::string> validDroppingMethods = {"piece-wise", "cut-drop"};
195  if (validDroppingMethods.find(droppingMethod) == validDroppingMethods.end()) {
196  std::string algo = droppingMethod;
197  std::string classicalAlgoStr = pL.get<std::string>("aggregation: classical algo");
198  std::string distanceLaplacianAlgoStr = pL.get<std::string>("aggregation: distance laplacian algo");
199 
200  // Remove prefix "block diagonal" from algo
201  if (algo.find("block diagonal") == 0) {
202  useBlocking = true;
203  algo = algo.substr(14);
204  if (algo != "") {
205  algo = algo.substr(1);
206  }
207  }
208 
209  if ((algo == "classical") || (algo == "signed classical sa") || (algo == "signed classical") || (algo == "colored signed classical")) {
210  socUsesMatrix = "A";
211 
212  if (algo == "classical") {
213  socUsesMeasure = "smoothed aggregation";
214  } else if (algo == "signed classical sa") {
215  socUsesMeasure = "signed smoothed aggregation";
216  } else if (algo == "signed classical") {
217  socUsesMeasure = "signed ruge-stueben";
218  } else if (algo == "colored signed classical") {
219  socUsesMeasure = "signed ruge-stueben";
220  generateColoringGraph = true;
221  }
222 
223  if (classicalAlgoStr == "default")
224  droppingMethod = "point-wise";
225  else if (classicalAlgoStr == "unscaled cut") {
226  socUsesMeasure = "unscaled";
227  droppingMethod = "cut-drop";
228  } else if (classicalAlgoStr == "scaled cut") {
229  droppingMethod = "cut-drop";
230  } else if (classicalAlgoStr == "scaled cut symmetric") {
231  droppingMethod = "cut-drop";
232  symmetrizeDroppedGraph = true;
233  }
234  } else if ((algo == "distance laplacian") || (algo == "signed classical sa distance laplacian") || (algo == "signed classical distance laplacian")) {
235  socUsesMatrix = "distance laplacian";
236 
237  if (algo == "distance laplacian") {
238  socUsesMeasure = "smoothed aggregation";
239  } else if (algo == "signed classical sa distance laplacian") {
240  socUsesMeasure = "signed smoothed aggregation";
241  } else if (algo == "signed classical distance laplacian") {
242  socUsesMeasure = "signed ruge-stueben";
243  }
244 
245  if (distanceLaplacianAlgoStr == "default")
246  droppingMethod = "point-wise";
247  else if (distanceLaplacianAlgoStr == "unscaled cut") {
248  socUsesMeasure = "unscaled";
249  droppingMethod = "cut-drop";
250  } else if (distanceLaplacianAlgoStr == "scaled cut") {
251  droppingMethod = "cut-drop";
252  } else if (distanceLaplacianAlgoStr == "scaled cut symmetric") {
253  droppingMethod = "cut-drop";
254  symmetrizeDroppedGraph = true;
255  }
256  } else if (algo == "") {
257  // algo was "block diagonal", but we process and remove the "block diagonal" part
258  socUsesMatrix = "A";
260  }
261  }
262 }
263 
264 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
265 std::tuple<GlobalOrdinal, typename MueLu::LWGraph_kokkos<LocalOrdinal, GlobalOrdinal, Node>::boundary_nodes_type> CoalesceDropFactory_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
266  BuildScalar(Level& currentLevel) const {
267  FactoryMonitor m(*this, "BuildScalar", currentLevel);
268 
271  using local_matrix_type = typename MatrixType::local_matrix_type;
272  using local_graph_type = typename GraphType::local_graph_type;
273  using rowptr_type = typename local_graph_type::row_map_type::non_const_type;
274  using entries_type = typename local_graph_type::entries_type::non_const_type;
275  using values_type = typename local_matrix_type::values_type::non_const_type;
276  using device_type = typename Node::device_type;
277  using memory_space = typename device_type::memory_space;
278  using results_view_type = Kokkos::View<DecisionType*, memory_space>;
279  using magnitudeType = typename Teuchos::ScalarTraits<Scalar>::magnitudeType;
280  using doubleMultiVector = Xpetra::MultiVector<magnitudeType, LO, GO, NO>;
281 
282  typedef Teuchos::ScalarTraits<Scalar> STS;
283  const magnitudeType zero = Teuchos::ScalarTraits<magnitudeType>::zero();
284 
285  auto A = Get<RCP<Matrix>>(currentLevel, "A");
286 
288  // Process parameterlist
289  const ParameterList& pL = GetParameterList();
290 
291  // Boundary detection
292  const magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
293  const magnitudeType rowSumTol = as<magnitudeType>(pL.get<double>("aggregation: row sum drop tol"));
294  const LocalOrdinal dirichletNonzeroThreshold = 1;
295 
296  // Dropping
297  bool useBlocking = pL.get<bool>("aggregation: use blocking");
298  std::string droppingMethod = pL.get<std::string>("aggregation: drop scheme");
299  std::string socUsesMatrix = pL.get<std::string>("aggregation: strength-of-connection: matrix");
300  std::string socUsesMeasure = pL.get<std::string>("aggregation: strength-of-connection: measure");
301  std::string distanceLaplacianMetric = pL.get<std::string>("aggregation: distance laplacian metric");
302  bool symmetrizeDroppedGraph = pL.get<bool>("aggregation: symmetrize graph after dropping");
303  magnitudeType threshold;
304  // If we're doing the ML-style halving of the drop tol at each level, we do that here.
305  if (pL.get<bool>("aggregation: use ml scaling of drop tol"))
306  threshold = pL.get<double>("aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID());
307  else
308  threshold = as<magnitudeType>(pL.get<double>("aggregation: drop tol"));
309  bool aggregationMayCreateDirichlet = pL.get<bool>("aggregation: dropping may create Dirichlet");
310 
311  // Fill
312  const bool lumping = pL.get<bool>("filtered matrix: use lumping");
313  const bool reuseGraph = pL.get<bool>("filtered matrix: reuse graph");
314  const bool reuseEigenvalue = pL.get<bool>("filtered matrix: reuse eigenvalue");
315 
316  const bool useRootStencil = pL.get<bool>("filtered matrix: use root stencil");
317  const bool useSpreadLumping = pL.get<bool>("filtered matrix: use spread lumping");
318 
319  const magnitudeType filteringDirichletThreshold = as<magnitudeType>(pL.get<double>("filtered matrix: Dirichlet threshold"));
320 
321  // coloring graph
322  bool generateColoringGraph = pL.get<bool>("aggregation: coloring: use color graph");
323  const bool localizeColoringGraph = pL.get<bool>("aggregation: coloring: localize color graph");
324  const bool symmetrizeColoringGraph = true;
325 
326 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
327  translateOldAlgoParam(pL, droppingMethod, useBlocking, socUsesMatrix, socUsesMeasure, symmetrizeDroppedGraph, generateColoringGraph, threshold);
328 #endif
329 
330  {
331  std::stringstream ss;
332  ss << "dropping scheme = \"" << droppingMethod << "\", strength-of-connection measure = \"" << socUsesMeasure << "\", strength-of-connection matrix = \"" << socUsesMatrix << "\", ";
333  if (socUsesMatrix == "distance laplacian")
334  ss << "distance laplacian metric = \"" << distanceLaplacianMetric << "\", ";
335  ss << "threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << ", useBlocking = " << useBlocking;
336  ss << ", symmetrizeDroppedGraph = " << symmetrizeDroppedGraph << std::endl;
337 
338  GetOStream(Runtime0) << ss.str();
339  }
340 
341  TEUCHOS_ASSERT(!useRootStencil);
342  TEUCHOS_ASSERT(!useSpreadLumping);
343  if (droppingMethod == "cut-drop")
344  TEUCHOS_TEST_FOR_EXCEPTION(threshold > 1.0, Exceptions::RuntimeError, "For cut-drop algorithms, \"aggregation: drop tol\" = " << threshold << ", needs to be <= 1.0");
345 
347  // We perform four sweeps over the rows of A:
348  // Pass 1: detection of boundary nodes
349  // Pass 2: diagonal extraction
350  // Pass 3: drop decision for each entry and construction of the rowptr of the filtered matrix
351  // Pass 4: fill of the filtered matrix
352  //
353  // Pass 1 and 3 apply a sequence of criteria to each row of the matrix.
354 
355  // TODO: We could merge pass 1 and 2.
356 
357  auto crsA = toCrsMatrix(A);
358  auto lclA = crsA->getLocalMatrixDevice();
359  auto range = range_type(0, lclA.numRows());
360 
362  // Pass 1: Detect boundary nodes
363  //
364  // The following criteria are available:
365  // - BoundaryDetection::PointDirichletFunctor
366  // Marks rows as Dirichlet based on value threshold and number of off-diagonal entries
367  // - BoundaryDetection::RowSumFunctor
368  // Marks rows as Dirichlet bases on row-sum criterion
369 
370  // Dirichlet nodes
371  auto boundaryNodes = boundary_nodes_type("boundaryNodes", lclA.numRows()); // initialized to false
372  {
373  SubFactoryMonitor mBoundary(*this, "Boundary detection", currentLevel);
374 
375  // macro that applies boundary detection functors
376  auto dirichlet_detection = BoundaryDetection::PointDirichletFunctor(lclA, boundaryNodes, dirichletThreshold, dirichletNonzeroThreshold);
377 
378  if (rowSumTol <= 0.) {
379  runBoundaryFunctors(lclA, boundaryNodes, dirichlet_detection);
380  } else {
381  auto apply_rowsum = BoundaryDetection::RowSumFunctor(lclA, boundaryNodes, rowSumTol);
382  runBoundaryFunctors(lclA, boundaryNodes, dirichlet_detection, apply_rowsum);
383  }
384  }
385  // In what follows, boundaryNodes can still still get modified if aggregationMayCreateDirichlet == true.
386  // Otherwise we're now done with it now.
387 
389  // Pass 2 & 3: Diagonal extraction and determine dropping and construct
390  // rowptr of filtered matrix
391  //
392  // The following criteria are available:
393  // - Misc::PointwiseDropBoundaryFunctor
394  // Drop all rows that have been marked as Dirichlet
395  // - Misc::DropOffRankFunctor
396  // Drop all entries that are off-rank
397  // - ClassicalDropping::DropFunctor
398  // Classical dropping
399  // - DistanceLaplacian::DropFunctor
400  // Distance Laplacian dropping
401  // - Misc::KeepDiagonalFunctor
402  // Mark diagonal as KEEP
403  // - Misc::MarkSingletonFunctor
404  // Mark singletons after dropping as Dirichlet
405  // - Misc::BlockDiagonalizeFunctor
406  // Drop coupling between blocks
407  //
408  // For the block diagonal variants we first block diagonalized and then apply "blocksize = 1" algorithms.
409 
410  // rowptr of filtered A
411  auto filtered_rowptr = rowptr_type("filtered_rowptr", lclA.numRows() + 1);
412  // Number of nonzeros of filtered A
413  LocalOrdinal nnz_filtered = 0;
414  // dropping decisions for each entry
415  auto results = results_view_type("results", lclA.nnz()); // initialized to UNDECIDED
416  {
417  SubFactoryMonitor mDropping(*this, "Dropping decisions", currentLevel);
418 
419  if (threshold != zero) {
420  if (socUsesMatrix == "A") {
421  if (socUsesMeasure == "unscaled") {
422  ScalarDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::UnscaledMeasure>::runDroppingFunctors_on_A(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold,
423  aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
424  } else if (socUsesMeasure == "smoothed aggregation") {
425  ScalarDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SmoothedAggregationMeasure>::runDroppingFunctors_on_A(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold,
426  aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
427  } else if (socUsesMeasure == "signed ruge-stueben") {
428  ScalarDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedRugeStuebenMeasure>::runDroppingFunctors_on_A(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold,
429  aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
430  } else if (socUsesMeasure == "signed smoothed aggregation") {
431  ScalarDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedSmoothedAggregationMeasure>::runDroppingFunctors_on_A(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold,
432  aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
433  }
434  } else if (socUsesMatrix == "distance laplacian") {
435  auto coords = Get<RCP<doubleMultiVector>>(currentLevel, "Coordinates");
436  if (socUsesMeasure == "unscaled") {
437  ScalarDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::UnscaledMeasure>::runDroppingFunctors_on_dlap(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, currentLevel, *this);
438  } else if (socUsesMeasure == "smoothed aggregation") {
439  ScalarDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SmoothedAggregationMeasure>::runDroppingFunctors_on_dlap(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, currentLevel, *this);
440  } else if (socUsesMeasure == "signed ruge-stueben") {
441  ScalarDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedRugeStuebenMeasure>::runDroppingFunctors_on_dlap(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, currentLevel, *this);
442  } else if (socUsesMeasure == "signed smoothed aggregation") {
443  ScalarDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedSmoothedAggregationMeasure>::runDroppingFunctors_on_dlap(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, currentLevel, *this);
444  }
445  }
446  } else {
447  Kokkos::deep_copy(results, KEEP);
448 
449  if (symmetrizeDroppedGraph) {
450  auto drop_boundaries = Misc::PointwiseSymmetricDropBoundaryFunctor(*A, boundaryNodes, results);
451  ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *this, drop_boundaries);
452  } else {
453  auto no_op = Misc::NoOpFunctor<LocalOrdinal>();
454  ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *this, no_op);
455  }
456  }
457 
458  if (symmetrizeDroppedGraph) {
459  auto symmetrize = Misc::SymmetrizeFunctor(lclA, results);
460  ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *this, symmetrize);
461  }
462  }
463  GO numDropped = lclA.nnz() - nnz_filtered;
464  // We now know the number of entries of filtered A and have the final rowptr.
465 
467  // Pass 4: Create local matrix for filtered A
468  //
469  // Dropped entries are optionally lumped to the diagonal.
470 
471  RCP<Matrix> filteredA;
472  RCP<LWGraph_kokkos> graph;
473  {
474  SubFactoryMonitor mFill(*this, "Filtered matrix fill", currentLevel);
475 
476  local_matrix_type lclFilteredA;
477  local_graph_type lclGraph;
478  if (reuseGraph) {
479  filteredA = MatrixFactory::BuildCopy(A);
480  lclFilteredA = filteredA->getLocalMatrixDevice();
481 
482  auto colidx = entries_type("entries", nnz_filtered);
483  lclGraph = local_graph_type(colidx, filtered_rowptr);
484  } else {
485  auto colidx = entries_type("entries", nnz_filtered);
486  auto values = values_type("values", nnz_filtered);
487  lclFilteredA = local_matrix_type("filteredA",
488  lclA.numRows(), lclA.numCols(),
489  nnz_filtered,
490  values, filtered_rowptr, colidx);
491  }
492 
493  if (lumping) {
494  if (reuseGraph) {
495  auto fillFunctor = MatrixConstruction::PointwiseFillReuseFunctor<local_matrix_type, local_graph_type, true>(lclA, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
496  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_reuse", range, fillFunctor);
497  } else {
498  auto fillFunctor = MatrixConstruction::PointwiseFillNoReuseFunctor<local_matrix_type, true>(lclA, results, lclFilteredA, filteringDirichletThreshold);
499  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_noreuse", range, fillFunctor);
500  }
501  } else {
502  if (reuseGraph) {
503  auto fillFunctor = MatrixConstruction::PointwiseFillReuseFunctor<local_matrix_type, local_graph_type, false>(lclA, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
504  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_reuse", range, fillFunctor);
505  } else {
506  auto fillFunctor = MatrixConstruction::PointwiseFillNoReuseFunctor<local_matrix_type, false>(lclA, results, lclFilteredA, filteringDirichletThreshold);
507  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_noreuse", range, fillFunctor);
508  }
509  }
510 
511  if (!reuseGraph)
512  filteredA = MatrixFactory::Build(lclFilteredA, A->getRowMap(), A->getColMap(), A->getDomainMap(), A->getRangeMap());
513  filteredA->SetFixedBlockSize(A->GetFixedBlockSize());
514 
515  if (reuseEigenvalue) {
516  // Reuse max eigenvalue from A
517  // It is unclear what eigenvalue is the best for the smoothing, but we already may have
518  // the D^{-1}A estimate in A, may as well use it.
519  // NOTE: ML does that too
520  filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate());
521  } else {
522  filteredA->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits<SC>::one());
523  }
524 
525  if (!reuseGraph) {
526  // Use graph of filteredA as graph.
527  lclGraph = filteredA->getCrsGraph()->getLocalGraphDevice();
528  }
529  graph = rcp(new LWGraph_kokkos(lclGraph, filteredA->getRowMap(), filteredA->getColMap(), "amalgamated graph of A"));
530  graph->SetBoundaryNodeMap(boundaryNodes);
531  }
532 
533  // Construct a second graph for coloring
534  if (generateColoringGraph) {
535  SubFactoryMonitor mColoringGraph(*this, "Construct coloring graph", currentLevel);
536 
537  filtered_rowptr = rowptr_type("rowptr_coloring_graph", lclA.numRows() + 1);
538  if (localizeColoringGraph) {
539  auto drop_offrank = Misc::DropOffRankFunctor(lclA, results);
540  ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *this, drop_offrank);
541  }
542  if (symmetrizeColoringGraph) {
543  auto symmetrize = Misc::SymmetrizeFunctor(lclA, results);
544  ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *this, symmetrize);
545  }
546  auto colidx = entries_type("entries_coloring_graph", nnz_filtered);
547  auto lclGraph = local_graph_type(colidx, filtered_rowptr);
548  auto graphConstruction = MatrixConstruction::GraphConstruction<local_matrix_type, local_graph_type>(lclA, results, lclGraph);
549  Kokkos::parallel_for("MueLu::CoalesceDrop::Construct_coloring_graph", range, graphConstruction);
550 
551  auto colorGraph = rcp(new LWGraph_kokkos(lclGraph, filteredA->getRowMap(), filteredA->getColMap(), "coloring graph"));
552  Set(currentLevel, "Coloring Graph", colorGraph);
553  }
554 
555  LO dofsPerNode = 1;
556  Set(currentLevel, "DofsPerNode", dofsPerNode);
557  Set(currentLevel, "Graph", graph);
558  Set(currentLevel, "A", filteredA);
559 
560  return std::make_tuple(numDropped, boundaryNodes);
561 }
562 
563 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
564 std::tuple<GlobalOrdinal, typename MueLu::LWGraph_kokkos<LocalOrdinal, GlobalOrdinal, Node>::boundary_nodes_type> CoalesceDropFactory_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
565  BuildVector(Level& currentLevel) const {
566  FactoryMonitor m(*this, "BuildVector", currentLevel);
567 
570  using local_matrix_type = typename MatrixType::local_matrix_type;
571  using local_graph_type = typename GraphType::local_graph_type;
572  using rowptr_type = typename local_graph_type::row_map_type::non_const_type;
573  using entries_type = typename local_graph_type::entries_type::non_const_type;
574  using values_type = typename local_matrix_type::values_type::non_const_type;
575  using device_type = typename Node::device_type;
576  using memory_space = typename device_type::memory_space;
577  using results_view_type = Kokkos::View<DecisionType*, memory_space>;
578  using magnitudeType = typename Teuchos::ScalarTraits<Scalar>::magnitudeType;
579  using doubleMultiVector = Xpetra::MultiVector<magnitudeType, LO, GO, NO>;
580 
581  typedef Teuchos::ScalarTraits<Scalar> STS;
582  const magnitudeType zero = Teuchos::ScalarTraits<magnitudeType>::zero();
583 
584  auto A = Get<RCP<Matrix>>(currentLevel, "A");
585 
586  /* NOTE: storageblocksize (from GetStorageBlockSize()) is the size of a block in the chosen storage scheme.
587  blkSize is the number of storage blocks that must kept together during the amalgamation process.
588 
589  Both of these quantities may be different than numPDEs (from GetFixedBlockSize()), but the following must always hold:
590 
591  numPDEs = blkSize * storageblocksize.
592 
593  If numPDEs==1
594  Matrix is point storage (classical CRS storage). storageblocksize=1 and blkSize=1
595  No other values makes sense.
596 
597  If numPDEs>1
598  If matrix uses point storage, then storageblocksize=1 and blkSize=numPDEs.
599  If matrix uses block storage, with block size of n, then storageblocksize=n, and blkSize=numPDEs/n.
600  Thus far, only storageblocksize=numPDEs and blkSize=1 has been tested.
601  */
602 
603  TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() % A->GetStorageBlockSize() != 0, Exceptions::RuntimeError, "A->GetFixedBlockSize() needs to be a multiple of A->GetStorageBlockSize()");
604  LO blkSize = A->GetFixedBlockSize() / A->GetStorageBlockSize();
605 
606  auto amalInfo = Get<RCP<AmalgamationInfo>>(currentLevel, "UnAmalgamationInfo");
607 
608  const RCP<const Map> rowMap = A->getRowMap();
609  const RCP<const Map> colMap = A->getColMap();
610 
611  // build a node row map (uniqueMap = non-overlapping) and a node column map
612  // (nonUniqueMap = overlapping). The arrays rowTranslation and colTranslation
613  // stored in the AmalgamationInfo class container contain the local node id
614  // given a local dof id. The data is calculated in the AmalgamationFactory and
615  // stored in the variable "UnAmalgamationInfo" (which is of type AmalagamationInfo)
616  const RCP<const Map> uniqueMap = amalInfo->getNodeRowMap();
617  const RCP<const Map> nonUniqueMap = amalInfo->getNodeColMap();
618  Array<LO> rowTranslationArray = *(amalInfo->getRowTranslation()); // TAW should be transform that into a View?
619  Array<LO> colTranslationArray = *(amalInfo->getColTranslation());
620 
621  Kokkos::View<LO*, Kokkos::MemoryUnmanaged>
622  rowTranslationView(rowTranslationArray.getRawPtr(), rowTranslationArray.size());
623  Kokkos::View<LO*, Kokkos::MemoryUnmanaged>
624  colTranslationView(colTranslationArray.getRawPtr(), colTranslationArray.size());
625 
626  // get number of local nodes
627  LO numNodes = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
628  typedef typename Kokkos::View<LocalOrdinal*, typename Node::device_type> id_translation_type;
629  id_translation_type rowTranslation("dofId2nodeId", rowTranslationArray.size());
630  id_translation_type colTranslation("ov_dofId2nodeId", colTranslationArray.size());
631  Kokkos::deep_copy(rowTranslation, rowTranslationView);
632  Kokkos::deep_copy(colTranslation, colTranslationView);
633 
634  // extract striding information
635  blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map)
636  LocalOrdinal blkId = -1; //< the block id within a strided map or -1 if it is a full block map
637  LocalOrdinal blkPartSize = A->GetFixedBlockSize(); //< stores block size of part blkId (or the full block size)
638  if (A->IsView("stridedMaps") == true) {
639  const RCP<const Map> myMap = A->getRowMap("stridedMaps");
640  const RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
641  TEUCHOS_TEST_FOR_EXCEPTION(strMap.is_null() == true, Exceptions::RuntimeError, "Map is not of type stridedMap");
642  blkSize = Teuchos::as<const LocalOrdinal>(strMap->getFixedBlockSize());
643  blkId = strMap->getStridedBlockId();
644  if (blkId > -1)
645  blkPartSize = Teuchos::as<LocalOrdinal>(strMap->getStridingData()[blkId]);
646  }
647 
648  TEUCHOS_TEST_FOR_EXCEPTION(A->getRowMap()->getLocalNumElements() % blkPartSize != 0, MueLu::Exceptions::RuntimeError, "MueLu::CoalesceDropFactory: Number of local elements is " << A->getRowMap()->getLocalNumElements() << " but should be a multiple of " << blkPartSize);
649 
651  // Process parameterlist
652  const ParameterList& pL = GetParameterList();
653 
654  // Boundary detection
655  const magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
656  const magnitudeType rowSumTol = as<magnitudeType>(pL.get<double>("aggregation: row sum drop tol"));
657  const LocalOrdinal dirichletNonzeroThreshold = 1;
658  const bool useGreedyDirichlet = pL.get<bool>("aggregation: greedy Dirichlet");
659  TEUCHOS_TEST_FOR_EXCEPTION(rowSumTol > zero, MueLu::Exceptions::RuntimeError, "MueLu::CoalesceDropFactory: RowSum is not implemented for vectorial problems.");
660 
661  // Dropping
662  bool useBlocking = pL.get<bool>("aggregation: use blocking");
663  std::string droppingMethod = pL.get<std::string>("aggregation: drop scheme");
664  std::string socUsesMatrix = pL.get<std::string>("aggregation: strength-of-connection: matrix");
665  std::string socUsesMeasure = pL.get<std::string>("aggregation: strength-of-connection: measure");
666  std::string distanceLaplacianMetric = pL.get<std::string>("aggregation: distance laplacian metric");
667  bool symmetrizeDroppedGraph = pL.get<bool>("aggregation: symmetrize graph after dropping");
668  magnitudeType threshold;
669  // If we're doing the ML-style halving of the drop tol at each level, we do that here.
670  if (pL.get<bool>("aggregation: use ml scaling of drop tol"))
671  threshold = pL.get<double>("aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID());
672  else
673  threshold = as<magnitudeType>(pL.get<double>("aggregation: drop tol"));
674  bool aggregationMayCreateDirichlet = pL.get<bool>("aggregation: dropping may create Dirichlet");
675 
676  // Fill
677  const bool lumping = pL.get<bool>("filtered matrix: use lumping");
678  const bool reuseGraph = pL.get<bool>("filtered matrix: reuse graph");
679  const bool reuseEigenvalue = pL.get<bool>("filtered matrix: reuse eigenvalue");
680 
681  const bool useRootStencil = pL.get<bool>("filtered matrix: use root stencil");
682  const bool useSpreadLumping = pL.get<bool>("filtered matrix: use spread lumping");
683 
684  const magnitudeType filteringDirichletThreshold = as<magnitudeType>(pL.get<double>("filtered matrix: Dirichlet threshold"));
685 
686  // coloring graph
687  bool generateColoringGraph = pL.get<bool>("aggregation: coloring: use color graph");
688  const bool localizeColoringGraph = pL.get<bool>("aggregation: coloring: localize color graph");
689  const bool symmetrizeColoringGraph = true;
690 
691 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
692  translateOldAlgoParam(pL, droppingMethod, useBlocking, socUsesMatrix, socUsesMeasure, symmetrizeDroppedGraph, generateColoringGraph, threshold);
693 #endif
694  {
695  std::stringstream ss;
696  ss << "dropping scheme = \"" << droppingMethod << "\", strength-of-connection measure = \"" << socUsesMeasure << "\", strength-of-connection matrix = \"" << socUsesMatrix << "\", ";
697  if (socUsesMatrix == "distance laplacian")
698  ss << "distance laplacian metric = \"" << distanceLaplacianMetric << "\", ";
699  ss << "threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << ", useBlocking = " << useBlocking;
700  ss << ", symmetrizeDroppedGraph = " << symmetrizeDroppedGraph << std::endl;
701 
702  GetOStream(Runtime0) << ss.str();
703  }
704 
705  TEUCHOS_ASSERT(!useRootStencil);
706  TEUCHOS_ASSERT(!useSpreadLumping);
707  if (droppingMethod == "cut-drop")
708  TEUCHOS_TEST_FOR_EXCEPTION(threshold > 1.0, Exceptions::RuntimeError, "For cut-drop algorithms, \"aggregation: drop tol\" = " << threshold << ", needs to be <= 1.0");
709 
711  // We perform four sweeps over the rows of A:
712  // Pass 1: detection of boundary nodes
713  // Pass 2: diagonal extraction
714  // Pass 3: drop decision for each entry and construction of the rowptr of the filtered matrix
715  // Pass 4: fill of the filtered matrix
716  //
717  // Pass 1 and 3 apply a sequence of criteria to each row of the matrix.
718 
719  // TODO: We could merge pass 1 and 2.
720 
721  auto crsA = toCrsMatrix(A);
722  auto lclA = crsA->getLocalMatrixDevice();
723  auto range = range_type(0, numNodes);
724 
726  // Pass 1: Detect boundary nodes
727  //
728  // The following criteria are available:
729  // - BoundaryDetection::VectorDirichletFunctor
730  // Marks rows as Dirichlet based on value threshold and number of off-diagonal entries
731 
732  // Dirichlet nodes
733  auto boundaryNodes = boundary_nodes_type("boundaryNodes", numNodes); // initialized to false
734  {
735  SubFactoryMonitor mBoundary(*this, "Boundary detection", currentLevel);
736 
737  if (useGreedyDirichlet) {
738  auto dirichlet_detection = BoundaryDetection::VectorDirichletFunctor<local_matrix_type, true>(lclA, blkPartSize, boundaryNodes, dirichletThreshold, dirichletNonzeroThreshold);
739  runBoundaryFunctors(lclA, boundaryNodes, dirichlet_detection);
740  } else {
741  auto dirichlet_detection = BoundaryDetection::VectorDirichletFunctor<local_matrix_type, false>(lclA, blkPartSize, boundaryNodes, dirichletThreshold, dirichletNonzeroThreshold);
742  runBoundaryFunctors(lclA, boundaryNodes, dirichlet_detection);
743  }
744  }
745  // In what follows, boundaryNodes can still still get modified if aggregationMayCreateDirichlet == true.
746  // Otherwise we're now done with it now.
747 
749  // Pass 2 & 3: Diagonal extraction and determine dropping and construct
750  // rowptr of filtered matrix
751  //
752  // The following criteria are available:
753  // - Misc::VectorDropBoundaryFunctor
754  // Drop all rows that have been marked as Dirichlet
755  // - Misc::DropOffRankFunctor
756  // Drop all entries that are off-rank
757  // - ClassicalDropping::DropFunctor
758  // Classical dropping
759  // - DistanceLaplacian::VectorDropFunctor
760  // Distance Laplacian dropping
761  // - Misc::KeepDiagonalFunctor
762  // Mark diagonal as KEEP
763  // - Misc::MarkSingletonFunctor
764  // Mark singletons after dropping as Dirichlet
765 
766  // rowptr of filtered A
767  auto filtered_rowptr = rowptr_type("rowptr", lclA.numRows() + 1);
768  auto graph_rowptr = rowptr_type("rowptr", numNodes + 1);
769  // Number of nonzeros of filtered A and graph
770  Kokkos::pair<LocalOrdinal, LocalOrdinal> nnz = {0, 0};
771 
772  // dropping decisions for each entry
773  auto results = results_view_type("results", lclA.nnz()); // initialized to UNDECIDED
774 
775  RCP<Matrix> mergedA;
776  {
777  SubFactoryMonitor mDropping(*this, "Dropping decisions", currentLevel);
778 
779  {
780  // Construct merged A.
781 
782  auto merged_rowptr = rowptr_type("rowptr", numNodes + 1);
783  LocalOrdinal nnz_merged = 0;
784 
785  auto functor = MatrixConstruction::MergeCountFunctor(lclA, blkPartSize, colTranslation, merged_rowptr);
786  Kokkos::parallel_scan("MergeCount", range, functor, nnz_merged);
787 
788  local_graph_type lclMergedGraph;
789  auto colidx_merged = entries_type("entries", nnz_merged);
790  auto values_merged = values_type("values", nnz_merged);
791 
792  local_matrix_type lclMergedA = local_matrix_type("mergedA",
793  numNodes, nonUniqueMap->getLocalNumElements(),
794  nnz_merged,
795  values_merged, merged_rowptr, colidx_merged);
796 
797  auto fillFunctor = MatrixConstruction::MergeFillFunctor<local_matrix_type>(lclA, blkSize, colTranslation, lclMergedA);
798  Kokkos::parallel_for("MueLu::CoalesceDrop::MergeFill", range, fillFunctor);
799 
800  mergedA = Xpetra::MatrixFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(lclMergedA, uniqueMap, nonUniqueMap, uniqueMap, uniqueMap);
801  }
802 
803  if (threshold != zero) {
804  if (socUsesMatrix == "A") {
805  if (socUsesMeasure == "unscaled") {
806  VectorDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::UnscaledMeasure>::runDroppingFunctors_on_A(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
807  } else if (socUsesMeasure == "smoothed aggregation") {
808  VectorDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SmoothedAggregationMeasure>::runDroppingFunctors_on_A(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
809  } else if (socUsesMeasure == "signed ruge-stueben") {
810  VectorDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedRugeStuebenMeasure>::runDroppingFunctors_on_A(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
811  } else if (socUsesMeasure == "signed smoothed aggregation") {
812  VectorDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedSmoothedAggregationMeasure>::runDroppingFunctors_on_A(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
813  }
814  } else if (socUsesMatrix == "distance laplacian") {
815  auto coords = Get<RCP<doubleMultiVector>>(currentLevel, "Coordinates");
816 
817  Array<double> dlap_weights = pL.get<Array<double>>("aggregation: distance laplacian directional weights");
818  LocalOrdinal interleaved_blocksize = as<LocalOrdinal>(pL.get<int>("aggregation: block diagonal: interleaved blocksize"));
819  if (socUsesMeasure == "distance laplacian") {
820  LO dim = (LO)coords->getNumVectors();
821  // If anything isn't 1.0 we need to turn on the weighting
822  bool non_unity = false;
823  for (LO i = 0; !non_unity && i < (LO)dlap_weights.size(); i++) {
824  if (dlap_weights[i] != 1.0) {
825  non_unity = true;
826  }
827  }
828  if (non_unity) {
829  if ((LO)dlap_weights.size() == dim) {
830  distanceLaplacianMetric = "weighted";
831  } else if ((LO)dlap_weights.size() == interleaved_blocksize * dim)
832  distanceLaplacianMetric = "block weighted";
833  else {
834  TEUCHOS_TEST_FOR_EXCEPTION(1, Exceptions::RuntimeError,
835  "length of 'aggregation: distance laplacian directional weights' must equal the coordinate dimension OR the coordinate dimension times the blocksize");
836  }
837  if (GetVerbLevel() & Statistics1)
838  GetOStream(Statistics1) << "Using distance laplacian weights: " << dlap_weights << std::endl;
839  }
840  }
841 
842  if (socUsesMeasure == "unscaled") {
843  VectorDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::UnscaledMeasure>::runDroppingFunctors_on_dlap(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, dlap_weights, interleaved_blocksize, currentLevel, *this);
844  } else if (socUsesMeasure == "smoothed aggregation") {
845  VectorDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SmoothedAggregationMeasure>::runDroppingFunctors_on_dlap(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, dlap_weights, interleaved_blocksize, currentLevel, *this);
846  } else if (socUsesMeasure == "signed ruge-stueben") {
847  VectorDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedRugeStuebenMeasure>::runDroppingFunctors_on_dlap(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, dlap_weights, interleaved_blocksize, currentLevel, *this);
848  } else if (socUsesMeasure == "signed smoothed aggregation") {
849  VectorDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedSmoothedAggregationMeasure>::runDroppingFunctors_on_dlap(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, dlap_weights, interleaved_blocksize, currentLevel, *this);
850  }
851  }
852  } else {
853  Kokkos::deep_copy(results, KEEP);
854 
855  auto no_op = Misc::NoOpFunctor<LocalOrdinal>();
856  VectorDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, useBlocking, currentLevel, *this, no_op);
857  }
858 
859  if (symmetrizeDroppedGraph) {
860  auto symmetrize = Misc::SymmetrizeFunctor(lclA, results);
861  VectorDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, useBlocking, currentLevel, *this, symmetrize);
862  }
863  }
864  LocalOrdinal nnz_filtered = nnz.first;
865  LocalOrdinal nnz_graph = nnz.second;
866  GO numTotal = lclA.nnz();
867  GO numDropped = numTotal - nnz_filtered;
868  // We now know the number of entries of filtered A and have the final rowptr.
869 
871  // Pass 4: Create local matrix for filtered A
872  //
873  // Dropped entries are optionally lumped to the diagonal.
874 
875  RCP<Matrix> filteredA;
876  RCP<LWGraph_kokkos> graph;
877  {
878  SubFactoryMonitor mFill(*this, "Filtered matrix fill", currentLevel);
879 
880  local_matrix_type lclFilteredA;
881  if (reuseGraph) {
882  lclFilteredA = local_matrix_type("filteredA", lclA.graph, lclA.numCols());
883  } else {
884  auto colidx = entries_type("entries", nnz_filtered);
885  auto values = values_type("values", nnz_filtered);
886  lclFilteredA = local_matrix_type("filteredA",
887  lclA.numRows(), lclA.numCols(),
888  nnz_filtered,
889  values, filtered_rowptr, colidx);
890  }
891 
892  local_graph_type lclGraph;
893  {
894  auto colidx = entries_type("entries", nnz_graph);
895  lclGraph = local_graph_type(colidx, graph_rowptr);
896  }
897 
898  if (lumping) {
899  if (reuseGraph) {
900  auto fillFunctor = MatrixConstruction::VectorFillFunctor<local_matrix_type, true, true>(lclA, blkPartSize, colTranslation, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
901  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_reuse", range, fillFunctor);
902  } else {
903  auto fillFunctor = MatrixConstruction::VectorFillFunctor<local_matrix_type, true, false>(lclA, blkPartSize, colTranslation, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
904  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_noreuse", range, fillFunctor);
905  }
906  } else {
907  if (reuseGraph) {
908  auto fillFunctor = MatrixConstruction::VectorFillFunctor<local_matrix_type, false, true>(lclA, blkSize, colTranslation, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
909  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_reuse", range, fillFunctor);
910  } else {
911  auto fillFunctor = MatrixConstruction::VectorFillFunctor<local_matrix_type, false, false>(lclA, blkSize, colTranslation, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
912  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_noreuse", range, fillFunctor);
913  }
914  }
915 
916  filteredA = Xpetra::MatrixFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(lclFilteredA, A->getRowMap(), A->getColMap(), A->getDomainMap(), A->getRangeMap());
917  filteredA->SetFixedBlockSize(blkSize);
918 
919  if (reuseEigenvalue) {
920  // Reuse max eigenvalue from A
921  // It is unclear what eigenvalue is the best for the smoothing, but we already may have
922  // the D^{-1}A estimate in A, may as well use it.
923  // NOTE: ML does that too
924  filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate());
925  } else {
926  filteredA->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits<SC>::one());
927  }
928 
929  graph = rcp(new LWGraph_kokkos(lclGraph, uniqueMap, nonUniqueMap, "amalgamated graph of A"));
930  graph->SetBoundaryNodeMap(boundaryNodes);
931  }
932 
933  // Construct a second graph for coloring
934  if (generateColoringGraph) {
935  SubFactoryMonitor mColoringGraph(*this, "Construct coloring graph", currentLevel);
936 
937  filtered_rowptr = rowptr_type("rowptr_coloring_graph", lclA.numRows() + 1);
938  graph_rowptr = rowptr_type("rowptr", numNodes + 1);
939  if (localizeColoringGraph) {
940  auto drop_offrank = Misc::DropOffRankFunctor(lclA, results);
941  VectorDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, useBlocking, currentLevel, *this, drop_offrank);
942  }
943  if (symmetrizeColoringGraph) {
944  auto symmetrize = Misc::SymmetrizeFunctor(lclA, results);
945  VectorDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, useBlocking, currentLevel, *this, symmetrize);
946  }
947  auto colidx = entries_type("entries_coloring_graph", nnz_filtered);
948  auto lclGraph = local_graph_type(colidx, filtered_rowptr);
949  auto graphConstruction = MatrixConstruction::GraphConstruction<local_matrix_type, local_graph_type>(lclA, results, lclGraph);
950  Kokkos::parallel_for("MueLu::CoalesceDrop::Construct_coloring_graph", range, graphConstruction);
951 
952  auto colorGraph = rcp(new LWGraph_kokkos(lclGraph, filteredA->getRowMap(), filteredA->getColMap(), "coloring graph"));
953  Set(currentLevel, "Coloring Graph", colorGraph);
954  }
955 
956  LO dofsPerNode = blkSize;
957 
958  Set(currentLevel, "DofsPerNode", dofsPerNode);
959  Set(currentLevel, "Graph", graph);
960  Set(currentLevel, "A", filteredA);
961 
962  return std::make_tuple(numDropped, boundaryNodes);
963 }
964 
965 } // namespace MueLu
966 #endif // MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
MueLu::DefaultLocalOrdinal LocalOrdinal
Lightweight MueLu representation of a compressed row storage graph.
void translateOldAlgoParam(const Teuchos::ParameterList &pL, std::string &droppingMethod, bool &useBlocking, std::string &socUsesMatrix, std::string &socUsesMeasure, bool &symmetrizeDroppedGraph, bool &generateColoringGraph, magnitudeType &threshold)
KOKKOS_INLINE_FUNCTION void SetBoundaryNodeMap(const boundary_nodes_type bndry)
Set boolean array indicating which rows correspond to Dirichlet boundaries.
void setValidator(RCP< const ParameterEntryValidator > const &validator)
GlobalOrdinal GO
T & get(const std::string &name, T def_value)
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Print more statistics.
LocalOrdinal LO
One-liner description of what is happening.
ParameterList & set(std::string const &name, T &&value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
Functor that drops boundary nodes for a blockSize == 1 problem.
void runBoundaryFunctors(local_matrix_type &lclA, boundary_nodes_view &boundaryNodes, Functors &...functors)
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Functor that symmetrizes the dropping decisions.
MueLu::DefaultGlobalOrdinal GlobalOrdinal
Functor that drops off-rank entries.
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:63
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
static void runDroppingFunctors_on_A(matrix_type &A, matrix_type &mergedA, LocalOrdinal blkPartSize, block_indices_view_type &rowTranslation, block_indices_view_type &colTranslation, results_view &results, rowptr_type &filtered_rowptr, rowptr_type &graph_rowptr, nnz_count_type &nnz, boundary_nodes_type &boundaryNodes, const std::string &droppingMethod, const magnitudeType threshold, const bool aggregationMayCreateDirichlet, const bool symmetrizeDroppedGraph, const bool useBlocking, Level &level, const Factory &factory)
typename MueLu::LWGraph_kokkos< LocalOrdinal, GlobalOrdinal, Node >::boundary_nodes_type boundary_nodes_type
void DeclareInput(Level &currentLevel) const
Input.
Functor that fills the filtered matrix while reusing the graph of the matrix before dropping...
Functor for marking nodes as Dirichlet.
static void runDroppingFunctors_on_dlap(matrix_type &A, matrix_type &mergedA, LocalOrdinal blkPartSize, block_indices_view_type &rowTranslation, block_indices_view_type &colTranslation, results_view &results, rowptr_type &filtered_rowptr, rowptr_type &graph_rowptr, nnz_count_type &nnz, boundary_nodes_type &boundaryNodes, const std::string &droppingMethod, const magnitudeType threshold, const bool aggregationMayCreateDirichlet, const bool symmetrizeDroppedGraph, const bool useBlocking, const std::string &distanceLaplacianMetric, Teuchos::Array< double > &dlap_weights, LocalOrdinal interleaved_blocksize, Level &level, const Factory &factory)
Kokkos::RangePolicy< local_ordinal_type, execution_space > range_type
std::tuple< GlobalOrdinal, boundary_nodes_type > BuildVector(Level &currentLevel) const
static void runDroppingFunctors_on_dlap(matrix_type &A, results_view &results, rowptr_type &filtered_rowptr, LocalOrdinal &nnz_filtered, boundary_nodes_type &boundaryNodes, const std::string &droppingMethod, const magnitudeType threshold, const bool aggregationMayCreateDirichlet, const bool symmetrizeDroppedGraph, const bool useBlocking, const std::string &distanceLaplacianMetric, Level &level, const Factory &factory)
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void Build(Level &currentLevel) const
Build an object with this factory.
Functor for marking nodes as Dirichlet based on rowsum.
Functor that serially applies sub-functors to rows.
size_type size() const
static void runDroppingFunctors_on_A(matrix_type &A, results_view &results, rowptr_type &filtered_rowptr, LocalOrdinal &nnz_filtered, boundary_nodes_type &boundaryNodes, const std::string &droppingMethod, const magnitudeType threshold, const bool aggregationMayCreateDirichlet, const bool symmetrizeDroppedGraph, const bool useBlocking, Level &level, const Factory &factory)
#define SET_VALID_ENTRY(name)
Functor for marking nodes as Dirichlet in a block operator.
std::tuple< GlobalOrdinal, boundary_nodes_type > BuildScalar(Level &currentLevel) const
Functor does not reuse the graph of the matrix for a problem with blockSize == 1. ...
static RCP< Matrix > Build(const RCP< const Map > &rowMap, size_t maxNumEntriesPerRow, Xpetra::ProfileType pftype=Xpetra::DynamicProfile)
Exception throws to report errors in the internal logical of the program.
#define TEUCHOS_ASSERT(assertion_test)
ParameterEntry & getEntry(const std::string &name)
bool is_null() const