MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_CoalesceDropFactory_kokkos_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // MueLu: A package for multigrid based preconditioning
4 //
5 // Copyright 2012 NTESS and the MueLu contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
11 #define MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
12 
13 #include <Kokkos_Core.hpp>
14 #include <KokkosSparse_CrsMatrix.hpp>
15 #include <sstream>
16 #include <tuple>
17 
18 #include "Xpetra_Matrix.hpp"
19 
21 
22 #include "MueLu_AmalgamationInfo.hpp"
23 #include "MueLu_Exceptions.hpp"
24 #include "MueLu_Level.hpp"
25 #include "MueLu_LWGraph_kokkos.hpp"
26 #include "MueLu_MasterList.hpp"
27 #include "MueLu_Monitor.hpp"
28 
29 // #define MUELU_COALESCE_DROP_DEBUG 1
30 
33 #include "MueLu_CutDrop.hpp"
34 #include "MueLu_DroppingCommon.hpp"
37 
38 namespace MueLu {
39 
40 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
42  RCP<ParameterList> validParamList = rcp(new ParameterList());
43 
44 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
45  SET_VALID_ENTRY("aggregation: drop tol");
46  SET_VALID_ENTRY("aggregation: use ml scaling of drop tol");
47  SET_VALID_ENTRY("aggregation: Dirichlet threshold");
48  SET_VALID_ENTRY("aggregation: greedy Dirichlet");
49  SET_VALID_ENTRY("aggregation: row sum drop tol");
50  SET_VALID_ENTRY("aggregation: drop scheme");
51  SET_VALID_ENTRY("aggregation: block diagonal: interleaved blocksize");
52  SET_VALID_ENTRY("aggregation: distance laplacian metric");
53  SET_VALID_ENTRY("aggregation: distance laplacian directional weights");
54  SET_VALID_ENTRY("aggregation: dropping may create Dirichlet");
55  SET_VALID_ENTRY("aggregation: distance laplacian algo");
56  SET_VALID_ENTRY("aggregation: classical algo");
57  SET_VALID_ENTRY("aggregation: coloring: localize color graph");
58 
59  SET_VALID_ENTRY("filtered matrix: use lumping");
60  SET_VALID_ENTRY("filtered matrix: reuse graph");
61  SET_VALID_ENTRY("filtered matrix: reuse eigenvalue");
62 
63  SET_VALID_ENTRY("filtered matrix: use root stencil");
64  SET_VALID_ENTRY("filtered matrix: use spread lumping");
65  SET_VALID_ENTRY("filtered matrix: spread lumping diag dom growth factor");
66  SET_VALID_ENTRY("filtered matrix: spread lumping diag dom cap");
67  SET_VALID_ENTRY("filtered matrix: Dirichlet threshold");
68 
69 #undef SET_VALID_ENTRY
70  validParamList->set<bool>("lightweight wrap", true, "Experimental option for lightweight graph access");
71 
72  // "signed classical" is the Ruge-Stuben style (relative to max off-diagonal), "sign classical sa" is the signed version of the sa criterion (relative to the diagonal values)
73  validParamList->getEntry("aggregation: drop scheme").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("signed classical sa", "classical", "distance laplacian", "signed classical", "block diagonal", "block diagonal classical", "block diagonal distance laplacian", "block diagonal signed classical", "block diagonal colored signed classical"))));
74  validParamList->getEntry("aggregation: classical algo").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("default", "unscaled cut", "scaled cut", "scaled cut symmetric"))));
75  validParamList->getEntry("aggregation: distance laplacian algo").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("default", "unscaled cut", "scaled cut", "scaled cut symmetric"))));
76  validParamList->getEntry("aggregation: distance laplacian metric").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("unweighted", "material"))));
77 
78  validParamList->set<RCP<const FactoryBase>>("A", Teuchos::null, "Generating factory of the matrix A");
79  validParamList->set<RCP<const FactoryBase>>("UnAmalgamationInfo", Teuchos::null, "Generating factory for UnAmalgamationInfo");
80  validParamList->set<RCP<const FactoryBase>>("Coordinates", Teuchos::null, "Generating factory for Coordinates");
81  validParamList->set<RCP<const FactoryBase>>("BlockNumber", Teuchos::null, "Generating factory for BlockNumber");
82  validParamList->set<RCP<const FactoryBase>>("Material", Teuchos::null, "Generating factory for Material");
83 
84  return validParamList;
85 }
86 
87 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
89  Input(currentLevel, "A");
90  Input(currentLevel, "UnAmalgamationInfo");
91 
92  const ParameterList& pL = GetParameterList();
93  std::string algo = pL.get<std::string>("aggregation: drop scheme");
94  std::string distLaplMetric = pL.get<std::string>("aggregation: distance laplacian metric");
95  if (algo == "distance laplacian" || algo == "block diagonal distance laplacian") {
96  Input(currentLevel, "Coordinates");
97  if (distLaplMetric == "material")
98  Input(currentLevel, "Material");
99  }
100  if (algo == "signed classical sa")
101  ;
102  else if (algo.find("block diagonal") != std::string::npos || algo.find("signed classical") != std::string::npos) {
103  Input(currentLevel, "BlockNumber");
104  }
105 }
106 
107 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
109  Build(Level& currentLevel) const {
110  auto A = Get<RCP<Matrix>>(currentLevel, "A");
111  TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() % A->GetStorageBlockSize() != 0, Exceptions::RuntimeError, "A->GetFixedBlockSize() needs to be a multiple of A->GetStorageBlockSize()");
112  LO blkSize = A->GetFixedBlockSize() / A->GetStorageBlockSize();
113 
114  std::tuple<GlobalOrdinal, boundary_nodes_type> results;
115  if (blkSize == 1)
116  results = BuildScalar(currentLevel);
117  else
118  results = BuildVector(currentLevel);
119 
120  if (GetVerbLevel() & Statistics1) {
121  GlobalOrdinal numDropped = std::get<0>(results);
122  auto boundaryNodes = std::get<1>(results);
123 
124  GO numLocalBoundaryNodes = 0;
125  GO numGlobalBoundaryNodes = 0;
126 
127  Kokkos::parallel_reduce(
128  "MueLu:CoalesceDropF:Build:bnd", range_type(0, boundaryNodes.extent(0)),
129  KOKKOS_LAMBDA(const LO i, GO& n) {
130  if (boundaryNodes(i))
131  n++;
132  },
133  numLocalBoundaryNodes);
134 
135  auto comm = A->getRowMap()->getComm();
136  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
137 
138  GO numGlobalTotal = A->getGlobalNumEntries();
139  GO numGlobalDropped;
140  MueLu_sumAll(comm, numDropped, numGlobalDropped);
141 
142  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
143  if (numGlobalTotal != 0) {
144  GetOStream(Statistics1) << "Number of dropped entries: "
145  << numGlobalDropped << "/" << numGlobalTotal
146  << " (" << 100 * Teuchos::as<double>(numGlobalDropped) / Teuchos::as<double>(numGlobalTotal) << "%)" << std::endl;
147  }
148  }
149 }
150 
151 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
152 std::tuple<Teuchos::RCP<Xpetra::Vector<LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node>>, Teuchos::RCP<Xpetra::Vector<LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node>>> CoalesceDropFactory_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
153  GetBlockNumberMVs(Level& currentLevel) const {
154  RCP<LocalOrdinalVector> BlockNumber = Get<RCP<LocalOrdinalVector>>(currentLevel, "BlockNumber");
155  RCP<LocalOrdinalVector> ghostedBlockNumber;
156  GetOStream(Statistics1) << "Using BlockDiagonal Graph before dropping (with provided blocking)" << std::endl;
157 
158  // Ghost the column block numbers if we need to
159  auto A = Get<RCP<Matrix>>(currentLevel, "A");
160  RCP<const Import> importer = A->getCrsGraph()->getImporter();
161  if (!importer.is_null()) {
162  SubFactoryMonitor m1(*this, "Block Number import", currentLevel);
163  ghostedBlockNumber = Xpetra::VectorFactory<LO, LO, GO, NO>::Build(importer->getTargetMap());
164  ghostedBlockNumber->doImport(*BlockNumber, *importer, Xpetra::INSERT);
165  } else {
166  ghostedBlockNumber = BlockNumber;
167  }
168  return std::make_tuple(BlockNumber, ghostedBlockNumber);
169 }
170 
171 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
172 std::tuple<GlobalOrdinal, typename MueLu::LWGraph_kokkos<LocalOrdinal, GlobalOrdinal, Node>::boundary_nodes_type> CoalesceDropFactory_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
173  BuildScalar(Level& currentLevel) const {
174  FactoryMonitor m(*this, "Build", currentLevel);
175 
178  using local_matrix_type = typename MatrixType::local_matrix_type;
179  using local_graph_type = typename GraphType::local_graph_type;
180  using rowptr_type = typename local_graph_type::row_map_type::non_const_type;
181  using entries_type = typename local_graph_type::entries_type::non_const_type;
182  using values_type = typename local_matrix_type::values_type::non_const_type;
183  using device_type = typename Node::device_type;
184  using memory_space = typename device_type::memory_space;
185 
186  typedef Teuchos::ScalarTraits<SC> STS;
187  typedef typename STS::magnitudeType MT;
188  const MT zero = Teuchos::ScalarTraits<MT>::zero();
189 
190  auto A = Get<RCP<Matrix>>(currentLevel, "A");
191 
193  // Process parameterlist
194  const ParameterList& pL = GetParameterList();
195 
196  // Boundary detection
197  const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
198  const typename STS::magnitudeType rowSumTol = as<typename STS::magnitudeType>(pL.get<double>("aggregation: row sum drop tol"));
199  const LocalOrdinal dirichletNonzeroThreshold = 1;
200 
201  // Dropping
202  const std::string algo = pL.get<std::string>("aggregation: drop scheme");
203  std::string classicalAlgoStr = pL.get<std::string>("aggregation: classical algo");
204  std::string distanceLaplacianAlgoStr = pL.get<std::string>("aggregation: distance laplacian algo");
205  std::string distanceLaplacianMetric = pL.get<std::string>("aggregation: distance laplacian metric");
206  MT threshold;
207  // If we're doing the ML-style halving of the drop tol at each level, we do that here.
208  if (pL.get<bool>("aggregation: use ml scaling of drop tol"))
209  threshold = pL.get<double>("aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID());
210  else
211  threshold = as<MT>(pL.get<double>("aggregation: drop tol"));
212  bool aggregationMayCreateDirichlet = pL.get<bool>("aggregation: dropping may create Dirichlet");
213 
214  // Fill
215  const bool lumping = pL.get<bool>("filtered matrix: use lumping");
216  const bool reuseGraph = pL.get<bool>("filtered matrix: reuse graph");
217  const bool reuseEigenvalue = pL.get<bool>("filtered matrix: reuse eigenvalue");
218 
219  const bool useRootStencil = pL.get<bool>("filtered matrix: use root stencil");
220  const bool useSpreadLumping = pL.get<bool>("filtered matrix: use spread lumping");
221 
222  const MT filteringDirichletThreshold = as<MT>(pL.get<double>("filtered matrix: Dirichlet threshold"));
223  TEUCHOS_ASSERT(!useRootStencil);
224  TEUCHOS_ASSERT(!useSpreadLumping);
225 
226  if (algo == "classical")
227  GetOStream(Runtime0) << "algorithm = \"" << algo << "\" classical algorithm = \"" << classicalAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
228  else if (algo == "distance laplacian")
229  GetOStream(Runtime0) << "algorithm = \"" << algo << "\" distance laplacian algorithm = \"" << distanceLaplacianAlgoStr << "\" distance laplacian metric = \"" << distanceLaplacianMetric << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
230  else
231  GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
232 
233  if (((algo == "classical") && (classicalAlgoStr.find("scaled") != std::string::npos)) || ((algo == "distance laplacian") && (distanceLaplacianAlgoStr.find("scaled") != std::string::npos)))
234  TEUCHOS_TEST_FOR_EXCEPTION(threshold > 1.0, Exceptions::RuntimeError, "For cut-drop algorithms, \"aggregation: drop tol\" = " << threshold << ", needs to be <= 1.0");
235 
237  // We perform four sweeps over the rows of A:
238  // Pass 1: detection of boundary nodes
239  // Pass 2: diagonal extraction
240  // Pass 3: drop decision for each entry and construction of the rowptr of the filtered matrix
241  // Pass 4: fill of the filtered matrix
242  //
243  // Pass 1 and 3 apply a sequence of criteria to each row of the matrix.
244 
245  // TODO: We could merge pass 1 and 2.
246 
247  auto crsA = toCrsMatrix(A);
248  auto lclA = crsA->getLocalMatrixDevice();
249  auto range = range_type(0, lclA.numRows());
250 
252  // Pass 1: Detect boundary nodes
253  //
254  // The following criteria are available:
255  // - BoundaryDetection::PointDirichletFunctor
256  // Marks rows as Dirichlet based on value threshold and number of off-diagonal entries
257  // - BoundaryDetection::RowSumFunctor
258  // Marks rows as Dirichlet bases on row-sum criterion
259 
260  // Dirichlet nodes
261  auto boundaryNodes = boundary_nodes_type("boundaryNodes", lclA.numRows()); // initialized to false
262  {
263  SubFactoryMonitor mBoundary(*this, "Boundary detection", currentLevel);
264 
265  // macro that applies boundary detection functors
266 #define MueLu_runBoundaryFunctors(...) \
267  { \
268  auto boundaries = BoundaryDetection::BoundaryFunctor(lclA, __VA_ARGS__); \
269  Kokkos::parallel_for("CoalesceDrop::BoundaryDetection", range, boundaries); \
270  }
271 
272  auto dirichlet_detection = BoundaryDetection::PointDirichletFunctor(lclA, boundaryNodes, dirichletThreshold, dirichletNonzeroThreshold);
273 
274  if (rowSumTol <= 0.) {
275  MueLu_runBoundaryFunctors(dirichlet_detection);
276  } else {
277  auto apply_rowsum = BoundaryDetection::RowSumFunctor(lclA, boundaryNodes, rowSumTol);
278  MueLu_runBoundaryFunctors(dirichlet_detection,
279  apply_rowsum);
280  }
281 #undef MueLu_runBoundaryFunctors
282  }
283  // In what follows, boundaryNodes can still still get modified if aggregationMayCreateDirichlet == true.
284  // Otherwise we're now done with it now.
285 
287  // Pass 2 & 3: Diagonal extraction and determine dropping and construct
288  // rowptr of filtered matrix
289  //
290  // The following criteria are available:
291  // - Misc::PointwiseDropBoundaryFunctor
292  // Drop all rows that have been marked as Dirichlet
293  // - Misc::DropOffRankFunctor
294  // Drop all entries that are off-rank
295  // - ClassicalDropping::SAFunctor
296  // Classical dropping
297  // - ClassicalDropping::SignedRSFunctor
298  // Classical RS dropping
299  // - ClassicalDropping::SignedSAFunctor
300  // Classical signed SA dropping
301  // - DistanceLaplacian::DropFunctor
302  // Distance Laplacian dropping
303  // - Misc::KeepDiagonalFunctor
304  // Mark diagonal as KEEP
305  // - Misc::MarkSingletonFunctor
306  // Mark singletons after dropping as Dirichlet
307  // - Misc::BlockDiagonalizeFunctor
308  // Drop coupling between blocks
309  //
310  // For the block diagonal variants we first block diagonalized and then apply "blocksize = 1" algorithms.
311 
312  // rowptr of filtered A
313  auto filtered_rowptr = rowptr_type("filtered_rowptr", lclA.numRows() + 1);
314  // Number of nonzeros of filtered A
315  LocalOrdinal nnz_filtered = 0;
316  // dropping decisions for each entry
317  auto results = Kokkos::View<DecisionType*, memory_space>("results", lclA.nnz()); // initialized to UNDECIDED
318  {
319  SubFactoryMonitor mDropping(*this, "Dropping decisions", currentLevel);
320 
321  std::string functorLabel = "MueLu::CoalesceDrop::CountEntries";
322 
323  // macro that applied dropping functors
324 #if !defined(HAVE_MUELU_DEBUG)
325 #define MueLu_runDroppingFunctors(...) \
326  { \
327  auto countingFunctor = MatrixConstruction::PointwiseCountingFunctor(lclA, results, filtered_rowptr, __VA_ARGS__); \
328  Kokkos::parallel_scan(functorLabel, range, countingFunctor, nnz_filtered); \
329  }
330 #else
331 #define MueLu_runDroppingFunctors(...) \
332  { \
333  auto debug = Misc::DebugFunctor(lclA, results); \
334  auto countingFunctor = MatrixConstruction::PointwiseCountingFunctor(lclA, results, filtered_rowptr, __VA_ARGS__, debug); \
335  Kokkos::parallel_scan(functorLabel, range, countingFunctor, nnz_filtered); \
336  }
337 #endif
338 
339  auto drop_boundaries = Misc::PointwiseDropBoundaryFunctor(lclA, boundaryNodes, results);
340 
341  if (threshold != zero) {
342  auto preserve_diagonals = Misc::KeepDiagonalFunctor(lclA, results);
343  auto mark_singletons_as_boundary = Misc::MarkSingletonFunctor(lclA, boundaryNodes, results);
344 
345  if (algo == "classical" || algo == "block diagonal classical") {
346  if (algo == "block diagonal classical") {
347  auto BlockNumbers = GetBlockNumberMVs(currentLevel);
348  auto block_diagonalize = Misc::BlockDiagonalizeFunctor(*A, *std::get<0>(BlockNumbers), *std::get<1>(BlockNumbers), results);
349 
350  if (classicalAlgoStr == "default") {
351  auto classical_dropping = ClassicalDropping::SAFunctor(*A, threshold, results);
352 
353  if (aggregationMayCreateDirichlet) {
354  MueLu_runDroppingFunctors(block_diagonalize,
355  classical_dropping,
356  drop_boundaries,
357  preserve_diagonals,
358  mark_singletons_as_boundary);
359  } else {
360  MueLu_runDroppingFunctors(block_diagonalize,
361  classical_dropping,
362  drop_boundaries,
363  preserve_diagonals);
364  }
365  } else if (classicalAlgoStr == "unscaled cut") {
366  auto comparison = CutDrop::UnscaledComparison(*A, results);
367  auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
368 
369  MueLu_runDroppingFunctors(block_diagonalize,
370  drop_boundaries,
371  preserve_diagonals,
372  cut_drop);
373  } else if (classicalAlgoStr == "scaled cut") {
374  auto comparison = CutDrop::ScaledComparison(*A, results);
375  auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
376 
377  MueLu_runDroppingFunctors(block_diagonalize,
378  drop_boundaries,
379  preserve_diagonals,
380  cut_drop);
381  } else if (classicalAlgoStr == "scaled cut symmetric") {
382  auto comparison = CutDrop::ScaledComparison(*A, results);
383  auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
384 
385  MueLu_runDroppingFunctors(block_diagonalize,
386  drop_boundaries,
387  preserve_diagonals,
388  cut_drop);
389 
390  auto symmetrize = Misc::SymmetrizeFunctor(lclA, results);
391 
392  MueLu_runDroppingFunctors(symmetrize);
393 
394  } else {
395  TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: classical algo\" must be one of (default|unscaled cut|scaled cut|scaled cut symmetric), not \"" << classicalAlgoStr << "\"");
396  }
397  } else {
398  if (classicalAlgoStr == "default") {
399  auto classical_dropping = ClassicalDropping::SAFunctor(*A, threshold, results);
400 
401  if (aggregationMayCreateDirichlet) {
402  MueLu_runDroppingFunctors(classical_dropping,
403  drop_boundaries,
404  preserve_diagonals,
405  mark_singletons_as_boundary);
406  } else {
407  MueLu_runDroppingFunctors(classical_dropping,
408  drop_boundaries,
409  preserve_diagonals);
410  }
411  } else if (classicalAlgoStr == "unscaled cut") {
412  auto comparison = CutDrop::UnscaledComparison(*A, results);
413  auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
414 
415  MueLu_runDroppingFunctors(drop_boundaries,
416  preserve_diagonals,
417  cut_drop);
418  } else if (classicalAlgoStr == "scaled cut") {
419  auto comparison = CutDrop::ScaledComparison(*A, results);
420  auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
421 
422  MueLu_runDroppingFunctors(drop_boundaries,
423  preserve_diagonals,
424  cut_drop);
425  } else if (classicalAlgoStr == "scaled cut symmetric") {
426  auto comparison = CutDrop::ScaledComparison(*A, results);
427  auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
428 
429  MueLu_runDroppingFunctors(drop_boundaries,
430  preserve_diagonals,
431  cut_drop);
432 
433  auto symmetrize = Misc::SymmetrizeFunctor(lclA, results);
434 
435  MueLu_runDroppingFunctors(symmetrize);
436 
437  } else {
438  TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: classical algo\" must be one of (default|unscaled cut|scaled cut|scaled cut symmetric), not \"" << classicalAlgoStr << "\"");
439  }
440  }
441  } else if (algo == "signed classical" || algo == "block diagonal signed classical" || algo == "block diagonal colored signed classical") {
442  auto signed_classical_rs_dropping = ClassicalDropping::SignedRSFunctor(*A, threshold, results);
443 
444  if (algo == "block diagonal signed classical" || algo == "block diagonal colored signed classical") {
445  auto BlockNumbers = GetBlockNumberMVs(currentLevel);
446  auto block_diagonalize = Misc::BlockDiagonalizeFunctor(*A, *std::get<0>(BlockNumbers), *std::get<1>(BlockNumbers), results);
447 
448  if (classicalAlgoStr == "default") {
449  if (aggregationMayCreateDirichlet) {
450  MueLu_runDroppingFunctors(block_diagonalize,
451  signed_classical_rs_dropping,
452  drop_boundaries,
453  preserve_diagonals,
454  mark_singletons_as_boundary);
455 
456  } else {
457  MueLu_runDroppingFunctors(block_diagonalize,
458  signed_classical_rs_dropping,
459  drop_boundaries,
460  preserve_diagonals);
461  }
462  } else {
463  TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: classical algo\" must be default, not \"" << classicalAlgoStr << "\"");
464  }
465  } else {
466  if (classicalAlgoStr == "default") {
467  if (aggregationMayCreateDirichlet) {
468  MueLu_runDroppingFunctors(signed_classical_rs_dropping,
469  drop_boundaries,
470  preserve_diagonals,
471  mark_singletons_as_boundary);
472 
473  } else {
474  MueLu_runDroppingFunctors(signed_classical_rs_dropping,
475  drop_boundaries,
476  preserve_diagonals);
477  }
478  } else {
479  TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: classical algo\" must be default, not \"" << classicalAlgoStr << "\"");
480  }
481  }
482  } else if (algo == "signed classical sa") {
483  if (classicalAlgoStr == "default") {
484  auto signed_classical_sa_dropping = ClassicalDropping::SignedSAFunctor(*A, threshold, results);
485 
486  if (aggregationMayCreateDirichlet) {
487  MueLu_runDroppingFunctors(signed_classical_sa_dropping,
488  drop_boundaries,
489  preserve_diagonals,
490  mark_singletons_as_boundary);
491 
492  } else {
493  MueLu_runDroppingFunctors(signed_classical_sa_dropping,
494  drop_boundaries,
495  preserve_diagonals);
496  }
497  } else {
498  TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: classical algo\" must be default, not \"" << classicalAlgoStr << "\"");
499  }
500  } else if (algo == "distance laplacian" || algo == "block diagonal distance laplacian") {
502  auto coords = Get<RCP<doubleMultiVector>>(currentLevel, "Coordinates");
503 
504  if (algo == "block diagonal distance laplacian") {
505  auto BlockNumbers = GetBlockNumberMVs(currentLevel);
506  auto block_diagonalize = Misc::BlockDiagonalizeFunctor(*A, *std::get<0>(BlockNumbers), *std::get<1>(BlockNumbers), results);
507 
508  auto dist2 = DistanceLaplacian::UnweightedDistanceFunctor(*A, coords);
509 
510  if (distanceLaplacianAlgoStr == "default") {
511  auto dist_laplacian_dropping = DistanceLaplacian::DropFunctor(*A, threshold, dist2, results);
512 
513  if (aggregationMayCreateDirichlet) {
514  MueLu_runDroppingFunctors(block_diagonalize,
515  dist_laplacian_dropping,
516  drop_boundaries,
517  preserve_diagonals,
518  mark_singletons_as_boundary);
519  } else {
520  MueLu_runDroppingFunctors(block_diagonalize,
521  dist_laplacian_dropping,
522  drop_boundaries,
523  preserve_diagonals);
524  }
525  } else if (distanceLaplacianAlgoStr == "unscaled cut") {
526  auto comparison = CutDrop::UnscaledDistanceLaplacianComparison(*A, dist2, results);
527  auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
528 
529  MueLu_runDroppingFunctors(block_diagonalize,
530  drop_boundaries,
531  preserve_diagonals,
532  cut_drop);
533  } else if (distanceLaplacianAlgoStr == "scaled cut") {
534  auto comparison = CutDrop::ScaledDistanceLaplacianComparison(*A, dist2, results);
535  auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
536 
537  MueLu_runDroppingFunctors(block_diagonalize,
538  drop_boundaries,
539  preserve_diagonals,
540  cut_drop);
541  } else if (distanceLaplacianAlgoStr == "scaled cut symmetric") {
542  auto comparison = CutDrop::ScaledDistanceLaplacianComparison(*A, dist2, results);
543  auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
544 
545  MueLu_runDroppingFunctors(block_diagonalize,
546  drop_boundaries,
547  cut_drop,
548  preserve_diagonals);
549 
550  auto symmetrize = Misc::SymmetrizeFunctor(lclA, results);
551 
552  MueLu_runDroppingFunctors(symmetrize);
553  } else {
554  TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: distance laplacian algo\" must be one of (default|unscaled cut|scaled cut|scaled cut symmetric), not \"" << distanceLaplacianAlgoStr << "\"");
555  }
556  } else {
557  if (distanceLaplacianAlgoStr == "default") {
558  if (distanceLaplacianMetric == "unweighted") {
559  auto dist2 = DistanceLaplacian::UnweightedDistanceFunctor(*A, coords);
560  auto dist_laplacian_dropping = DistanceLaplacian::DropFunctor(*A, threshold, dist2, results);
561 
562  if (aggregationMayCreateDirichlet) {
563  MueLu_runDroppingFunctors(dist_laplacian_dropping,
564  drop_boundaries,
565  preserve_diagonals,
566  mark_singletons_as_boundary);
567  } else {
568  MueLu_runDroppingFunctors(dist_laplacian_dropping,
569  drop_boundaries,
570  preserve_diagonals);
571  }
572  } else if (distanceLaplacianMetric == "material") {
573  auto material = Get<RCP<MultiVector>>(currentLevel, "Material");
574  if (material->getNumVectors() == 1) {
575  GetOStream(Runtime0) << "material scalar mean = " << material->getVector(0)->meanValue() << std::endl;
576 
577  auto dist2 = DistanceLaplacian::ScalarMaterialDistanceFunctor(*A, coords, material);
578  auto dist_laplacian_dropping = DistanceLaplacian::DropFunctor(*A, threshold, dist2, results);
579 
580  if (aggregationMayCreateDirichlet) {
581  MueLu_runDroppingFunctors(dist_laplacian_dropping,
582  drop_boundaries,
583  preserve_diagonals,
584  mark_singletons_as_boundary);
585  } else {
586  MueLu_runDroppingFunctors(dist_laplacian_dropping,
587  drop_boundaries,
588  preserve_diagonals);
589  }
590  } else {
591  TEUCHOS_TEST_FOR_EXCEPTION(coords->getNumVectors() * coords->getNumVectors() != material->getNumVectors(), Exceptions::RuntimeError, "Need \"Material\" to have spatialDim^2 vectors.");
592 
593  {
594  std::stringstream ss;
595  ss << "material tensor mean =" << std::endl;
596  size_t k = 0;
597  for (size_t i = 0; i < coords->getNumVectors(); ++i) {
598  ss << " ";
599  for (size_t j = 0; j < coords->getNumVectors(); ++j) {
600  ss << material->getVector(k)->meanValue() << " ";
601  ++k;
602  }
603  ss << std::endl;
604  }
605  GetOStream(Runtime0) << ss.str();
606  }
607 
608  auto dist2 = DistanceLaplacian::TensorMaterialDistanceFunctor(*A, coords, material);
609  auto dist_laplacian_dropping = DistanceLaplacian::DropFunctor(*A, threshold, dist2, results);
610 
611  if (aggregationMayCreateDirichlet) {
612  MueLu_runDroppingFunctors(dist_laplacian_dropping,
613  drop_boundaries,
614  preserve_diagonals,
615  mark_singletons_as_boundary);
616  } else {
617  MueLu_runDroppingFunctors(dist_laplacian_dropping,
618  drop_boundaries,
619  preserve_diagonals);
620  }
621  }
622  }
623 
624  } else if (distanceLaplacianAlgoStr == "unscaled cut") {
625  if (distanceLaplacianMetric == "unweighted") {
626  auto dist2 = DistanceLaplacian::UnweightedDistanceFunctor(*A, coords);
627  auto comparison = CutDrop::UnscaledDistanceLaplacianComparison(*A, dist2, results);
628  auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
629 
630  MueLu_runDroppingFunctors(drop_boundaries,
631  preserve_diagonals,
632  cut_drop);
633  } else if (distanceLaplacianMetric == "material") {
634  auto material = Get<RCP<MultiVector>>(currentLevel, "Material");
635  if (material->getNumVectors() == 1) {
636  GetOStream(Runtime0) << "material scalar mean = " << material->getVector(0)->meanValue() << std::endl;
637 
638  auto dist2 = DistanceLaplacian::ScalarMaterialDistanceFunctor(*A, coords, material);
639  auto comparison = CutDrop::UnscaledDistanceLaplacianComparison(*A, dist2, results);
640  auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
641 
642  MueLu_runDroppingFunctors(drop_boundaries,
643  preserve_diagonals,
644  cut_drop);
645  } else {
646  TEUCHOS_TEST_FOR_EXCEPTION(coords->getNumVectors() * coords->getNumVectors() != material->getNumVectors(), Exceptions::RuntimeError, "Need \"Material\" to have spatialDim^2 vectors.");
647 
648  {
649  std::stringstream ss;
650  ss << "material tensor mean =" << std::endl;
651  size_t k = 0;
652  for (size_t i = 0; i < coords->getNumVectors(); ++i) {
653  ss << " ";
654  for (size_t j = 0; j < coords->getNumVectors(); ++j) {
655  ss << material->getVector(k)->meanValue() << " ";
656  ++k;
657  }
658  ss << std::endl;
659  }
660  GetOStream(Runtime0) << ss.str();
661  }
662 
663  auto dist2 = DistanceLaplacian::TensorMaterialDistanceFunctor(*A, coords, material);
664  auto comparison = CutDrop::UnscaledDistanceLaplacianComparison(*A, dist2, results);
665  auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
666 
667  MueLu_runDroppingFunctors(drop_boundaries,
668  preserve_diagonals,
669  cut_drop);
670  }
671  }
672  } else if (distanceLaplacianAlgoStr == "scaled cut") {
673  if (distanceLaplacianMetric == "unweighted") {
674  auto dist2 = DistanceLaplacian::UnweightedDistanceFunctor(*A, coords);
675  auto comparison = CutDrop::ScaledDistanceLaplacianComparison(*A, dist2, results);
676  auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
677 
678  MueLu_runDroppingFunctors(drop_boundaries,
679  preserve_diagonals,
680  cut_drop);
681  } else if (distanceLaplacianMetric == "material") {
682  auto material = Get<RCP<MultiVector>>(currentLevel, "Material");
683  if (material->getNumVectors() == 1) {
684  GetOStream(Runtime0) << "material scalar mean = " << material->getVector(0)->meanValue() << std::endl;
685 
686  auto dist2 = DistanceLaplacian::ScalarMaterialDistanceFunctor(*A, coords, material);
687  auto comparison = CutDrop::ScaledDistanceLaplacianComparison(*A, dist2, results);
688  auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
689 
690  MueLu_runDroppingFunctors(drop_boundaries,
691  preserve_diagonals,
692  cut_drop);
693  } else {
694  TEUCHOS_TEST_FOR_EXCEPTION(coords->getNumVectors() * coords->getNumVectors() != material->getNumVectors(), Exceptions::RuntimeError, "Need \"Material\" to have spatialDim^2 vectors.");
695 
696  {
697  std::stringstream ss;
698  ss << "material tensor mean =" << std::endl;
699  size_t k = 0;
700  for (size_t i = 0; i < coords->getNumVectors(); ++i) {
701  ss << " ";
702  for (size_t j = 0; j < coords->getNumVectors(); ++j) {
703  ss << material->getVector(k)->meanValue() << " ";
704  ++k;
705  }
706  ss << std::endl;
707  }
708  GetOStream(Runtime0) << ss.str();
709  }
710 
711  auto dist2 = DistanceLaplacian::TensorMaterialDistanceFunctor(*A, coords, material);
712  auto comparison = CutDrop::ScaledDistanceLaplacianComparison(*A, dist2, results);
713  auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
714 
715  MueLu_runDroppingFunctors(drop_boundaries,
716  preserve_diagonals,
717  cut_drop);
718  }
719  }
720  } else if (distanceLaplacianAlgoStr == "scaled cut symmetric") {
721  if (distanceLaplacianMetric == "unweighted") {
722  auto dist2 = DistanceLaplacian::UnweightedDistanceFunctor(*A, coords);
723  auto comparison = CutDrop::ScaledDistanceLaplacianComparison(*A, dist2, results);
724  auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
725 
726  MueLu_runDroppingFunctors(drop_boundaries,
727  preserve_diagonals,
728  cut_drop);
729  } else if (distanceLaplacianMetric == "material") {
730  auto material = Get<RCP<MultiVector>>(currentLevel, "Material");
731  if (material->getNumVectors() == 1) {
732  GetOStream(Runtime0) << "material scalar mean = " << material->getVector(0)->meanValue() << std::endl;
733 
734  auto dist2 = DistanceLaplacian::ScalarMaterialDistanceFunctor(*A, coords, material);
735  auto comparison = CutDrop::ScaledDistanceLaplacianComparison(*A, dist2, results);
736  auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
737 
738  MueLu_runDroppingFunctors(drop_boundaries,
739  preserve_diagonals,
740  cut_drop);
741  } else {
742  TEUCHOS_TEST_FOR_EXCEPTION(coords->getNumVectors() * coords->getNumVectors() != material->getNumVectors(), Exceptions::RuntimeError, "Need \"Material\" to have spatialDim^2 vectors.");
743 
744  {
745  std::stringstream ss;
746  ss << "material tensor mean =" << std::endl;
747  size_t k = 0;
748  for (size_t i = 0; i < coords->getNumVectors(); ++i) {
749  ss << " ";
750  for (size_t j = 0; j < coords->getNumVectors(); ++j) {
751  ss << material->getVector(k)->meanValue() << " ";
752  ++k;
753  }
754  ss << std::endl;
755  }
756  GetOStream(Runtime0) << ss.str();
757  }
758 
759  auto dist2 = DistanceLaplacian::TensorMaterialDistanceFunctor(*A, coords, material);
760  auto comparison = CutDrop::ScaledDistanceLaplacianComparison(*A, dist2, results);
761  auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
762 
763  MueLu_runDroppingFunctors(drop_boundaries,
764  preserve_diagonals,
765  cut_drop);
766  }
767  }
768 
769  auto symmetrize = Misc::SymmetrizeFunctor(lclA, results);
770 
771  MueLu_runDroppingFunctors(symmetrize);
772  } else {
773  TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: distance laplacian algo\" must be one of (default|unscaled cut|scaled cut|scaled cut symmetric), not \"" << distanceLaplacianAlgoStr << "\"");
774  }
775  }
776  } else if (algo == "block diagonal") {
777  auto BlockNumbers = GetBlockNumberMVs(currentLevel);
778  auto block_diagonalize = Misc::BlockDiagonalizeFunctor(*A, *std::get<0>(BlockNumbers), *std::get<1>(BlockNumbers), results);
779 
780  MueLu_runDroppingFunctors(block_diagonalize);
781  } else {
782  TEUCHOS_ASSERT(false);
783  }
784  } else {
785  Kokkos::deep_copy(results, KEEP);
786  // FIXME: This seems inconsistent
787  // MueLu_runDroppingFunctors(drop_boundaries);
788  auto no_op = Misc::NoOpFunctor<LocalOrdinal>();
790  }
791 #undef MueLu_runDroppingFunctors
792  }
793  GO numDropped = lclA.nnz() - nnz_filtered;
794  // We now know the number of entries of filtered A and have the final rowptr.
795 
797  // Pass 4: Create local matrix for filtered A
798  //
799  // Dropped entries are optionally lumped to the diagonal.
800 
801  RCP<Matrix> filteredA;
802  RCP<LWGraph_kokkos> graph;
803  {
804  SubFactoryMonitor mFill(*this, "Filtered matrix fill", currentLevel);
805 
806  local_matrix_type lclFilteredA;
807  local_graph_type lclGraph;
808  if (reuseGraph) {
809  filteredA = MatrixFactory::BuildCopy(A);
810  lclFilteredA = filteredA->getLocalMatrixDevice();
811 
812  auto colidx = entries_type("entries", nnz_filtered);
813  lclGraph = local_graph_type(colidx, filtered_rowptr);
814  } else {
815  auto colidx = entries_type("entries", nnz_filtered);
816  auto values = values_type("values", nnz_filtered);
817  lclFilteredA = local_matrix_type("filteredA",
818  lclA.numRows(), lclA.numCols(),
819  nnz_filtered,
820  values, filtered_rowptr, colidx);
821  }
822 
823  if (lumping) {
824  if (reuseGraph) {
825  auto fillFunctor = MatrixConstruction::PointwiseFillReuseFunctor<local_matrix_type, local_graph_type, true>(lclA, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
826  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_reuse", range, fillFunctor);
827  } else {
828  auto fillFunctor = MatrixConstruction::PointwiseFillNoReuseFunctor<local_matrix_type, true>(lclA, results, lclFilteredA, filteringDirichletThreshold);
829  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_noreuse", range, fillFunctor);
830  }
831  } else {
832  if (reuseGraph) {
833  auto fillFunctor = MatrixConstruction::PointwiseFillReuseFunctor<local_matrix_type, local_graph_type, false>(lclA, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
834  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_reuse", range, fillFunctor);
835  } else {
836  auto fillFunctor = MatrixConstruction::PointwiseFillNoReuseFunctor<local_matrix_type, false>(lclA, results, lclFilteredA, filteringDirichletThreshold);
837  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_noreuse", range, fillFunctor);
838  }
839  }
840 
841  if (!reuseGraph)
842  filteredA = MatrixFactory::Build(lclFilteredA, A->getRowMap(), A->getColMap(), A->getDomainMap(), A->getRangeMap());
843  filteredA->SetFixedBlockSize(A->GetFixedBlockSize());
844 
845  if (reuseEigenvalue) {
846  // Reuse max eigenvalue from A
847  // It is unclear what eigenvalue is the best for the smoothing, but we already may have
848  // the D^{-1}A estimate in A, may as well use it.
849  // NOTE: ML does that too
850  filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate());
851  } else {
852  filteredA->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits<SC>::one());
853  }
854 
855  if (!reuseGraph) {
856  // Use graph of filteredA as graph.
857  lclGraph = filteredA->getCrsGraph()->getLocalGraphDevice();
858  }
859  graph = rcp(new LWGraph_kokkos(lclGraph, filteredA->getRowMap(), filteredA->getColMap(), "amalgamated graph of A"));
860  graph->SetBoundaryNodeMap(boundaryNodes);
861  }
862 
863  LO dofsPerNode = 1;
864  Set(currentLevel, "DofsPerNode", dofsPerNode);
865  Set(currentLevel, "Graph", graph);
866  Set(currentLevel, "A", filteredA);
867 
868  return std::make_tuple(numDropped, boundaryNodes);
869 }
870 
871 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
872 std::tuple<GlobalOrdinal, typename MueLu::LWGraph_kokkos<LocalOrdinal, GlobalOrdinal, Node>::boundary_nodes_type> CoalesceDropFactory_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
873  BuildVector(Level& currentLevel) const {
874  FactoryMonitor m(*this, "Build", currentLevel);
875 
878  using local_matrix_type = typename MatrixType::local_matrix_type;
879  using local_graph_type = typename GraphType::local_graph_type;
880  using rowptr_type = typename local_graph_type::row_map_type::non_const_type;
881  using entries_type = typename local_graph_type::entries_type::non_const_type;
882  using values_type = typename local_matrix_type::values_type::non_const_type;
883  using device_type = typename Node::device_type;
884  using memory_space = typename device_type::memory_space;
885 
886  typedef Teuchos::ScalarTraits<SC> STS;
887  typedef typename STS::magnitudeType MT;
888  const MT zero = Teuchos::ScalarTraits<MT>::zero();
889 
890  auto A = Get<RCP<Matrix>>(currentLevel, "A");
891 
892  /* NOTE: storageblocksize (from GetStorageBlockSize()) is the size of a block in the chosen storage scheme.
893  blkSize is the number of storage blocks that must kept together during the amalgamation process.
894 
895  Both of these quantities may be different than numPDEs (from GetFixedBlockSize()), but the following must always hold:
896 
897  numPDEs = blkSize * storageblocksize.
898 
899  If numPDEs==1
900  Matrix is point storage (classical CRS storage). storageblocksize=1 and blkSize=1
901  No other values makes sense.
902 
903  If numPDEs>1
904  If matrix uses point storage, then storageblocksize=1 and blkSize=numPDEs.
905  If matrix uses block storage, with block size of n, then storageblocksize=n, and blkSize=numPDEs/n.
906  Thus far, only storageblocksize=numPDEs and blkSize=1 has been tested.
907  */
908 
909  TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() % A->GetStorageBlockSize() != 0, Exceptions::RuntimeError, "A->GetFixedBlockSize() needs to be a multiple of A->GetStorageBlockSize()");
910  LO blkSize = A->GetFixedBlockSize() / A->GetStorageBlockSize();
911 
912  auto amalInfo = Get<RCP<AmalgamationInfo>>(currentLevel, "UnAmalgamationInfo");
913 
914  const RCP<const Map> rowMap = A->getRowMap();
915  const RCP<const Map> colMap = A->getColMap();
916 
917  // build a node row map (uniqueMap = non-overlapping) and a node column map
918  // (nonUniqueMap = overlapping). The arrays rowTranslation and colTranslation
919  // stored in the AmalgamationInfo class container contain the local node id
920  // given a local dof id. The data is calculated in the AmalgamationFactory and
921  // stored in the variable "UnAmalgamationInfo" (which is of type AmalagamationInfo)
922  const RCP<const Map> uniqueMap = amalInfo->getNodeRowMap();
923  const RCP<const Map> nonUniqueMap = amalInfo->getNodeColMap();
924  Array<LO> rowTranslationArray = *(amalInfo->getRowTranslation()); // TAW should be transform that into a View?
925  Array<LO> colTranslationArray = *(amalInfo->getColTranslation());
926 
927  Kokkos::View<LO*, Kokkos::MemoryUnmanaged>
928  rowTranslationView(rowTranslationArray.getRawPtr(), rowTranslationArray.size());
929  Kokkos::View<LO*, Kokkos::MemoryUnmanaged>
930  colTranslationView(colTranslationArray.getRawPtr(), colTranslationArray.size());
931 
932  // get number of local nodes
933  LO numNodes = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
934  typedef typename Kokkos::View<LocalOrdinal*, typename Node::device_type> id_translation_type;
935  id_translation_type rowTranslation("dofId2nodeId", rowTranslationArray.size());
936  id_translation_type colTranslation("ov_dofId2nodeId", colTranslationArray.size());
937  Kokkos::deep_copy(rowTranslation, rowTranslationView);
938  Kokkos::deep_copy(colTranslation, colTranslationView);
939 
940  // extract striding information
941  blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map)
942  LocalOrdinal blkId = -1; //< the block id within a strided map or -1 if it is a full block map
943  LocalOrdinal blkPartSize = A->GetFixedBlockSize(); //< stores block size of part blkId (or the full block size)
944  if (A->IsView("stridedMaps") == true) {
945  const RCP<const Map> myMap = A->getRowMap("stridedMaps");
946  const RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
947  TEUCHOS_TEST_FOR_EXCEPTION(strMap.is_null() == true, Exceptions::RuntimeError, "Map is not of type stridedMap");
948  blkSize = Teuchos::as<const LocalOrdinal>(strMap->getFixedBlockSize());
949  blkId = strMap->getStridedBlockId();
950  if (blkId > -1)
951  blkPartSize = Teuchos::as<LocalOrdinal>(strMap->getStridingData()[blkId]);
952  }
953 
954  TEUCHOS_TEST_FOR_EXCEPTION(A->getRowMap()->getLocalNumElements() % blkPartSize != 0, MueLu::Exceptions::RuntimeError, "MueLu::CoalesceDropFactory: Number of local elements is " << A->getRowMap()->getLocalNumElements() << " but should be a multiple of " << blkPartSize);
955 
957  // Process parameterlist
958  const ParameterList& pL = GetParameterList();
959 
960  // Boundary detection
961  const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
962  const typename STS::magnitudeType rowSumTol = as<typename STS::magnitudeType>(pL.get<double>("aggregation: row sum drop tol"));
963  const LocalOrdinal dirichletNonzeroThreshold = 1;
964  const bool useGreedyDirichlet = pL.get<bool>("aggregation: greedy Dirichlet");
965  TEUCHOS_TEST_FOR_EXCEPTION(rowSumTol > zero, MueLu::Exceptions::RuntimeError, "MueLu::CoalesceDropFactory: RowSum is not implemented for vectorial problems.");
966 
967  // Dropping
968  const std::string algo = pL.get<std::string>("aggregation: drop scheme");
969  std::string classicalAlgoStr = pL.get<std::string>("aggregation: classical algo");
970  std::string distanceLaplacianAlgoStr = pL.get<std::string>("aggregation: distance laplacian algo");
971  std::string distanceLaplacianMetric = pL.get<std::string>("aggregation: distance laplacian metric");
972  MT threshold;
973  // If we're doing the ML-style halving of the drop tol at each level, we do that here.
974  if (pL.get<bool>("aggregation: use ml scaling of drop tol"))
975  threshold = pL.get<double>("aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID());
976  else
977  threshold = as<MT>(pL.get<double>("aggregation: drop tol"));
978  bool aggregationMayCreateDirichlet = pL.get<bool>("aggregation: dropping may create Dirichlet");
979 
980  // Fill
981  const bool lumping = pL.get<bool>("filtered matrix: use lumping");
982  const bool reuseGraph = pL.get<bool>("filtered matrix: reuse graph");
983  const bool reuseEigenvalue = pL.get<bool>("filtered matrix: reuse eigenvalue");
984 
985  const bool useRootStencil = pL.get<bool>("filtered matrix: use root stencil");
986  const bool useSpreadLumping = pL.get<bool>("filtered matrix: use spread lumping");
987 
988  const MT filteringDirichletThreshold = as<MT>(pL.get<double>("filtered matrix: Dirichlet threshold"));
989 
990  TEUCHOS_ASSERT(!useRootStencil);
991  TEUCHOS_ASSERT(!useSpreadLumping);
992 
993  if (algo == "classical") {
994  GetOStream(Runtime0) << "algorithm = \"" << algo << "\" classical algorithm = \"" << classicalAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
995  } else if (algo == "distance laplacian") {
996  GetOStream(Runtime0) << "algorithm = \"" << algo << "\" distance laplacian algorithm = \"" << distanceLaplacianAlgoStr << "\" distance laplacian metric = \"" << distanceLaplacianMetric << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
997  } else
998  GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
999 
1001  // We perform four sweeps over the rows of A:
1002  // Pass 1: detection of boundary nodes
1003  // Pass 2: diagonal extraction
1004  // Pass 3: drop decision for each entry and construction of the rowptr of the filtered matrix
1005  // Pass 4: fill of the filtered matrix
1006  //
1007  // Pass 1 and 3 apply a sequence of criteria to each row of the matrix.
1008 
1009  // TODO: We could merge pass 1 and 2.
1010 
1011  auto crsA = toCrsMatrix(A);
1012  auto lclA = crsA->getLocalMatrixDevice();
1013  auto range = range_type(0, numNodes);
1014 
1016  // Pass 1: Detect boundary nodes
1017  //
1018  // The following criteria are available:
1019  // - BoundaryDetection::VectorDirichletFunctor
1020  // Marks rows as Dirichlet based on value threshold and number of off-diagonal entries
1021 
1022  // Dirichlet nodes
1023  auto boundaryNodes = boundary_nodes_type("boundaryNodes", numNodes); // initialized to false
1024  {
1025  SubFactoryMonitor mBoundary(*this, "Boundary detection", currentLevel);
1026 
1027 #define MueLu_runBoundaryFunctors(...) \
1028  { \
1029  auto boundaries = BoundaryDetection::BoundaryFunctor(lclA, __VA_ARGS__); \
1030  Kokkos::parallel_for("CoalesceDrop::BoundaryDetection", range, boundaries); \
1031  }
1032 
1033  if (useGreedyDirichlet) {
1034  auto dirichlet_detection = BoundaryDetection::VectorDirichletFunctor<local_matrix_type, true>(lclA, blkPartSize, boundaryNodes, dirichletThreshold, dirichletNonzeroThreshold);
1035  MueLu_runBoundaryFunctors(dirichlet_detection);
1036  } else {
1037  auto dirichlet_detection = BoundaryDetection::VectorDirichletFunctor<local_matrix_type, false>(lclA, blkPartSize, boundaryNodes, dirichletThreshold, dirichletNonzeroThreshold);
1038  MueLu_runBoundaryFunctors(dirichlet_detection);
1039  }
1040 #undef MueLu_runBoundaryFunctors
1041  }
1042  // In what follows, boundaryNodes can still still get modified if aggregationMayCreateDirichlet == true.
1043  // Otherwise we're now done with it now.
1044 
1046  // Pass 2 & 3: Diagonal extraction and determine dropping and construct
1047  // rowptr of filtered matrix
1048  //
1049  // The following criteria are available:
1050  // - Misc::VectorDropBoundaryFunctor
1051  // Drop all rows that have been marked as Dirichlet
1052  // - Misc::DropOffRankFunctor
1053  // Drop all entries that are off-rank
1054  // - ClassicalDropping::SAFunctor
1055  // Classical dropping
1056  // - ClassicalDropping::SignedRSFunctor
1057  // Classical RS dropping
1058  // - ClassicalDropping::SignedSAFunctor
1059  // Classical signed SA dropping
1060  // - DistanceLaplacian::DropFunctor
1061  // Distance Laplacian dropping
1062  // - Misc::KeepDiagonalFunctor
1063  // Mark diagonal as KEEP
1064  // - Misc::MarkSingletonFunctor
1065  // Mark singletons after dropping as Dirichlet
1066 
1067  // rowptr of filtered A
1068  auto filtered_rowptr = rowptr_type("rowptr", lclA.numRows() + 1);
1069  auto graph_rowptr = rowptr_type("rowptr", numNodes + 1);
1070  // Number of nonzeros of filtered A and graph
1071  Kokkos::pair<LocalOrdinal, LocalOrdinal> nnz = {0, 0};
1072 
1073  // dropping decisions for each entry
1074  auto results = Kokkos::View<DecisionType*, memory_space>("results", lclA.nnz()); // initialized to UNDECIDED
1075  {
1076  SubFactoryMonitor mDropping(*this, "Dropping decisions", currentLevel);
1077 
1078  std::string functorLabel = "MueLu::CoalesceDrop::CountEntries";
1079 
1080 #if !defined(HAVE_MUELU_DEBUG)
1081 #define MueLu_runDroppingFunctors(...) \
1082  { \
1083  auto countingFunctor = MatrixConstruction::VectorCountingFunctor(lclA, blkPartSize, colTranslation, results, filtered_rowptr, graph_rowptr, __VA_ARGS__); \
1084  Kokkos::parallel_scan(functorLabel, range, countingFunctor, nnz); \
1085  }
1086 #else
1087 #define MueLu_runDroppingFunctors(...) \
1088  { \
1089  auto debug = Misc::DebugFunctor(lclA, results); \
1090  auto countingFunctor = MatrixConstruction::VectorCountingFunctor(lclA, blkPartSize, colTranslation, results, filtered_rowptr, graph_rowptr, __VA_ARGS__, debug); \
1091  Kokkos::parallel_scan(functorLabel, range, countingFunctor, nnz); \
1092  }
1093 #endif
1094 
1095  auto drop_boundaries = Misc::VectorDropBoundaryFunctor(lclA, rowTranslation, boundaryNodes, results);
1096 
1097  if (threshold != zero) {
1098  auto preserve_diagonals = Misc::KeepDiagonalFunctor(lclA, results);
1099  auto mark_singletons_as_boundary = Misc::MarkSingletonVectorFunctor(lclA, rowTranslation, boundaryNodes, results);
1100 
1101  if (algo == "classical") {
1102  if (classicalAlgoStr == "default") {
1103  auto classical_dropping = ClassicalDropping::SAFunctor(*A, threshold, results);
1104 
1105  if (aggregationMayCreateDirichlet) {
1106  MueLu_runDroppingFunctors(classical_dropping,
1107  // drop_boundaries,
1108  preserve_diagonals,
1109  mark_singletons_as_boundary);
1110  } else {
1111  MueLu_runDroppingFunctors(classical_dropping,
1112  // drop_boundaries,
1113  preserve_diagonals);
1114  }
1115  } else if (classicalAlgoStr == "unscaled cut") {
1116  TEUCHOS_ASSERT(false);
1117  } else if (classicalAlgoStr == "scaled cut") {
1118  TEUCHOS_ASSERT(false);
1119  } else if (classicalAlgoStr == "scaled cut symmetric") {
1120  TEUCHOS_ASSERT(false);
1121  } else {
1122  TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: classical algo\" must be one of (default|unscaled cut|scaled cut|scaled cut symmetric), not \"" << classicalAlgoStr << "\"");
1123  }
1124  } else if (algo == "signed classical" || algo == "block diagonal colored signed classical" || algo == "block diagonal signed classical") {
1125  auto signed_classical_rs_dropping = ClassicalDropping::SignedRSFunctor(*A, threshold, results);
1126 
1127  if (aggregationMayCreateDirichlet) {
1128  MueLu_runDroppingFunctors(signed_classical_rs_dropping,
1129  // drop_boundaries,
1130  preserve_diagonals,
1131  mark_singletons_as_boundary);
1132 
1133  } else {
1134  MueLu_runDroppingFunctors(signed_classical_rs_dropping,
1135  // drop_boundaries,
1136  preserve_diagonals);
1137  }
1138  } else if (algo == "signed classical sa") {
1139  auto signed_classical_sa_dropping = ClassicalDropping::SignedSAFunctor(*A, threshold, results);
1140 
1141  if (aggregationMayCreateDirichlet) {
1142  MueLu_runDroppingFunctors(signed_classical_sa_dropping,
1143  // drop_boundaries,
1144  preserve_diagonals,
1145  mark_singletons_as_boundary);
1146 
1147  } else {
1148  MueLu_runDroppingFunctors(signed_classical_sa_dropping,
1149  // drop_boundaries,
1150  preserve_diagonals);
1151  }
1152  } else if (algo == "distance laplacian") {
1154  auto coords = Get<RCP<doubleMultiVector>>(currentLevel, "Coordinates");
1155 
1156  auto dist2 = DistanceLaplacian::UnweightedDistanceFunctor(*A, coords);
1157 
1158  if (distanceLaplacianAlgoStr == "default") {
1159  auto dist_laplacian_dropping = DistanceLaplacian::DropFunctor(*A, threshold, dist2, results);
1160 
1161  if (aggregationMayCreateDirichlet) {
1162  MueLu_runDroppingFunctors(dist_laplacian_dropping,
1163  // drop_boundaries,
1164  preserve_diagonals,
1165  mark_singletons_as_boundary);
1166  } else {
1167  MueLu_runDroppingFunctors(dist_laplacian_dropping,
1168  // drop_boundaries,
1169  preserve_diagonals);
1170  }
1171  } else if (distanceLaplacianAlgoStr == "unscaled cut") {
1172  TEUCHOS_ASSERT(false);
1173  } else if (distanceLaplacianAlgoStr == "scaled cut") {
1174  TEUCHOS_ASSERT(false);
1175  } else if (distanceLaplacianAlgoStr == "scaled cut symmetric") {
1176  TEUCHOS_ASSERT(false);
1177  } else {
1178  TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: distance laplacian algo\" must be one of (default|unscaled cut|scaled cut|scaled cut symmetric), not \"" << distanceLaplacianAlgoStr << "\"");
1179  }
1180  } else {
1181  TEUCHOS_ASSERT(false);
1182  }
1183  } else {
1184  Kokkos::deep_copy(results, KEEP);
1185  // MueLu_runDroppingFunctors(drop_boundaries);
1186  auto no_op = Misc::NoOpFunctor<LocalOrdinal>();
1188  }
1189 #undef MueLu_runDroppingFunctors
1190  }
1191  LocalOrdinal nnz_filtered = nnz.first;
1192  LocalOrdinal nnz_graph = nnz.second;
1193  GO numTotal = lclA.nnz();
1194  GO numDropped = numTotal - nnz_filtered;
1195  // We now know the number of entries of filtered A and have the final rowptr.
1196 
1198  // Pass 4: Create local matrix for filtered A
1199  //
1200  // Dropped entries are optionally lumped to the diagonal.
1201 
1202  RCP<Matrix> filteredA;
1203  RCP<LWGraph_kokkos> graph;
1204  {
1205  SubFactoryMonitor mFill(*this, "Filtered matrix fill", currentLevel);
1206 
1207  local_matrix_type lclFilteredA;
1208  if (reuseGraph) {
1209  lclFilteredA = local_matrix_type("filteredA", lclA.graph, lclA.numCols());
1210  } else {
1211  auto colidx = entries_type("entries", nnz_filtered);
1212  auto values = values_type("values", nnz_filtered);
1213  lclFilteredA = local_matrix_type("filteredA",
1214  lclA.numRows(), lclA.numCols(),
1215  nnz_filtered,
1216  values, filtered_rowptr, colidx);
1217  }
1218 
1219  local_graph_type lclGraph;
1220  {
1221  auto colidx = entries_type("entries", nnz_graph);
1222  lclGraph = local_graph_type(colidx, graph_rowptr);
1223  }
1224 
1225  if (lumping) {
1226  if (reuseGraph) {
1227  auto fillFunctor = MatrixConstruction::VectorFillFunctor<local_matrix_type, true, true>(lclA, blkPartSize, colTranslation, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
1228  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_reuse", range, fillFunctor);
1229  } else {
1230  auto fillFunctor = MatrixConstruction::VectorFillFunctor<local_matrix_type, true, false>(lclA, blkPartSize, colTranslation, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
1231  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_noreuse", range, fillFunctor);
1232  }
1233  } else {
1234  if (reuseGraph) {
1235  auto fillFunctor = MatrixConstruction::VectorFillFunctor<local_matrix_type, false, true>(lclA, blkSize, colTranslation, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
1236  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_reuse", range, fillFunctor);
1237  } else {
1238  auto fillFunctor = MatrixConstruction::VectorFillFunctor<local_matrix_type, false, false>(lclA, blkSize, colTranslation, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
1239  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_noreuse", range, fillFunctor);
1240  }
1241  }
1242 
1243  filteredA = Xpetra::MatrixFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(lclFilteredA, A->getRowMap(), A->getColMap(), A->getDomainMap(), A->getRangeMap());
1244  filteredA->SetFixedBlockSize(blkSize);
1245 
1246  if (reuseEigenvalue) {
1247  // Reuse max eigenvalue from A
1248  // It is unclear what eigenvalue is the best for the smoothing, but we already may have
1249  // the D^{-1}A estimate in A, may as well use it.
1250  // NOTE: ML does that too
1251  filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate());
1252  } else {
1253  filteredA->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits<SC>::one());
1254  }
1255 
1256  graph = rcp(new LWGraph_kokkos(lclGraph, uniqueMap, nonUniqueMap, "amalgamated graph of A"));
1257  graph->SetBoundaryNodeMap(boundaryNodes);
1258  }
1259 
1260  LO dofsPerNode = blkSize;
1261 
1262  Set(currentLevel, "DofsPerNode", dofsPerNode);
1263  Set(currentLevel, "Graph", graph);
1264  Set(currentLevel, "A", filteredA);
1265 
1266  return std::make_tuple(numDropped, boundaryNodes);
1267 }
1268 
1269 } // namespace MueLu
1270 #endif // MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
Drops entries the unscaled distance Laplacian.
#define MueLu_sumAll(rcpComm, in, out)
MueLu::DefaultLocalOrdinal LocalOrdinal
Lightweight MueLu representation of a compressed row storage graph.
KOKKOS_INLINE_FUNCTION void SetBoundaryNodeMap(const boundary_nodes_type bndry)
Set boolean array indicating which rows correspond to Dirichlet boundaries.
void setValidator(RCP< const ParameterEntryValidator > const &validator)
Functor that drops boundary nodes for a blockSize &gt; 1 problem.
GlobalOrdinal GO
T & get(const std::string &name, T def_value)
Classical smoothed aggregation dropping criterion.
Functor that marks singletons (all off-diagonal entries in a row are dropped) as boundary.
Signed classical Ruge-Stueben dropping criterion.
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Print more statistics.
#define MueLu_runBoundaryFunctors(...)
LocalOrdinal LO
One-liner description of what is happening.
ParameterList & set(std::string const &name, T &&value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
Signed classical smoothed aggregation dropping criterion.
Orders entries of row by .
Orders entries of row by where is the distance Laplacian.
Functor that marks singletons (all off-diagonal entries in a row are dropped) as boundary.
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Functor that symmetrizes the dropping decisions.
MueLu::DefaultGlobalOrdinal GlobalOrdinal
Functor that drops boundary nodes for a blockSize == 1 problem.
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:63
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
typename MueLu::LWGraph_kokkos< LocalOrdinal, GlobalOrdinal, Node >::boundary_nodes_type boundary_nodes_type
void DeclareInput(Level &currentLevel) const
Input.
Functor that fills the filtered matrix while reusing the graph of the matrix before dropping...
Order each row by a criterion, compare the ratio of values and drop all entries once the ratio is bel...
Functor for marking nodes as Dirichlet.
Functor that drops all entries that are not on the block diagonal.
Functor that marks diagonal as kept, unless the are already marked as boundary.
Kokkos::RangePolicy< local_ordinal_type, execution_space > range_type
std::tuple< GlobalOrdinal, boundary_nodes_type > BuildVector(Level &currentLevel) const
static RCP< Vector > Build(const Teuchos::RCP< const Map > &map, bool zeroOut=true)
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void Build(Level &currentLevel) const
Build an object with this factory.
Functor for marking nodes as Dirichlet based on rowsum.
size_type size() const
#define SET_VALID_ENTRY(name)
std::tuple< RCP< LocalOrdinalVector >, RCP< LocalOrdinalVector > > GetBlockNumberMVs(Level &currentLevel) const
Functor for marking nodes as Dirichlet in a block operator.
std::tuple< GlobalOrdinal, boundary_nodes_type > BuildScalar(Level &currentLevel) const
Functor does not reuse the graph of the matrix for a problem with blockSize == 1. ...
Node NO
static RCP< Matrix > Build(const RCP< const Map > &rowMap, size_t maxNumEntriesPerRow, Xpetra::ProfileType pftype=Xpetra::DynamicProfile)
Exception throws to report errors in the internal logical of the program.
#define TEUCHOS_ASSERT(assertion_test)
ParameterEntry & getEntry(const std::string &name)
Orders entries of row by .
#define MueLu_runDroppingFunctors(...)
bool is_null() const