MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_CoalesceDropFactory_kokkos_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // MueLu: A package for multigrid based preconditioning
4 //
5 // Copyright 2012 NTESS and the MueLu contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
11 #define MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
12 
13 #include <Kokkos_Core.hpp>
14 #include <KokkosSparse_CrsMatrix.hpp>
15 #include <sstream>
16 #include <tuple>
17 
18 #include "Xpetra_Matrix.hpp"
19 
21 
22 #include "MueLu_AmalgamationInfo.hpp"
23 #include "MueLu_Exceptions.hpp"
24 #include "MueLu_Level.hpp"
25 #include "MueLu_LWGraph_kokkos.hpp"
26 #include "MueLu_MasterList.hpp"
27 #include "MueLu_Monitor.hpp"
28 
29 // #define MUELU_COALESCE_DROP_DEBUG 1
30 
33 #include "MueLu_CutDrop.hpp"
34 #include "MueLu_DroppingCommon.hpp"
37 
38 namespace MueLu {
39 
40 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
42  RCP<ParameterList> validParamList = rcp(new ParameterList());
43 
44 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
45  SET_VALID_ENTRY("aggregation: drop tol");
46  SET_VALID_ENTRY("aggregation: use ml scaling of drop tol");
47  SET_VALID_ENTRY("aggregation: Dirichlet threshold");
48  SET_VALID_ENTRY("aggregation: greedy Dirichlet");
49  SET_VALID_ENTRY("aggregation: row sum drop tol");
50  SET_VALID_ENTRY("aggregation: drop scheme");
51  SET_VALID_ENTRY("aggregation: block diagonal: interleaved blocksize");
52  SET_VALID_ENTRY("aggregation: distance laplacian metric");
53  SET_VALID_ENTRY("aggregation: distance laplacian directional weights");
54  SET_VALID_ENTRY("aggregation: dropping may create Dirichlet");
55  SET_VALID_ENTRY("aggregation: distance laplacian algo");
56  SET_VALID_ENTRY("aggregation: classical algo");
57  SET_VALID_ENTRY("aggregation: coloring: localize color graph");
58 
59  SET_VALID_ENTRY("filtered matrix: use lumping");
60  SET_VALID_ENTRY("filtered matrix: reuse graph");
61  SET_VALID_ENTRY("filtered matrix: reuse eigenvalue");
62 
63  SET_VALID_ENTRY("filtered matrix: use root stencil");
64  SET_VALID_ENTRY("filtered matrix: use spread lumping");
65  SET_VALID_ENTRY("filtered matrix: spread lumping diag dom growth factor");
66  SET_VALID_ENTRY("filtered matrix: spread lumping diag dom cap");
67  SET_VALID_ENTRY("filtered matrix: Dirichlet threshold");
68 
69 #undef SET_VALID_ENTRY
70  validParamList->set<bool>("lightweight wrap", true, "Experimental option for lightweight graph access");
71 
72  // "signed classical" is the Ruge-Stuben style (relative to max off-diagonal), "sign classical sa" is the signed version of the sa criterion (relative to the diagonal values)
73  validParamList->getEntry("aggregation: drop scheme").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("signed classical sa", "classical", "distance laplacian", "signed classical", "block diagonal", "block diagonal classical", "block diagonal distance laplacian", "block diagonal signed classical", "block diagonal colored signed classical"))));
74  validParamList->getEntry("aggregation: classical algo").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("default", "unscaled cut", "scaled cut", "scaled cut symmetric"))));
75  validParamList->getEntry("aggregation: distance laplacian algo").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("default", "unscaled cut", "scaled cut", "scaled cut symmetric"))));
76  validParamList->getEntry("aggregation: distance laplacian metric").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("unweighted", "material"))));
77 
78  validParamList->set<RCP<const FactoryBase>>("A", Teuchos::null, "Generating factory of the matrix A");
79  validParamList->set<RCP<const FactoryBase>>("UnAmalgamationInfo", Teuchos::null, "Generating factory for UnAmalgamationInfo");
80  validParamList->set<RCP<const FactoryBase>>("Coordinates", Teuchos::null, "Generating factory for Coordinates");
81  validParamList->set<RCP<const FactoryBase>>("BlockNumber", Teuchos::null, "Generating factory for BlockNumber");
82  validParamList->set<RCP<const FactoryBase>>("Material", Teuchos::null, "Generating factory for Material");
83 
84  return validParamList;
85 }
86 
87 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
89  Input(currentLevel, "A");
90  Input(currentLevel, "UnAmalgamationInfo");
91 
92  const ParameterList& pL = GetParameterList();
93  std::string algo = pL.get<std::string>("aggregation: drop scheme");
94  std::string distLaplMetric = pL.get<std::string>("aggregation: distance laplacian metric");
95  if (algo == "distance laplacian" || algo == "block diagonal distance laplacian") {
96  Input(currentLevel, "Coordinates");
97  if (distLaplMetric == "material")
98  Input(currentLevel, "Material");
99  }
100  if (algo == "signed classical sa")
101  ;
102  else if (algo.find("block diagonal") != std::string::npos || algo.find("signed classical") != std::string::npos) {
103  Input(currentLevel, "BlockNumber");
104  }
105 }
106 
107 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
109  Build(Level& currentLevel) const {
110  auto A = Get<RCP<Matrix>>(currentLevel, "A");
111  TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() % A->GetStorageBlockSize() != 0, Exceptions::RuntimeError, "A->GetFixedBlockSize() needs to be a multiple of A->GetStorageBlockSize()");
112  LO blkSize = A->GetFixedBlockSize() / A->GetStorageBlockSize();
113 
114  std::tuple<GlobalOrdinal, boundary_nodes_type> results;
115  if (blkSize == 1)
116  results = BuildScalar(currentLevel);
117  else
118  results = BuildVector(currentLevel);
119 
120  if (GetVerbLevel() & Statistics1) {
121  GlobalOrdinal numDropped = std::get<0>(results);
122  auto boundaryNodes = std::get<1>(results);
123 
124  GO numLocalBoundaryNodes = 0;
125  GO numGlobalBoundaryNodes = 0;
126 
127  Kokkos::parallel_reduce(
128  "MueLu:CoalesceDropF:Build:bnd", range_type(0, boundaryNodes.extent(0)),
129  KOKKOS_LAMBDA(const LO i, GO& n) {
130  if (boundaryNodes(i))
131  n++;
132  },
133  numLocalBoundaryNodes);
134 
135  auto comm = A->getRowMap()->getComm();
136  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
137 
138  GO numGlobalTotal = A->getGlobalNumEntries();
139  GO numGlobalDropped;
140  MueLu_sumAll(comm, numDropped, numGlobalDropped);
141 
142  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
143  if (numGlobalTotal != 0) {
144  GetOStream(Statistics1) << "Number of dropped entries: "
145  << numGlobalDropped << "/" << numGlobalTotal
146  << " (" << 100 * Teuchos::as<double>(numGlobalDropped) / Teuchos::as<double>(numGlobalTotal) << "%)" << std::endl;
147  }
148  }
149 }
150 
151 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
152 std::tuple<Teuchos::RCP<Xpetra::Vector<LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node>>, Teuchos::RCP<Xpetra::Vector<LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node>>> CoalesceDropFactory_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
153  GetBlockNumberMVs(Level& currentLevel) const {
154  RCP<LocalOrdinalVector> BlockNumber = Get<RCP<LocalOrdinalVector>>(currentLevel, "BlockNumber");
155  RCP<LocalOrdinalVector> ghostedBlockNumber;
156  GetOStream(Statistics1) << "Using BlockDiagonal Graph before dropping (with provided blocking)" << std::endl;
157 
158  // Ghost the column block numbers if we need to
159  auto A = Get<RCP<Matrix>>(currentLevel, "A");
160  RCP<const Import> importer = A->getCrsGraph()->getImporter();
161  if (!importer.is_null()) {
162  SubFactoryMonitor m1(*this, "Block Number import", currentLevel);
163  ghostedBlockNumber = Xpetra::VectorFactory<LO, LO, GO, NO>::Build(importer->getTargetMap());
164  ghostedBlockNumber->doImport(*BlockNumber, *importer, Xpetra::INSERT);
165  } else {
166  ghostedBlockNumber = BlockNumber;
167  }
168  return std::make_tuple(BlockNumber, ghostedBlockNumber);
169 }
170 
171 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
172 std::tuple<GlobalOrdinal, typename MueLu::LWGraph_kokkos<LocalOrdinal, GlobalOrdinal, Node>::boundary_nodes_type> CoalesceDropFactory_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
173  BuildScalar(Level& currentLevel) const {
174  FactoryMonitor m(*this, "Build", currentLevel);
175 
178  using local_matrix_type = typename MatrixType::local_matrix_type;
179  using local_graph_type = typename GraphType::local_graph_type;
180  using rowptr_type = typename local_graph_type::row_map_type::non_const_type;
181  using entries_type = typename local_graph_type::entries_type::non_const_type;
182  using values_type = typename local_matrix_type::values_type::non_const_type;
183  using device_type = typename Node::device_type;
184  using memory_space = typename device_type::memory_space;
185 
186  typedef Teuchos::ScalarTraits<SC> STS;
187  typedef typename STS::magnitudeType MT;
188  const MT zero = Teuchos::ScalarTraits<MT>::zero();
189 
190  auto A = Get<RCP<Matrix>>(currentLevel, "A");
191 
193  // Process parameterlist
194  const ParameterList& pL = GetParameterList();
195 
196  // Boundary detection
197  const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
198  const typename STS::magnitudeType rowSumTol = as<typename STS::magnitudeType>(pL.get<double>("aggregation: row sum drop tol"));
199  const LocalOrdinal dirichletNonzeroThreshold = 1;
200 
201  // Dropping
202  const std::string algo = pL.get<std::string>("aggregation: drop scheme");
203  std::string classicalAlgoStr = pL.get<std::string>("aggregation: classical algo");
204  std::string distanceLaplacianAlgoStr = pL.get<std::string>("aggregation: distance laplacian algo");
205  std::string distanceLaplacianMetric = pL.get<std::string>("aggregation: distance laplacian metric");
206  MT threshold;
207  // If we're doing the ML-style halving of the drop tol at each level, we do that here.
208  if (pL.get<bool>("aggregation: use ml scaling of drop tol"))
209  threshold = pL.get<double>("aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID());
210  else
211  threshold = as<MT>(pL.get<double>("aggregation: drop tol"));
212  bool aggregationMayCreateDirichlet = pL.get<bool>("aggregation: dropping may create Dirichlet");
213 
214  // Fill
215  const bool lumping = pL.get<bool>("filtered matrix: use lumping");
216  const bool reuseGraph = pL.get<bool>("filtered matrix: reuse graph");
217  const bool reuseEigenvalue = pL.get<bool>("filtered matrix: reuse eigenvalue");
218 
219  const bool useRootStencil = pL.get<bool>("filtered matrix: use root stencil");
220  const bool useSpreadLumping = pL.get<bool>("filtered matrix: use spread lumping");
221 
222  const MT filteringDirichletThreshold = as<MT>(pL.get<double>("filtered matrix: Dirichlet threshold"));
223  TEUCHOS_ASSERT(!useRootStencil);
224  TEUCHOS_ASSERT(!useSpreadLumping);
225 
226  if (algo == "classical")
227  GetOStream(Runtime0) << "algorithm = \"" << algo << "\" classical algorithm = \"" << classicalAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
228  else if (algo == "distance laplacian")
229  GetOStream(Runtime0) << "algorithm = \"" << algo << "\" distance laplacian algorithm = \"" << distanceLaplacianAlgoStr << "\" distance laplacian metric = \"" << distanceLaplacianMetric << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
230  else
231  GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
232 
233  if (((algo == "classical") && (classicalAlgoStr.find("scaled") != std::string::npos)) || ((algo == "distance laplacian") && (distanceLaplacianAlgoStr.find("scaled") != std::string::npos)))
234  TEUCHOS_TEST_FOR_EXCEPTION(threshold > 1.0, Exceptions::RuntimeError, "For cut-drop algorithms, \"aggregation: drop tol\" = " << threshold << ", needs to be <= 1.0");
235 
236  // FIXME: Non-Kokkos implementation does this, but this seems unnecessary.
237  if (algo == "distance laplacian")
238  aggregationMayCreateDirichlet = true;
239 
241  // We perform four sweeps over the rows of A:
242  // Pass 1: detection of boundary nodes
243  // Pass 2: diagonal extraction
244  // Pass 3: drop decision for each entry and construction of the rowptr of the filtered matrix
245  // Pass 4: fill of the filtered matrix
246  //
247  // Pass 1 and 3 apply a sequence of criteria to each row of the matrix.
248 
249  // TODO: We could merge pass 1 and 2.
250 
251  auto crsA = toCrsMatrix(A);
252  auto lclA = crsA->getLocalMatrixDevice();
253  auto range = range_type(0, lclA.numRows());
254 
256  // Pass 1: Detect boundary nodes
257  //
258  // The following criteria are available:
259  // - BoundaryDetection::PointDirichletFunctor
260  // Marks rows as Dirichlet based on value threshold and number of off-diagonal entries
261  // - BoundaryDetection::RowSumFunctor
262  // Marks rows as Dirichlet bases on row-sum criterion
263 
264  // Dirichlet nodes
265  auto boundaryNodes = boundary_nodes_type("boundaryNodes", lclA.numRows()); // initialized to false
266  {
267  SubFactoryMonitor mBoundary(*this, "Boundary detection", currentLevel);
268 
269  // macro that applies boundary detection functors
270 #define MueLu_runBoundaryFunctors(...) \
271  { \
272  auto boundaries = BoundaryDetection::BoundaryFunctor(lclA, __VA_ARGS__); \
273  Kokkos::parallel_for("CoalesceDrop::BoundaryDetection", range, boundaries); \
274  }
275 
276  auto dirichlet_detection = BoundaryDetection::PointDirichletFunctor(lclA, boundaryNodes, dirichletThreshold, dirichletNonzeroThreshold);
277 
278  if (rowSumTol <= 0.) {
279  MueLu_runBoundaryFunctors(dirichlet_detection);
280  } else {
281  auto apply_rowsum = BoundaryDetection::RowSumFunctor(lclA, boundaryNodes, rowSumTol);
282  MueLu_runBoundaryFunctors(dirichlet_detection,
283  apply_rowsum);
284  }
285 #undef MueLu_runBoundaryFunctors
286  }
287  // In what follows, boundaryNodes can still still get modified if aggregationMayCreateDirichlet == true.
288  // Otherwise we're now done with it now.
289 
291  // Pass 2 & 3: Diagonal extraction and determine dropping and construct
292  // rowptr of filtered matrix
293  //
294  // The following criteria are available:
295  // - Misc::PointwiseDropBoundaryFunctor
296  // Drop all rows that have been marked as Dirichlet
297  // - Misc::DropOffRankFunctor
298  // Drop all entries that are off-rank
299  // - ClassicalDropping::SAFunctor
300  // Classical dropping
301  // - ClassicalDropping::SignedRSFunctor
302  // Classical RS dropping
303  // - ClassicalDropping::SignedSAFunctor
304  // Classical signed SA dropping
305  // - DistanceLaplacian::DropFunctor
306  // Distance Laplacian dropping
307  // - Misc::KeepDiagonalFunctor
308  // Mark diagonal as KEEP
309  // - Misc::MarkSingletonFunctor
310  // Mark singletons after dropping as Dirichlet
311  // - Misc::BlockDiagonalizeFunctor
312  // Drop coupling between blocks
313  //
314  // For the block diagonal variants we first block diagonalized and then apply "blocksize = 1" algorithms.
315 
316  // rowptr of filtered A
317  auto filtered_rowptr = rowptr_type("filtered_rowptr", lclA.numRows() + 1);
318  // Number of nonzeros of filtered A
319  LocalOrdinal nnz_filtered = 0;
320  // dropping decisions for each entry
321  auto results = Kokkos::View<DecisionType*, memory_space>("results", lclA.nnz()); // initialized to UNDECIDED
322  {
323  SubFactoryMonitor mDropping(*this, "Dropping decisions", currentLevel);
324 
325  std::string functorLabel = "MueLu::CoalesceDrop::CountEntries";
326 
327  // macro that applied dropping functors
328 #if !defined(HAVE_MUELU_DEBUG)
329 #define MueLu_runDroppingFunctors(...) \
330  { \
331  auto countingFunctor = MatrixConstruction::PointwiseCountingFunctor(lclA, results, filtered_rowptr, __VA_ARGS__); \
332  Kokkos::parallel_scan(functorLabel, range, countingFunctor, nnz_filtered); \
333  }
334 #else
335 #define MueLu_runDroppingFunctors(...) \
336  { \
337  auto debug = Misc::DebugFunctor(lclA, results); \
338  auto countingFunctor = MatrixConstruction::PointwiseCountingFunctor(lclA, results, filtered_rowptr, __VA_ARGS__, debug); \
339  Kokkos::parallel_scan(functorLabel, range, countingFunctor, nnz_filtered); \
340  }
341 #endif
342 
343  auto drop_boundaries = Misc::PointwiseDropBoundaryFunctor(lclA, boundaryNodes, results);
344 
345  if (threshold != zero) {
346  auto preserve_diagonals = Misc::KeepDiagonalFunctor(lclA, results);
347  auto mark_singletons_as_boundary = Misc::MarkSingletonFunctor(lclA, boundaryNodes, results);
348 
349  if (algo == "classical" || algo == "block diagonal classical") {
350  if (algo == "block diagonal classical") {
351  auto BlockNumbers = GetBlockNumberMVs(currentLevel);
352  auto block_diagonalize = Misc::BlockDiagonalizeFunctor(*A, *std::get<0>(BlockNumbers), *std::get<1>(BlockNumbers), results);
353 
354  if (classicalAlgoStr == "default") {
355  auto classical_dropping = ClassicalDropping::SAFunctor(*A, threshold, results);
356 
357  if (aggregationMayCreateDirichlet) {
358  MueLu_runDroppingFunctors(block_diagonalize,
359  classical_dropping,
360  drop_boundaries,
361  preserve_diagonals,
362  mark_singletons_as_boundary);
363  } else {
364  MueLu_runDroppingFunctors(block_diagonalize,
365  classical_dropping,
366  drop_boundaries,
367  preserve_diagonals);
368  }
369  } else if (classicalAlgoStr == "unscaled cut") {
370  auto comparison = CutDrop::UnscaledComparison(*A, results);
371  auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
372 
373  MueLu_runDroppingFunctors(block_diagonalize,
374  drop_boundaries,
375  preserve_diagonals,
376  cut_drop);
377  } else if (classicalAlgoStr == "scaled cut") {
378  auto comparison = CutDrop::ScaledComparison(*A, results);
379  auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
380 
381  MueLu_runDroppingFunctors(block_diagonalize,
382  drop_boundaries,
383  preserve_diagonals,
384  cut_drop);
385  } else if (classicalAlgoStr == "scaled cut symmetric") {
386  auto comparison = CutDrop::ScaledComparison(*A, results);
387  auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
388 
389  MueLu_runDroppingFunctors(block_diagonalize,
390  drop_boundaries,
391  preserve_diagonals,
392  cut_drop);
393 
394  auto symmetrize = Misc::SymmetrizeFunctor(lclA, results);
395 
396  MueLu_runDroppingFunctors(symmetrize);
397 
398  } else {
399  TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: classical algo\" must be one of (default|unscaled cut|scaled cut|scaled cut symmetric), not \"" << classicalAlgoStr << "\"");
400  }
401  } else {
402  if (classicalAlgoStr == "default") {
403  auto classical_dropping = ClassicalDropping::SAFunctor(*A, threshold, results);
404 
405  if (aggregationMayCreateDirichlet) {
406  MueLu_runDroppingFunctors(classical_dropping,
407  drop_boundaries,
408  preserve_diagonals,
409  mark_singletons_as_boundary);
410  } else {
411  MueLu_runDroppingFunctors(classical_dropping,
412  drop_boundaries,
413  preserve_diagonals);
414  }
415  } else if (classicalAlgoStr == "unscaled cut") {
416  auto comparison = CutDrop::UnscaledComparison(*A, results);
417  auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
418 
419  MueLu_runDroppingFunctors(drop_boundaries,
420  preserve_diagonals,
421  cut_drop);
422  } else if (classicalAlgoStr == "scaled cut") {
423  auto comparison = CutDrop::ScaledComparison(*A, results);
424  auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
425 
426  MueLu_runDroppingFunctors(drop_boundaries,
427  preserve_diagonals,
428  cut_drop);
429  } else if (classicalAlgoStr == "scaled cut symmetric") {
430  auto comparison = CutDrop::ScaledComparison(*A, results);
431  auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
432 
433  MueLu_runDroppingFunctors(drop_boundaries,
434  preserve_diagonals,
435  cut_drop);
436 
437  auto symmetrize = Misc::SymmetrizeFunctor(lclA, results);
438 
439  MueLu_runDroppingFunctors(symmetrize);
440 
441  } else {
442  TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: classical algo\" must be one of (default|unscaled cut|scaled cut|scaled cut symmetric), not \"" << classicalAlgoStr << "\"");
443  }
444  }
445  } else if (algo == "signed classical" || algo == "block diagonal signed classical" || algo == "block diagonal colored signed classical") {
446  auto signed_classical_rs_dropping = ClassicalDropping::SignedRSFunctor(*A, threshold, results);
447 
448  if (algo == "block diagonal signed classical" || algo == "block diagonal colored signed classical") {
449  auto BlockNumbers = GetBlockNumberMVs(currentLevel);
450  auto block_diagonalize = Misc::BlockDiagonalizeFunctor(*A, *std::get<0>(BlockNumbers), *std::get<1>(BlockNumbers), results);
451 
452  if (classicalAlgoStr == "default") {
453  if (aggregationMayCreateDirichlet) {
454  MueLu_runDroppingFunctors(block_diagonalize,
455  signed_classical_rs_dropping,
456  drop_boundaries,
457  preserve_diagonals,
458  mark_singletons_as_boundary);
459 
460  } else {
461  MueLu_runDroppingFunctors(block_diagonalize,
462  signed_classical_rs_dropping,
463  drop_boundaries,
464  preserve_diagonals);
465  }
466  } else {
467  TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: classical algo\" must be default, not \"" << classicalAlgoStr << "\"");
468  }
469  } else {
470  if (classicalAlgoStr == "default") {
471  if (aggregationMayCreateDirichlet) {
472  MueLu_runDroppingFunctors(signed_classical_rs_dropping,
473  drop_boundaries,
474  preserve_diagonals,
475  mark_singletons_as_boundary);
476 
477  } else {
478  MueLu_runDroppingFunctors(signed_classical_rs_dropping,
479  drop_boundaries,
480  preserve_diagonals);
481  }
482  } else {
483  TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: classical algo\" must be default, not \"" << classicalAlgoStr << "\"");
484  }
485  }
486  } else if (algo == "signed classical sa") {
487  if (classicalAlgoStr == "default") {
488  auto signed_classical_sa_dropping = ClassicalDropping::SignedSAFunctor(*A, threshold, results);
489 
490  if (aggregationMayCreateDirichlet) {
491  MueLu_runDroppingFunctors(signed_classical_sa_dropping,
492  drop_boundaries,
493  preserve_diagonals,
494  mark_singletons_as_boundary);
495 
496  } else {
497  MueLu_runDroppingFunctors(signed_classical_sa_dropping,
498  drop_boundaries,
499  preserve_diagonals);
500  }
501  } else {
502  TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: classical algo\" must be default, not \"" << classicalAlgoStr << "\"");
503  }
504  } else if (algo == "distance laplacian" || algo == "block diagonal distance laplacian") {
506  auto coords = Get<RCP<doubleMultiVector>>(currentLevel, "Coordinates");
507 
508  if (algo == "block diagonal distance laplacian") {
509  auto BlockNumbers = GetBlockNumberMVs(currentLevel);
510  auto block_diagonalize = Misc::BlockDiagonalizeFunctor(*A, *std::get<0>(BlockNumbers), *std::get<1>(BlockNumbers), results);
511 
512  auto dist2 = DistanceLaplacian::UnweightedDistanceFunctor(*A, coords);
513 
514  if (distanceLaplacianAlgoStr == "default") {
515  auto dist_laplacian_dropping = DistanceLaplacian::DropFunctor(*A, threshold, dist2, results);
516 
517  if (aggregationMayCreateDirichlet) {
518  MueLu_runDroppingFunctors(block_diagonalize,
519  dist_laplacian_dropping,
520  drop_boundaries,
521  preserve_diagonals,
522  mark_singletons_as_boundary);
523  } else {
524  MueLu_runDroppingFunctors(block_diagonalize,
525  dist_laplacian_dropping,
526  drop_boundaries,
527  preserve_diagonals);
528  }
529  } else if (distanceLaplacianAlgoStr == "unscaled cut") {
530  auto comparison = CutDrop::UnscaledDistanceLaplacianComparison(*A, dist2, results);
531  auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
532 
533  MueLu_runDroppingFunctors(block_diagonalize,
534  drop_boundaries,
535  preserve_diagonals,
536  cut_drop);
537  } else if (distanceLaplacianAlgoStr == "scaled cut") {
538  auto comparison = CutDrop::ScaledDistanceLaplacianComparison(*A, dist2, results);
539  auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
540 
541  MueLu_runDroppingFunctors(block_diagonalize,
542  drop_boundaries,
543  preserve_diagonals,
544  cut_drop);
545  } else if (distanceLaplacianAlgoStr == "scaled cut symmetric") {
546  auto comparison = CutDrop::ScaledDistanceLaplacianComparison(*A, dist2, results);
547  auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
548 
549  MueLu_runDroppingFunctors(block_diagonalize,
550  drop_boundaries,
551  cut_drop,
552  preserve_diagonals);
553 
554  auto symmetrize = Misc::SymmetrizeFunctor(lclA, results);
555 
556  MueLu_runDroppingFunctors(symmetrize);
557  } else {
558  TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: distance laplacian algo\" must be one of (default|unscaled cut|scaled cut|scaled cut symmetric), not \"" << distanceLaplacianAlgoStr << "\"");
559  }
560  } else {
561  if (distanceLaplacianAlgoStr == "default") {
562  if (distanceLaplacianMetric == "unweighted") {
563  auto dist2 = DistanceLaplacian::UnweightedDistanceFunctor(*A, coords);
564  auto dist_laplacian_dropping = DistanceLaplacian::DropFunctor(*A, threshold, dist2, results);
565 
566  if (aggregationMayCreateDirichlet) {
567  MueLu_runDroppingFunctors(dist_laplacian_dropping,
568  drop_boundaries,
569  preserve_diagonals,
570  mark_singletons_as_boundary);
571  } else {
572  MueLu_runDroppingFunctors(dist_laplacian_dropping,
573  drop_boundaries,
574  preserve_diagonals);
575  }
576  } else if (distanceLaplacianMetric == "material") {
577  auto material = Get<RCP<MultiVector>>(currentLevel, "Material");
578  if (material->getNumVectors() == 1) {
579  GetOStream(Runtime0) << "material scalar mean = " << material->getVector(0)->meanValue() << std::endl;
580 
581  auto dist2 = DistanceLaplacian::ScalarMaterialDistanceFunctor(*A, coords, material);
582  auto dist_laplacian_dropping = DistanceLaplacian::DropFunctor(*A, threshold, dist2, results);
583 
584  if (aggregationMayCreateDirichlet) {
585  MueLu_runDroppingFunctors(dist_laplacian_dropping,
586  drop_boundaries,
587  preserve_diagonals,
588  mark_singletons_as_boundary);
589  } else {
590  MueLu_runDroppingFunctors(dist_laplacian_dropping,
591  drop_boundaries,
592  preserve_diagonals);
593  }
594  } else {
595  TEUCHOS_TEST_FOR_EXCEPTION(coords->getNumVectors() * coords->getNumVectors() != material->getNumVectors(), Exceptions::RuntimeError, "Need \"Material\" to have spatialDim^2 vectors.");
596 
597  {
598  std::stringstream ss;
599  ss << "material tensor mean =" << std::endl;
600  size_t k = 0;
601  for (size_t i = 0; i < coords->getNumVectors(); ++i) {
602  ss << " ";
603  for (size_t j = 0; j < coords->getNumVectors(); ++j) {
604  ss << material->getVector(k)->meanValue() << " ";
605  ++k;
606  }
607  ss << std::endl;
608  }
609  GetOStream(Runtime0) << ss.str();
610  }
611 
612  auto dist2 = DistanceLaplacian::TensorMaterialDistanceFunctor(*A, coords, material);
613  auto dist_laplacian_dropping = DistanceLaplacian::DropFunctor(*A, threshold, dist2, results);
614 
615  if (aggregationMayCreateDirichlet) {
616  MueLu_runDroppingFunctors(dist_laplacian_dropping,
617  drop_boundaries,
618  preserve_diagonals,
619  mark_singletons_as_boundary);
620  } else {
621  MueLu_runDroppingFunctors(dist_laplacian_dropping,
622  drop_boundaries,
623  preserve_diagonals);
624  }
625  }
626  }
627 
628  } else if (distanceLaplacianAlgoStr == "unscaled cut") {
629  if (distanceLaplacianMetric == "unweighted") {
630  auto dist2 = DistanceLaplacian::UnweightedDistanceFunctor(*A, coords);
631  auto comparison = CutDrop::UnscaledDistanceLaplacianComparison(*A, dist2, results);
632  auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
633 
634  MueLu_runDroppingFunctors(drop_boundaries,
635  preserve_diagonals,
636  cut_drop);
637  } else if (distanceLaplacianMetric == "material") {
638  auto material = Get<RCP<MultiVector>>(currentLevel, "Material");
639  if (material->getNumVectors() == 1) {
640  GetOStream(Runtime0) << "material scalar mean = " << material->getVector(0)->meanValue() << std::endl;
641 
642  auto dist2 = DistanceLaplacian::ScalarMaterialDistanceFunctor(*A, coords, material);
643  auto comparison = CutDrop::UnscaledDistanceLaplacianComparison(*A, dist2, results);
644  auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
645 
646  MueLu_runDroppingFunctors(drop_boundaries,
647  preserve_diagonals,
648  cut_drop);
649  } else {
650  TEUCHOS_TEST_FOR_EXCEPTION(coords->getNumVectors() * coords->getNumVectors() != material->getNumVectors(), Exceptions::RuntimeError, "Need \"Material\" to have spatialDim^2 vectors.");
651 
652  {
653  std::stringstream ss;
654  ss << "material tensor mean =" << std::endl;
655  size_t k = 0;
656  for (size_t i = 0; i < coords->getNumVectors(); ++i) {
657  ss << " ";
658  for (size_t j = 0; j < coords->getNumVectors(); ++j) {
659  ss << material->getVector(k)->meanValue() << " ";
660  ++k;
661  }
662  ss << std::endl;
663  }
664  GetOStream(Runtime0) << ss.str();
665  }
666 
667  auto dist2 = DistanceLaplacian::TensorMaterialDistanceFunctor(*A, coords, material);
668  auto comparison = CutDrop::UnscaledDistanceLaplacianComparison(*A, dist2, results);
669  auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
670 
671  MueLu_runDroppingFunctors(drop_boundaries,
672  preserve_diagonals,
673  cut_drop);
674  }
675  }
676  } else if (distanceLaplacianAlgoStr == "scaled cut") {
677  if (distanceLaplacianMetric == "unweighted") {
678  auto dist2 = DistanceLaplacian::UnweightedDistanceFunctor(*A, coords);
679  auto comparison = CutDrop::ScaledDistanceLaplacianComparison(*A, dist2, results);
680  auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
681 
682  MueLu_runDroppingFunctors(drop_boundaries,
683  preserve_diagonals,
684  cut_drop);
685  } else if (distanceLaplacianMetric == "material") {
686  auto material = Get<RCP<MultiVector>>(currentLevel, "Material");
687  if (material->getNumVectors() == 1) {
688  GetOStream(Runtime0) << "material scalar mean = " << material->getVector(0)->meanValue() << std::endl;
689 
690  auto dist2 = DistanceLaplacian::ScalarMaterialDistanceFunctor(*A, coords, material);
691  auto comparison = CutDrop::ScaledDistanceLaplacianComparison(*A, dist2, results);
692  auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
693 
694  MueLu_runDroppingFunctors(drop_boundaries,
695  preserve_diagonals,
696  cut_drop);
697  } else {
698  TEUCHOS_TEST_FOR_EXCEPTION(coords->getNumVectors() * coords->getNumVectors() != material->getNumVectors(), Exceptions::RuntimeError, "Need \"Material\" to have spatialDim^2 vectors.");
699 
700  {
701  std::stringstream ss;
702  ss << "material tensor mean =" << std::endl;
703  size_t k = 0;
704  for (size_t i = 0; i < coords->getNumVectors(); ++i) {
705  ss << " ";
706  for (size_t j = 0; j < coords->getNumVectors(); ++j) {
707  ss << material->getVector(k)->meanValue() << " ";
708  ++k;
709  }
710  ss << std::endl;
711  }
712  GetOStream(Runtime0) << ss.str();
713  }
714 
715  auto dist2 = DistanceLaplacian::TensorMaterialDistanceFunctor(*A, coords, material);
716  auto comparison = CutDrop::ScaledDistanceLaplacianComparison(*A, dist2, results);
717  auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
718 
719  MueLu_runDroppingFunctors(drop_boundaries,
720  preserve_diagonals,
721  cut_drop);
722  }
723  }
724  } else if (distanceLaplacianAlgoStr == "scaled cut symmetric") {
725  if (distanceLaplacianMetric == "unweighted") {
726  auto dist2 = DistanceLaplacian::UnweightedDistanceFunctor(*A, coords);
727  auto comparison = CutDrop::ScaledDistanceLaplacianComparison(*A, dist2, results);
728  auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
729 
730  MueLu_runDroppingFunctors(drop_boundaries,
731  preserve_diagonals,
732  cut_drop);
733  } else if (distanceLaplacianMetric == "material") {
734  auto material = Get<RCP<MultiVector>>(currentLevel, "Material");
735  if (material->getNumVectors() == 1) {
736  GetOStream(Runtime0) << "material scalar mean = " << material->getVector(0)->meanValue() << std::endl;
737 
738  auto dist2 = DistanceLaplacian::ScalarMaterialDistanceFunctor(*A, coords, material);
739  auto comparison = CutDrop::ScaledDistanceLaplacianComparison(*A, dist2, results);
740  auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
741 
742  MueLu_runDroppingFunctors(drop_boundaries,
743  preserve_diagonals,
744  cut_drop);
745  } else {
746  TEUCHOS_TEST_FOR_EXCEPTION(coords->getNumVectors() * coords->getNumVectors() != material->getNumVectors(), Exceptions::RuntimeError, "Need \"Material\" to have spatialDim^2 vectors.");
747 
748  {
749  std::stringstream ss;
750  ss << "material tensor mean =" << std::endl;
751  size_t k = 0;
752  for (size_t i = 0; i < coords->getNumVectors(); ++i) {
753  ss << " ";
754  for (size_t j = 0; j < coords->getNumVectors(); ++j) {
755  ss << material->getVector(k)->meanValue() << " ";
756  ++k;
757  }
758  ss << std::endl;
759  }
760  GetOStream(Runtime0) << ss.str();
761  }
762 
763  auto dist2 = DistanceLaplacian::TensorMaterialDistanceFunctor(*A, coords, material);
764  auto comparison = CutDrop::ScaledDistanceLaplacianComparison(*A, dist2, results);
765  auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
766 
767  MueLu_runDroppingFunctors(drop_boundaries,
768  preserve_diagonals,
769  cut_drop);
770  }
771  }
772 
773  auto symmetrize = Misc::SymmetrizeFunctor(lclA, results);
774 
775  MueLu_runDroppingFunctors(symmetrize);
776  } else {
777  TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: distance laplacian algo\" must be one of (default|unscaled cut|scaled cut|scaled cut symmetric), not \"" << distanceLaplacianAlgoStr << "\"");
778  }
779  }
780  } else if (algo == "block diagonal") {
781  auto BlockNumbers = GetBlockNumberMVs(currentLevel);
782  auto block_diagonalize = Misc::BlockDiagonalizeFunctor(*A, *std::get<0>(BlockNumbers), *std::get<1>(BlockNumbers), results);
783 
784  MueLu_runDroppingFunctors(block_diagonalize);
785  } else {
786  TEUCHOS_ASSERT(false);
787  }
788  } else {
789  Kokkos::deep_copy(results, KEEP);
790  // FIXME: This seems inconsistent
791  // MueLu_runDroppingFunctors(drop_boundaries);
792  auto no_op = Misc::NoOpFunctor<LocalOrdinal>();
794  }
795 #undef MueLu_runDroppingFunctors
796  }
797  GO numDropped = lclA.nnz() - nnz_filtered;
798  // We now know the number of entries of filtered A and have the final rowptr.
799 
801  // Pass 4: Create local matrix for filtered A
802  //
803  // Dropped entries are optionally lumped to the diagonal.
804 
805  RCP<Matrix> filteredA;
806  RCP<LWGraph_kokkos> graph;
807  {
808  SubFactoryMonitor mFill(*this, "Filtered matrix fill", currentLevel);
809 
810  local_matrix_type lclFilteredA;
811  local_graph_type lclGraph;
812  if (reuseGraph) {
813  filteredA = MatrixFactory::BuildCopy(A);
814  lclFilteredA = filteredA->getLocalMatrixDevice();
815 
816  auto colidx = entries_type("entries", nnz_filtered);
817  lclGraph = local_graph_type(colidx, filtered_rowptr);
818  } else {
819  auto colidx = entries_type("entries", nnz_filtered);
820  auto values = values_type("values", nnz_filtered);
821  lclFilteredA = local_matrix_type("filteredA",
822  lclA.numRows(), lclA.numCols(),
823  nnz_filtered,
824  values, filtered_rowptr, colidx);
825  }
826 
827  if (lumping) {
828  if (reuseGraph) {
829  auto fillFunctor = MatrixConstruction::PointwiseFillReuseFunctor<local_matrix_type, local_graph_type, true>(lclA, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
830  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_reuse", range, fillFunctor);
831  } else {
832  auto fillFunctor = MatrixConstruction::PointwiseFillNoReuseFunctor<local_matrix_type, true>(lclA, results, lclFilteredA, filteringDirichletThreshold);
833  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_noreuse", range, fillFunctor);
834  }
835  } else {
836  if (reuseGraph) {
837  auto fillFunctor = MatrixConstruction::PointwiseFillReuseFunctor<local_matrix_type, local_graph_type, false>(lclA, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
838  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_reuse", range, fillFunctor);
839  } else {
840  auto fillFunctor = MatrixConstruction::PointwiseFillNoReuseFunctor<local_matrix_type, false>(lclA, results, lclFilteredA, filteringDirichletThreshold);
841  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_noreuse", range, fillFunctor);
842  }
843  }
844 
845  if (!reuseGraph)
846  filteredA = MatrixFactory::Build(lclFilteredA, A->getRowMap(), A->getColMap(), A->getDomainMap(), A->getRangeMap());
847  filteredA->SetFixedBlockSize(A->GetFixedBlockSize());
848 
849  if (reuseEigenvalue) {
850  // Reuse max eigenvalue from A
851  // It is unclear what eigenvalue is the best for the smoothing, but we already may have
852  // the D^{-1}A estimate in A, may as well use it.
853  // NOTE: ML does that too
854  filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate());
855  } else {
856  filteredA->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits<SC>::one());
857  }
858 
859  if (!reuseGraph) {
860  // Use graph of filteredA as graph.
861  lclGraph = filteredA->getCrsGraph()->getLocalGraphDevice();
862  }
863  graph = rcp(new LWGraph_kokkos(lclGraph, filteredA->getRowMap(), filteredA->getColMap(), "amalgamated graph of A"));
864  graph->SetBoundaryNodeMap(boundaryNodes);
865  }
866 
867  LO dofsPerNode = 1;
868  Set(currentLevel, "DofsPerNode", dofsPerNode);
869  Set(currentLevel, "Graph", graph);
870  Set(currentLevel, "A", filteredA);
871 
872  return std::make_tuple(numDropped, boundaryNodes);
873 }
874 
875 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
876 std::tuple<GlobalOrdinal, typename MueLu::LWGraph_kokkos<LocalOrdinal, GlobalOrdinal, Node>::boundary_nodes_type> CoalesceDropFactory_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
877  BuildVector(Level& currentLevel) const {
878  FactoryMonitor m(*this, "Build", currentLevel);
879 
882  using local_matrix_type = typename MatrixType::local_matrix_type;
883  using local_graph_type = typename GraphType::local_graph_type;
884  using rowptr_type = typename local_graph_type::row_map_type::non_const_type;
885  using entries_type = typename local_graph_type::entries_type::non_const_type;
886  using values_type = typename local_matrix_type::values_type::non_const_type;
887  using device_type = typename Node::device_type;
888  using memory_space = typename device_type::memory_space;
889 
890  typedef Teuchos::ScalarTraits<SC> STS;
891  typedef typename STS::magnitudeType MT;
892  const MT zero = Teuchos::ScalarTraits<MT>::zero();
893 
894  auto A = Get<RCP<Matrix>>(currentLevel, "A");
895 
896  /* NOTE: storageblocksize (from GetStorageBlockSize()) is the size of a block in the chosen storage scheme.
897  blkSize is the number of storage blocks that must kept together during the amalgamation process.
898 
899  Both of these quantities may be different than numPDEs (from GetFixedBlockSize()), but the following must always hold:
900 
901  numPDEs = blkSize * storageblocksize.
902 
903  If numPDEs==1
904  Matrix is point storage (classical CRS storage). storageblocksize=1 and blkSize=1
905  No other values makes sense.
906 
907  If numPDEs>1
908  If matrix uses point storage, then storageblocksize=1 and blkSize=numPDEs.
909  If matrix uses block storage, with block size of n, then storageblocksize=n, and blkSize=numPDEs/n.
910  Thus far, only storageblocksize=numPDEs and blkSize=1 has been tested.
911  */
912 
913  TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() % A->GetStorageBlockSize() != 0, Exceptions::RuntimeError, "A->GetFixedBlockSize() needs to be a multiple of A->GetStorageBlockSize()");
914  LO blkSize = A->GetFixedBlockSize() / A->GetStorageBlockSize();
915 
916  auto amalInfo = Get<RCP<AmalgamationInfo>>(currentLevel, "UnAmalgamationInfo");
917 
918  const RCP<const Map> rowMap = A->getRowMap();
919  const RCP<const Map> colMap = A->getColMap();
920 
921  // build a node row map (uniqueMap = non-overlapping) and a node column map
922  // (nonUniqueMap = overlapping). The arrays rowTranslation and colTranslation
923  // stored in the AmalgamationInfo class container contain the local node id
924  // given a local dof id. The data is calculated in the AmalgamationFactory and
925  // stored in the variable "UnAmalgamationInfo" (which is of type AmalagamationInfo)
926  const RCP<const Map> uniqueMap = amalInfo->getNodeRowMap();
927  const RCP<const Map> nonUniqueMap = amalInfo->getNodeColMap();
928  Array<LO> rowTranslationArray = *(amalInfo->getRowTranslation()); // TAW should be transform that into a View?
929  Array<LO> colTranslationArray = *(amalInfo->getColTranslation());
930 
931  Kokkos::View<LO*, Kokkos::MemoryUnmanaged>
932  rowTranslationView(rowTranslationArray.getRawPtr(), rowTranslationArray.size());
933  Kokkos::View<LO*, Kokkos::MemoryUnmanaged>
934  colTranslationView(colTranslationArray.getRawPtr(), colTranslationArray.size());
935 
936  // get number of local nodes
937  LO numNodes = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
938  typedef typename Kokkos::View<LocalOrdinal*, typename Node::device_type> id_translation_type;
939  id_translation_type rowTranslation("dofId2nodeId", rowTranslationArray.size());
940  id_translation_type colTranslation("ov_dofId2nodeId", colTranslationArray.size());
941  Kokkos::deep_copy(rowTranslation, rowTranslationView);
942  Kokkos::deep_copy(colTranslation, colTranslationView);
943 
944  // extract striding information
945  blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map)
946  LocalOrdinal blkId = -1; //< the block id within a strided map or -1 if it is a full block map
947  LocalOrdinal blkPartSize = A->GetFixedBlockSize(); //< stores block size of part blkId (or the full block size)
948  if (A->IsView("stridedMaps") == true) {
949  const RCP<const Map> myMap = A->getRowMap("stridedMaps");
950  const RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
951  TEUCHOS_TEST_FOR_EXCEPTION(strMap.is_null() == true, Exceptions::RuntimeError, "Map is not of type stridedMap");
952  blkSize = Teuchos::as<const LocalOrdinal>(strMap->getFixedBlockSize());
953  blkId = strMap->getStridedBlockId();
954  if (blkId > -1)
955  blkPartSize = Teuchos::as<LocalOrdinal>(strMap->getStridingData()[blkId]);
956  }
957 
958  TEUCHOS_TEST_FOR_EXCEPTION(A->getRowMap()->getLocalNumElements() % blkPartSize != 0, MueLu::Exceptions::RuntimeError, "MueLu::CoalesceDropFactory: Number of local elements is " << A->getRowMap()->getLocalNumElements() << " but should be a multiple of " << blkPartSize);
959 
961  // Process parameterlist
962  const ParameterList& pL = GetParameterList();
963 
964  // Boundary detection
965  const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
966  const typename STS::magnitudeType rowSumTol = as<typename STS::magnitudeType>(pL.get<double>("aggregation: row sum drop tol"));
967  const LocalOrdinal dirichletNonzeroThreshold = 1;
968  const bool useGreedyDirichlet = pL.get<bool>("aggregation: greedy Dirichlet");
969  TEUCHOS_TEST_FOR_EXCEPTION(rowSumTol > zero, MueLu::Exceptions::RuntimeError, "MueLu::CoalesceDropFactory: RowSum is not implemented for vectorial problems.");
970 
971  // Dropping
972  const std::string algo = pL.get<std::string>("aggregation: drop scheme");
973  std::string classicalAlgoStr = pL.get<std::string>("aggregation: classical algo");
974  std::string distanceLaplacianAlgoStr = pL.get<std::string>("aggregation: distance laplacian algo");
975  std::string distanceLaplacianMetric = pL.get<std::string>("aggregation: distance laplacian metric");
976  MT threshold;
977  // If we're doing the ML-style halving of the drop tol at each level, we do that here.
978  if (pL.get<bool>("aggregation: use ml scaling of drop tol"))
979  threshold = pL.get<double>("aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID());
980  else
981  threshold = as<MT>(pL.get<double>("aggregation: drop tol"));
982  bool aggregationMayCreateDirichlet = pL.get<bool>("aggregation: dropping may create Dirichlet");
983 
984  // Fill
985  const bool lumping = pL.get<bool>("filtered matrix: use lumping");
986  const bool reuseGraph = pL.get<bool>("filtered matrix: reuse graph");
987  const bool reuseEigenvalue = pL.get<bool>("filtered matrix: reuse eigenvalue");
988 
989  const bool useRootStencil = pL.get<bool>("filtered matrix: use root stencil");
990  const bool useSpreadLumping = pL.get<bool>("filtered matrix: use spread lumping");
991 
992  const MT filteringDirichletThreshold = as<MT>(pL.get<double>("filtered matrix: Dirichlet threshold"));
993 
994  TEUCHOS_ASSERT(!useRootStencil);
995  TEUCHOS_ASSERT(!useSpreadLumping);
996 
997  if (algo == "classical") {
998  GetOStream(Runtime0) << "algorithm = \"" << algo << "\" classical algorithm = \"" << classicalAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
999  } else if (algo == "distance laplacian") {
1000  GetOStream(Runtime0) << "algorithm = \"" << algo << "\" distance laplacian algorithm = \"" << distanceLaplacianAlgoStr << "\" distance laplacian metric = \"" << distanceLaplacianMetric << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
1001  } else
1002  GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
1003 
1004  // FIXME: Non-Kokkos implementation does this, but this seems unnecessary.
1005  if (algo == "distance laplacian")
1006  aggregationMayCreateDirichlet = true;
1007 
1009  // We perform four sweeps over the rows of A:
1010  // Pass 1: detection of boundary nodes
1011  // Pass 2: diagonal extraction
1012  // Pass 3: drop decision for each entry and construction of the rowptr of the filtered matrix
1013  // Pass 4: fill of the filtered matrix
1014  //
1015  // Pass 1 and 3 apply a sequence of criteria to each row of the matrix.
1016 
1017  // TODO: We could merge pass 1 and 2.
1018 
1019  auto crsA = toCrsMatrix(A);
1020  auto lclA = crsA->getLocalMatrixDevice();
1021  auto range = range_type(0, numNodes);
1022 
1024  // Pass 1: Detect boundary nodes
1025  //
1026  // The following criteria are available:
1027  // - BoundaryDetection::VectorDirichletFunctor
1028  // Marks rows as Dirichlet based on value threshold and number of off-diagonal entries
1029 
1030  // Dirichlet nodes
1031  auto boundaryNodes = boundary_nodes_type("boundaryNodes", numNodes); // initialized to false
1032  {
1033  SubFactoryMonitor mBoundary(*this, "Boundary detection", currentLevel);
1034 
1035 #define MueLu_runBoundaryFunctors(...) \
1036  { \
1037  auto boundaries = BoundaryDetection::BoundaryFunctor(lclA, __VA_ARGS__); \
1038  Kokkos::parallel_for("CoalesceDrop::BoundaryDetection", range, boundaries); \
1039  }
1040 
1041  if (useGreedyDirichlet) {
1042  auto dirichlet_detection = BoundaryDetection::VectorDirichletFunctor<local_matrix_type, true>(lclA, blkPartSize, boundaryNodes, dirichletThreshold, dirichletNonzeroThreshold);
1043  MueLu_runBoundaryFunctors(dirichlet_detection);
1044  } else {
1045  auto dirichlet_detection = BoundaryDetection::VectorDirichletFunctor<local_matrix_type, false>(lclA, blkPartSize, boundaryNodes, dirichletThreshold, dirichletNonzeroThreshold);
1046  MueLu_runBoundaryFunctors(dirichlet_detection);
1047  }
1048 #undef MueLu_runBoundaryFunctors
1049  }
1050  // In what follows, boundaryNodes can still still get modified if aggregationMayCreateDirichlet == true.
1051  // Otherwise we're now done with it now.
1052 
1054  // Pass 2 & 3: Diagonal extraction and determine dropping and construct
1055  // rowptr of filtered matrix
1056  //
1057  // The following criteria are available:
1058  // - Misc::VectorDropBoundaryFunctor
1059  // Drop all rows that have been marked as Dirichlet
1060  // - Misc::DropOffRankFunctor
1061  // Drop all entries that are off-rank
1062  // - ClassicalDropping::SAFunctor
1063  // Classical dropping
1064  // - ClassicalDropping::SignedRSFunctor
1065  // Classical RS dropping
1066  // - ClassicalDropping::SignedSAFunctor
1067  // Classical signed SA dropping
1068  // - DistanceLaplacian::DropFunctor
1069  // Distance Laplacian dropping
1070  // - Misc::KeepDiagonalFunctor
1071  // Mark diagonal as KEEP
1072  // - Misc::MarkSingletonFunctor
1073  // Mark singletons after dropping as Dirichlet
1074 
1075  // rowptr of filtered A
1076  auto filtered_rowptr = rowptr_type("rowptr", lclA.numRows() + 1);
1077  auto graph_rowptr = rowptr_type("rowptr", numNodes + 1);
1078  // Number of nonzeros of filtered A and graph
1079  Kokkos::pair<LocalOrdinal, LocalOrdinal> nnz = {0, 0};
1080 
1081  // dropping decisions for each entry
1082  auto results = Kokkos::View<DecisionType*, memory_space>("results", lclA.nnz()); // initialized to UNDECIDED
1083  {
1084  SubFactoryMonitor mDropping(*this, "Dropping decisions", currentLevel);
1085 
1086  std::string functorLabel = "MueLu::CoalesceDrop::CountEntries";
1087 
1088 #if !defined(HAVE_MUELU_DEBUG)
1089 #define MueLu_runDroppingFunctors(...) \
1090  { \
1091  auto countingFunctor = MatrixConstruction::VectorCountingFunctor(lclA, blkPartSize, colTranslation, results, filtered_rowptr, graph_rowptr, __VA_ARGS__); \
1092  Kokkos::parallel_scan(functorLabel, range, countingFunctor, nnz); \
1093  }
1094 #else
1095 #define MueLu_runDroppingFunctors(...) \
1096  { \
1097  auto debug = Misc::DebugFunctor(lclA, results); \
1098  auto countingFunctor = MatrixConstruction::VectorCountingFunctor(lclA, blkPartSize, colTranslation, results, filtered_rowptr, graph_rowptr, __VA_ARGS__, debug); \
1099  Kokkos::parallel_scan(functorLabel, range, countingFunctor, nnz); \
1100  }
1101 #endif
1102 
1103  auto drop_boundaries = Misc::VectorDropBoundaryFunctor(lclA, rowTranslation, boundaryNodes, results);
1104 
1105  if (threshold != zero) {
1106  auto preserve_diagonals = Misc::KeepDiagonalFunctor(lclA, results);
1107  auto mark_singletons_as_boundary = Misc::MarkSingletonVectorFunctor(lclA, rowTranslation, boundaryNodes, results);
1108 
1109  if (algo == "classical") {
1110  if (classicalAlgoStr == "default") {
1111  auto classical_dropping = ClassicalDropping::SAFunctor(*A, threshold, results);
1112 
1113  if (aggregationMayCreateDirichlet) {
1114  MueLu_runDroppingFunctors(classical_dropping,
1115  // drop_boundaries,
1116  preserve_diagonals,
1117  mark_singletons_as_boundary);
1118  } else {
1119  MueLu_runDroppingFunctors(classical_dropping,
1120  // drop_boundaries,
1121  preserve_diagonals);
1122  }
1123  } else if (classicalAlgoStr == "unscaled cut") {
1124  TEUCHOS_ASSERT(false);
1125  } else if (classicalAlgoStr == "scaled cut") {
1126  TEUCHOS_ASSERT(false);
1127  } else if (classicalAlgoStr == "scaled cut symmetric") {
1128  TEUCHOS_ASSERT(false);
1129  } else {
1130  TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: classical algo\" must be one of (default|unscaled cut|scaled cut|scaled cut symmetric), not \"" << classicalAlgoStr << "\"");
1131  }
1132  } else if (algo == "signed classical" || algo == "block diagonal colored signed classical" || algo == "block diagonal signed classical") {
1133  auto signed_classical_rs_dropping = ClassicalDropping::SignedRSFunctor(*A, threshold, results);
1134 
1135  if (aggregationMayCreateDirichlet) {
1136  MueLu_runDroppingFunctors(signed_classical_rs_dropping,
1137  // drop_boundaries,
1138  preserve_diagonals,
1139  mark_singletons_as_boundary);
1140 
1141  } else {
1142  MueLu_runDroppingFunctors(signed_classical_rs_dropping,
1143  // drop_boundaries,
1144  preserve_diagonals);
1145  }
1146  } else if (algo == "signed classical sa") {
1147  auto signed_classical_sa_dropping = ClassicalDropping::SignedSAFunctor(*A, threshold, results);
1148 
1149  if (aggregationMayCreateDirichlet) {
1150  MueLu_runDroppingFunctors(signed_classical_sa_dropping,
1151  // drop_boundaries,
1152  preserve_diagonals,
1153  mark_singletons_as_boundary);
1154 
1155  } else {
1156  MueLu_runDroppingFunctors(signed_classical_sa_dropping,
1157  // drop_boundaries,
1158  preserve_diagonals);
1159  }
1160  } else if (algo == "distance laplacian") {
1162  auto coords = Get<RCP<doubleMultiVector>>(currentLevel, "Coordinates");
1163 
1164  auto dist2 = DistanceLaplacian::UnweightedDistanceFunctor(*A, coords);
1165 
1166  if (distanceLaplacianAlgoStr == "default") {
1167  auto dist_laplacian_dropping = DistanceLaplacian::DropFunctor(*A, threshold, dist2, results);
1168 
1169  if (aggregationMayCreateDirichlet) {
1170  MueLu_runDroppingFunctors(dist_laplacian_dropping,
1171  // drop_boundaries,
1172  preserve_diagonals,
1173  mark_singletons_as_boundary);
1174  } else {
1175  MueLu_runDroppingFunctors(dist_laplacian_dropping,
1176  // drop_boundaries,
1177  preserve_diagonals);
1178  }
1179  } else if (distanceLaplacianAlgoStr == "unscaled cut") {
1180  TEUCHOS_ASSERT(false);
1181  } else if (distanceLaplacianAlgoStr == "scaled cut") {
1182  TEUCHOS_ASSERT(false);
1183  } else if (distanceLaplacianAlgoStr == "scaled cut symmetric") {
1184  TEUCHOS_ASSERT(false);
1185  } else {
1186  TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: distance laplacian algo\" must be one of (default|unscaled cut|scaled cut|scaled cut symmetric), not \"" << distanceLaplacianAlgoStr << "\"");
1187  }
1188  } else {
1189  TEUCHOS_ASSERT(false);
1190  }
1191  } else {
1192  Kokkos::deep_copy(results, KEEP);
1193  // MueLu_runDroppingFunctors(drop_boundaries);
1194  auto no_op = Misc::NoOpFunctor<LocalOrdinal>();
1196  }
1197 #undef MueLu_runDroppingFunctors
1198  }
1199  LocalOrdinal nnz_filtered = nnz.first;
1200  LocalOrdinal nnz_graph = nnz.second;
1201  GO numTotal = lclA.nnz();
1202  GO numDropped = numTotal - nnz_filtered;
1203  // We now know the number of entries of filtered A and have the final rowptr.
1204 
1206  // Pass 4: Create local matrix for filtered A
1207  //
1208  // Dropped entries are optionally lumped to the diagonal.
1209 
1210  RCP<Matrix> filteredA;
1211  RCP<LWGraph_kokkos> graph;
1212  {
1213  SubFactoryMonitor mFill(*this, "Filtered matrix fill", currentLevel);
1214 
1215  local_matrix_type lclFilteredA;
1216  if (reuseGraph) {
1217  lclFilteredA = local_matrix_type("filteredA", lclA.graph, lclA.numCols());
1218  } else {
1219  auto colidx = entries_type("entries", nnz_filtered);
1220  auto values = values_type("values", nnz_filtered);
1221  lclFilteredA = local_matrix_type("filteredA",
1222  lclA.numRows(), lclA.numCols(),
1223  nnz_filtered,
1224  values, filtered_rowptr, colidx);
1225  }
1226 
1227  local_graph_type lclGraph;
1228  {
1229  auto colidx = entries_type("entries", nnz_graph);
1230  lclGraph = local_graph_type(colidx, graph_rowptr);
1231  }
1232 
1233  if (lumping) {
1234  if (reuseGraph) {
1235  auto fillFunctor = MatrixConstruction::VectorFillFunctor<local_matrix_type, true, true>(lclA, blkPartSize, colTranslation, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
1236  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_reuse", range, fillFunctor);
1237  } else {
1238  auto fillFunctor = MatrixConstruction::VectorFillFunctor<local_matrix_type, true, false>(lclA, blkPartSize, colTranslation, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
1239  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_noreuse", range, fillFunctor);
1240  }
1241  } else {
1242  if (reuseGraph) {
1243  auto fillFunctor = MatrixConstruction::VectorFillFunctor<local_matrix_type, false, true>(lclA, blkSize, colTranslation, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
1244  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_reuse", range, fillFunctor);
1245  } else {
1246  auto fillFunctor = MatrixConstruction::VectorFillFunctor<local_matrix_type, false, false>(lclA, blkSize, colTranslation, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
1247  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_noreuse", range, fillFunctor);
1248  }
1249  }
1250 
1251  filteredA = Xpetra::MatrixFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(lclFilteredA, A->getRowMap(), A->getColMap(), A->getDomainMap(), A->getRangeMap());
1252  filteredA->SetFixedBlockSize(blkSize);
1253 
1254  if (reuseEigenvalue) {
1255  // Reuse max eigenvalue from A
1256  // It is unclear what eigenvalue is the best for the smoothing, but we already may have
1257  // the D^{-1}A estimate in A, may as well use it.
1258  // NOTE: ML does that too
1259  filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate());
1260  } else {
1261  filteredA->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits<SC>::one());
1262  }
1263 
1264  graph = rcp(new LWGraph_kokkos(lclGraph, uniqueMap, nonUniqueMap, "amalgamated graph of A"));
1265  graph->SetBoundaryNodeMap(boundaryNodes);
1266  }
1267 
1268  LO dofsPerNode = blkSize;
1269 
1270  Set(currentLevel, "DofsPerNode", dofsPerNode);
1271  Set(currentLevel, "Graph", graph);
1272  Set(currentLevel, "A", filteredA);
1273 
1274  return std::make_tuple(numDropped, boundaryNodes);
1275 }
1276 
1277 } // namespace MueLu
1278 #endif // MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
Drops entries the unscaled distance Laplacian.
#define MueLu_sumAll(rcpComm, in, out)
MueLu::DefaultLocalOrdinal LocalOrdinal
Lightweight MueLu representation of a compressed row storage graph.
KOKKOS_INLINE_FUNCTION void SetBoundaryNodeMap(const boundary_nodes_type bndry)
Set boolean array indicating which rows correspond to Dirichlet boundaries.
void setValidator(RCP< const ParameterEntryValidator > const &validator)
Functor that drops boundary nodes for a blockSize &gt; 1 problem.
GlobalOrdinal GO
T & get(const std::string &name, T def_value)
Classical smoothed aggregation dropping criterion.
Functor that marks singletons (all off-diagonal entries in a row are dropped) as boundary.
Signed classical Ruge-Stueben dropping criterion.
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Print more statistics.
#define MueLu_runBoundaryFunctors(...)
LocalOrdinal LO
One-liner description of what is happening.
ParameterList & set(std::string const &name, T &&value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
Signed classical smoothed aggregation dropping criterion.
Orders entries of row by .
Orders entries of row by where is the distance Laplacian.
Functor that marks singletons (all off-diagonal entries in a row are dropped) as boundary.
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Functor that symmetrizes the dropping decisions.
MueLu::DefaultGlobalOrdinal GlobalOrdinal
Functor that drops boundary nodes for a blockSize == 1 problem.
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:63
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
typename MueLu::LWGraph_kokkos< LocalOrdinal, GlobalOrdinal, Node >::boundary_nodes_type boundary_nodes_type
void DeclareInput(Level &currentLevel) const
Input.
Functor that fills the filtered matrix while reusing the graph of the matrix before dropping...
Order each row by a criterion, compare the ratio of values and drop all entries once the ratio is bel...
Functor for marking nodes as Dirichlet.
Functor that drops all entries that are not on the block diagonal.
Functor that marks diagonal as kept, unless the are already marked as boundary.
Kokkos::RangePolicy< local_ordinal_type, execution_space > range_type
std::tuple< GlobalOrdinal, boundary_nodes_type > BuildVector(Level &currentLevel) const
static RCP< Vector > Build(const Teuchos::RCP< const Map > &map, bool zeroOut=true)
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void Build(Level &currentLevel) const
Build an object with this factory.
Functor for marking nodes as Dirichlet based on rowsum.
size_type size() const
#define SET_VALID_ENTRY(name)
std::tuple< RCP< LocalOrdinalVector >, RCP< LocalOrdinalVector > > GetBlockNumberMVs(Level &currentLevel) const
Functor for marking nodes as Dirichlet in a block operator.
std::tuple< GlobalOrdinal, boundary_nodes_type > BuildScalar(Level &currentLevel) const
Functor does not reuse the graph of the matrix for a problem with blockSize == 1. ...
Node NO
static RCP< Matrix > Build(const RCP< const Map > &rowMap, size_t maxNumEntriesPerRow, Xpetra::ProfileType pftype=Xpetra::DynamicProfile)
Exception throws to report errors in the internal logical of the program.
#define TEUCHOS_ASSERT(assertion_test)
ParameterEntry & getEntry(const std::string &name)
Orders entries of row by .
#define MueLu_runDroppingFunctors(...)
bool is_null() const