MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_CoalesceDropFactory_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // MueLu: A package for multigrid based preconditioning
4 //
5 // Copyright 2012 NTESS and the MueLu contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef MUELU_COALESCEDROPFACTORY_DEF_HPP
11 #define MUELU_COALESCEDROPFACTORY_DEF_HPP
12 
14 #include <Xpetra_CrsGraph.hpp>
15 #include <Xpetra_ImportFactory.hpp>
16 #include <Xpetra_ExportFactory.hpp>
17 #include <Xpetra_MapFactory.hpp>
18 #include <Xpetra_Map.hpp>
19 #include <Xpetra_Matrix.hpp>
20 #include <Xpetra_MultiVectorFactory.hpp>
21 #include <Xpetra_MultiVector.hpp>
22 #include <Xpetra_StridedMap.hpp>
23 #include <Xpetra_VectorFactory.hpp>
24 #include <Xpetra_Vector.hpp>
25 
26 #include <Xpetra_IO.hpp>
27 
28 #include <Kokkos_NestedSort.hpp>
29 #include <Kokkos_StdAlgorithms.hpp>
31 
32 #include "MueLu_AmalgamationFactory.hpp"
33 #include "MueLu_AmalgamationInfo.hpp"
34 #include "MueLu_Exceptions.hpp"
35 #include "MueLu_LWGraph.hpp"
36 
37 #include "MueLu_Level.hpp"
38 #include "MueLu_LWGraph.hpp"
39 #include "MueLu_MasterList.hpp"
40 #include "MueLu_Monitor.hpp"
41 #include "MueLu_PreDropFunctionConstVal.hpp"
42 #include "MueLu_Utilities.hpp"
43 
44 #ifdef HAVE_XPETRA_TPETRA
45 #include "Tpetra_CrsGraphTransposer.hpp"
46 #endif
47 
48 #include <algorithm>
49 #include <cstdlib>
50 #include <string>
51 
52 // If defined, read environment variables.
53 // Should be removed once we are confident that this works.
54 //#define DJS_READ_ENV_VARIABLES
55 
56 namespace MueLu {
57 
58 namespace Details {
59 template <class real_type, class LO>
60 struct DropTol {
61  DropTol() = default;
62  DropTol(DropTol const&) = default;
63  DropTol(DropTol&&) = default;
64 
65  DropTol& operator=(DropTol const&) = default;
66  DropTol& operator=(DropTol&&) = default;
67 
68  DropTol(real_type val_, real_type diag_, LO col_, bool drop_)
69  : val{val_}
70  , diag{diag_}
71  , col{col_}
72  , drop{drop_} {}
73 
77  bool drop{true};
78 
79  // CMS: Auxillary information for debugging info
80  // real_type aux_val {Teuchos::ScalarTraits<real_type>::nan()};
81 };
82 } // namespace Details
83 
88 
89 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
91  RCP<ParameterList> validParamList = rcp(new ParameterList());
92 
93 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
94  SET_VALID_ENTRY("aggregation: drop tol");
95  SET_VALID_ENTRY("aggregation: use ml scaling of drop tol");
96  SET_VALID_ENTRY("aggregation: Dirichlet threshold");
97  SET_VALID_ENTRY("aggregation: greedy Dirichlet");
98  SET_VALID_ENTRY("aggregation: row sum drop tol");
99  SET_VALID_ENTRY("aggregation: drop scheme");
100  SET_VALID_ENTRY("aggregation: block diagonal: interleaved blocksize");
101  SET_VALID_ENTRY("aggregation: distance laplacian directional weights");
102  SET_VALID_ENTRY("aggregation: dropping may create Dirichlet");
103 
104  {
105  // "signed classical" is the Ruge-Stuben style (relative to max off-diagonal), "sign classical sa" is the signed version of the sa criterion (relative to the diagonal values)
106  validParamList->getEntry("aggregation: drop scheme").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("signed classical sa", "classical", "distance laplacian", "signed classical", "block diagonal", "block diagonal classical", "block diagonal distance laplacian", "block diagonal signed classical", "block diagonal colored signed classical"))));
107  }
108  SET_VALID_ENTRY("aggregation: distance laplacian algo");
109  SET_VALID_ENTRY("aggregation: classical algo");
110  SET_VALID_ENTRY("aggregation: coloring: localize color graph");
111 #undef SET_VALID_ENTRY
112  validParamList->set<bool>("lightweight wrap", true, "Experimental option for lightweight graph access");
113 
114  validParamList->set<RCP<const FactoryBase>>("A", Teuchos::null, "Generating factory of the matrix A");
115  validParamList->set<RCP<const FactoryBase>>("UnAmalgamationInfo", Teuchos::null, "Generating factory for UnAmalgamationInfo");
116  validParamList->set<RCP<const FactoryBase>>("Coordinates", Teuchos::null, "Generating factory for Coordinates");
117  validParamList->set<RCP<const FactoryBase>>("BlockNumber", Teuchos::null, "Generating factory for BlockNUmber");
118 
119  return validParamList;
120 }
121 
122 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
124  : predrop_(Teuchos::null) {}
125 
126 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
128  Input(currentLevel, "A");
129  Input(currentLevel, "UnAmalgamationInfo");
130 
131  const ParameterList& pL = GetParameterList();
132  if (pL.get<bool>("lightweight wrap") == true) {
133  std::string algo = pL.get<std::string>("aggregation: drop scheme");
134  if (algo == "distance laplacian" || algo == "block diagonal distance laplacian") {
135  Input(currentLevel, "Coordinates");
136  }
137  if (algo == "signed classical sa")
138  ;
139  else if (algo.find("block diagonal") != std::string::npos || algo.find("signed classical") != std::string::npos) {
140  Input(currentLevel, "BlockNumber");
141  }
142  }
143 }
144 
145 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
147  FactoryMonitor m(*this, "Build", currentLevel);
148 
149  typedef Teuchos::ScalarTraits<SC> STS;
150  typedef typename STS::magnitudeType real_type;
151  typedef Xpetra::MultiVector<real_type, LO, GO, NO> RealValuedMultiVector;
152  typedef Xpetra::MultiVectorFactory<real_type, LO, GO, NO> RealValuedMultiVectorFactory;
153 
154  if (predrop_ != Teuchos::null)
155  GetOStream(Parameters0) << predrop_->description();
156 
157  RCP<Matrix> realA = Get<RCP<Matrix>>(currentLevel, "A");
158  RCP<AmalgamationInfo> amalInfo = Get<RCP<AmalgamationInfo>>(currentLevel, "UnAmalgamationInfo");
159  const ParameterList& pL = GetParameterList();
160  bool doExperimentalWrap = pL.get<bool>("lightweight wrap");
161 
162  GetOStream(Parameters0) << "lightweight wrap = " << doExperimentalWrap << std::endl;
163  std::string algo = pL.get<std::string>("aggregation: drop scheme");
164  const bool aggregationMayCreateDirichlet = pL.get<bool>("aggregation: dropping may create Dirichlet");
165 
167  RCP<Matrix> A;
168 
169  bool use_block_algorithm = false;
170  LO interleaved_blocksize = as<LO>(pL.get<int>("aggregation: block diagonal: interleaved blocksize"));
171  bool useSignedClassicalRS = false;
172  bool useSignedClassicalSA = false;
173  bool generateColoringGraph = false;
174 
175  // NOTE: If we're doing blockDiagonal, we'll not want to do rowSum twice (we'll do it
176  // in the block diagonalization). So we'll clobber the rowSumTol with -1.0 in this case
177  typename STS::magnitudeType rowSumTol = as<typename STS::magnitudeType>(pL.get<double>("aggregation: row sum drop tol"));
178 
179  RCP<LocalOrdinalVector> ghostedBlockNumber;
180 
181  if (algo == "distance laplacian") {
182  // Grab the coordinates for distance laplacian
183  Coords = Get<RCP<RealValuedMultiVector>>(currentLevel, "Coordinates");
184  A = realA;
185  } else if (algo == "signed classical sa") {
186  useSignedClassicalSA = true;
187  algo = "classical";
188  A = realA;
189  } else if (algo == "signed classical" || algo == "block diagonal colored signed classical" || algo == "block diagonal signed classical") {
190  useSignedClassicalRS = true;
191  // if(realA->GetFixedBlockSize() > 1) {
192  RCP<LocalOrdinalVector> BlockNumber = Get<RCP<LocalOrdinalVector>>(currentLevel, "BlockNumber");
193  // Ghost the column block numbers if we need to
194  RCP<const Import> importer = realA->getCrsGraph()->getImporter();
195  if (!importer.is_null()) {
196  SubFactoryMonitor m1(*this, "Block Number import", currentLevel);
197  ghostedBlockNumber = Xpetra::VectorFactory<LO, LO, GO, NO>::Build(importer->getTargetMap());
198  ghostedBlockNumber->doImport(*BlockNumber, *importer, Xpetra::INSERT);
199  } else {
200  ghostedBlockNumber = BlockNumber;
201  }
202  // }
203  if (algo == "block diagonal colored signed classical")
204  generateColoringGraph = true;
205  algo = "classical";
206  A = realA;
207 
208  } else if (algo == "block diagonal") {
209  // Handle the "block diagonal" filtering and then leave
210  BlockDiagonalize(currentLevel, realA, false);
211  return;
212  } else if (algo == "block diagonal classical" || algo == "block diagonal distance laplacian") {
213  // Handle the "block diagonal" filtering, and then continue onward
214  use_block_algorithm = true;
215  RCP<Matrix> filteredMatrix = BlockDiagonalize(currentLevel, realA, true);
216  if (algo == "block diagonal distance laplacian") {
217  // We now need to expand the coordinates by the interleaved blocksize
218  RCP<RealValuedMultiVector> OldCoords = Get<RCP<RealValuedMultiVector>>(currentLevel, "Coordinates");
219  if (OldCoords->getLocalLength() != realA->getLocalNumRows()) {
220  LO dim = (LO)OldCoords->getNumVectors();
221  Coords = RealValuedMultiVectorFactory::Build(realA->getRowMap(), dim);
222  for (LO k = 0; k < dim; k++) {
223  ArrayRCP<const real_type> old_vec = OldCoords->getData(k);
224  ArrayRCP<real_type> new_vec = Coords->getDataNonConst(k);
225  for (LO i = 0; i < (LO)OldCoords->getLocalLength(); i++) {
226  LO new_base = i * dim;
227  for (LO j = 0; j < interleaved_blocksize; j++)
228  new_vec[new_base + j] = old_vec[i];
229  }
230  }
231  } else {
232  Coords = OldCoords;
233  }
234  algo = "distance laplacian";
235  } else if (algo == "block diagonal classical") {
236  algo = "classical";
237  }
238  // All cases
239  A = filteredMatrix;
240  rowSumTol = -1.0;
241  } else {
242  A = realA;
243  }
244 
245  // Distance Laplacian weights
246  Array<double> dlap_weights = pL.get<Array<double>>("aggregation: distance laplacian directional weights");
247  enum { NO_WEIGHTS = 0,
248  SINGLE_WEIGHTS,
249  BLOCK_WEIGHTS };
250  int use_dlap_weights = NO_WEIGHTS;
251  if (algo == "distance laplacian") {
252  LO dim = (LO)Coords->getNumVectors();
253  // If anything isn't 1.0 we need to turn on the weighting
254  bool non_unity = false;
255  for (LO i = 0; !non_unity && i < (LO)dlap_weights.size(); i++) {
256  if (dlap_weights[i] != 1.0) {
257  non_unity = true;
258  }
259  }
260  if (non_unity) {
261  LO blocksize = use_block_algorithm ? as<LO>(pL.get<int>("aggregation: block diagonal: interleaved blocksize")) : 1;
262  if ((LO)dlap_weights.size() == dim)
263  use_dlap_weights = SINGLE_WEIGHTS;
264  else if ((LO)dlap_weights.size() == blocksize * dim)
265  use_dlap_weights = BLOCK_WEIGHTS;
266  else {
268  "length of 'aggregation: distance laplacian directional weights' must equal the coordinate dimension OR the coordinate dimension times the blocksize");
269  }
270  if (GetVerbLevel() & Statistics1)
271  GetOStream(Statistics1) << "Using distance laplacian weights: " << dlap_weights << std::endl;
272  }
273  }
274 
275  // decide wether to use the fast-track code path for standard maps or the somewhat slower
276  // code path for non-standard maps
277  /*bool bNonStandardMaps = false;
278  if (A->IsView("stridedMaps") == true) {
279  Teuchos::RCP<const Map> myMap = A->getRowMap("stridedMaps");
280  Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
281  TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
282  if (strMap->getStridedBlockId() != -1 || strMap->getOffset() > 0)
283  bNonStandardMaps = true;
284  }*/
285 
286  if (doExperimentalWrap) {
287  TEUCHOS_TEST_FOR_EXCEPTION(predrop_ != null && algo != "classical", Exceptions::RuntimeError, "Dropping function must not be provided for \"" << algo << "\" algorithm");
288  TEUCHOS_TEST_FOR_EXCEPTION(algo != "classical" && algo != "distance laplacian" && algo != "signed classical", Exceptions::RuntimeError, "\"algorithm\" must be one of (classical|distance laplacian|signed classical)");
289 
290  SC threshold;
291  // If we're doing the ML-style halving of the drop tol at each level, we do that here.
292  if (pL.get<bool>("aggregation: use ml scaling of drop tol"))
293  threshold = pL.get<double>("aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID());
294  else
295  threshold = as<SC>(pL.get<double>("aggregation: drop tol"));
296 
297  std::string distanceLaplacianAlgoStr = pL.get<std::string>("aggregation: distance laplacian algo");
298  std::string classicalAlgoStr = pL.get<std::string>("aggregation: classical algo");
299  real_type realThreshold = STS::magnitude(threshold); // CMS: Rename this to "magnitude threshold" sometime
300 
302  // Remove this bit once we are confident that cut-based dropping works.
303 #ifdef HAVE_MUELU_DEBUG
304  int distanceLaplacianCutVerbose = 0;
305 #endif
306 #ifdef DJS_READ_ENV_VARIABLES
307  if (getenv("MUELU_DROP_TOLERANCE_MODE")) {
308  distanceLaplacianAlgoStr = std::string(getenv("MUELU_DROP_TOLERANCE_MODE"));
309  }
310 
311  if (getenv("MUELU_DROP_TOLERANCE_THRESHOLD")) {
312  auto tmp = atoi(getenv("MUELU_DROP_TOLERANCE_THRESHOLD"));
313  realThreshold = 1e-4 * tmp;
314  }
315 
316 #ifdef HAVE_MUELU_DEBUG
317  if (getenv("MUELU_DROP_TOLERANCE_VERBOSE")) {
318  distanceLaplacianCutVerbose = atoi(getenv("MUELU_DROP_TOLERANCE_VERBOSE"));
319  }
320 #endif
321 #endif
322 
324  decisionAlgoType distanceLaplacianAlgo = defaultAlgo;
325  decisionAlgoType classicalAlgo = defaultAlgo;
326  if (algo == "distance laplacian") {
327  if (distanceLaplacianAlgoStr == "default")
328  distanceLaplacianAlgo = defaultAlgo;
329  else if (distanceLaplacianAlgoStr == "unscaled cut")
330  distanceLaplacianAlgo = unscaled_cut;
331  else if (distanceLaplacianAlgoStr == "scaled cut")
332  distanceLaplacianAlgo = scaled_cut;
333  else if (distanceLaplacianAlgoStr == "scaled cut symmetric")
334  distanceLaplacianAlgo = scaled_cut_symmetric;
335  else
336  TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: distance laplacian algo\" must be one of (default|unscaled cut|scaled cut), not \"" << distanceLaplacianAlgoStr << "\"");
337  GetOStream(Runtime0) << "algorithm = \"" << algo << "\" distance laplacian algorithm = \"" << distanceLaplacianAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
338  } else if (algo == "classical") {
339  if (classicalAlgoStr == "default")
340  classicalAlgo = defaultAlgo;
341  else if (classicalAlgoStr == "unscaled cut")
342  classicalAlgo = unscaled_cut;
343  else if (classicalAlgoStr == "scaled cut")
344  classicalAlgo = scaled_cut;
345  else
346  TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: classical algo\" must be one of (default|unscaled cut|scaled cut), not \"" << classicalAlgoStr << "\"");
347  GetOStream(Runtime0) << "algorithm = \"" << algo << "\" classical algorithm = \"" << classicalAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
348 
349  } else
350  GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
351 
352  if (((algo == "classical") && (classicalAlgoStr.find("scaled") != std::string::npos)) || ((algo == "distance laplacian") && (distanceLaplacianAlgoStr.find("scaled") != std::string::npos)))
353  TEUCHOS_TEST_FOR_EXCEPTION(realThreshold > 1.0, Exceptions::RuntimeError, "For cut-drop algorithms, \"aggregation: drop tol\" = " << threshold << ", needs to be <= 1.0");
354 
355  Set<bool>(currentLevel, "Filtering", (threshold != STS::zero()));
356 
357  const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
358 
359  // NOTE: We don't support signed classical RS or SA with cut drop at present
360  TEUCHOS_TEST_FOR_EXCEPTION(useSignedClassicalRS && classicalAlgo != defaultAlgo, Exceptions::RuntimeError, "\"aggregation: classical algo\" != default is not supported for scalled classical aggregation");
361  TEUCHOS_TEST_FOR_EXCEPTION(useSignedClassicalSA && classicalAlgo != defaultAlgo, Exceptions::RuntimeError, "\"aggregation: classical algo\" != default is not supported for scalled classical sa aggregation");
362 
363  GO numDropped = 0, numTotal = 0;
364  std::string graphType = "unamalgamated"; // for description purposes only
365 
366  /* NOTE: storageblocksize (from GetStorageBlockSize()) is the size of a block in the chosen storage scheme.
367  BlockSize is the number of storage blocks that must kept together during the amalgamation process.
368 
369  Both of these quantities may be different than numPDEs (from GetFixedBlockSize()), but the following must always hold:
370 
371  numPDEs = BlockSize * storageblocksize.
372 
373  If numPDEs==1
374  Matrix is point storage (classical CRS storage). storageblocksize=1 and BlockSize=1
375  No other values makes sense.
376 
377  If numPDEs>1
378  If matrix uses point storage, then storageblocksize=1 and BlockSize=numPDEs.
379  If matrix uses block storage, with block size of n, then storageblocksize=n, and BlockSize=numPDEs/n.
380  Thus far, only storageblocksize=numPDEs and BlockSize=1 has been tested.
381  */
382  TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() % A->GetStorageBlockSize() != 0, Exceptions::RuntimeError, "A->GetFixedBlockSize() needs to be a multiple of A->GetStorageBlockSize()");
383  const LO BlockSize = A->GetFixedBlockSize() / A->GetStorageBlockSize();
384 
385  /************************** RS or SA-style Classical Dropping (and variants) **************************/
386  if (algo == "classical") {
387  if (predrop_ == null) {
388  // ap: this is a hack: had to declare predrop_ as mutable
389  predrop_ = rcp(new PreDropFunctionConstVal(threshold));
390  }
391 
392  if (predrop_ != null) {
393  RCP<PreDropFunctionConstVal> predropConstVal = rcp_dynamic_cast<PreDropFunctionConstVal>(predrop_);
394  TEUCHOS_TEST_FOR_EXCEPTION(predropConstVal == Teuchos::null, Exceptions::BadCast,
395  "MueLu::CoalesceFactory::Build: cast to PreDropFunctionConstVal failed.");
396  // If a user provided a predrop function, it overwrites the XML threshold parameter
397  SC newt = predropConstVal->GetThreshold();
398  if (newt != threshold) {
399  GetOStream(Warnings0) << "switching threshold parameter from " << threshold << " (list) to " << newt << " (user function" << std::endl;
400  threshold = newt;
401  }
402  }
403  // At this points we either have
404  // (predrop_ != null)
405  // Therefore, it is sufficient to check only threshold
406  if (BlockSize == 1 && threshold == STS::zero() && !useSignedClassicalRS && !useSignedClassicalSA && A->hasCrsGraph()) {
407  // Case 1: scalar problem, no dropping => just use matrix graph
408  RCP<LWGraph> graph = rcp(new LWGraph(A->getCrsGraph(), "graph of A"));
409  // Detect and record rows that correspond to Dirichlet boundary conditions
410  auto boundaryNodes = MueLu::Utilities<SC, LO, GO, NO>::DetectDirichletRows_kokkos_host(*A, dirichletThreshold);
411  if (rowSumTol > 0.)
412  Utilities::ApplyRowSumCriterionHost(*A, rowSumTol, boundaryNodes);
413 
414  graph->SetBoundaryNodeMap(boundaryNodes);
415  numTotal = A->getLocalNumEntries();
416 
417  if (GetVerbLevel() & Statistics1) {
418  GO numLocalBoundaryNodes = 0;
419  GO numGlobalBoundaryNodes = 0;
420  for (size_t i = 0; i < boundaryNodes.size(); ++i)
421  if (boundaryNodes[i])
422  numLocalBoundaryNodes++;
423  RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
424  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
425  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
426  }
427 
428  Set(currentLevel, "DofsPerNode", 1);
429  Set(currentLevel, "Graph", graph);
430 
431  } else if ((BlockSize == 1 && threshold != STS::zero()) ||
432  (BlockSize == 1 && threshold == STS::zero() && !A->hasCrsGraph()) ||
433  (BlockSize == 1 && useSignedClassicalRS) ||
434  (BlockSize == 1 && useSignedClassicalSA)) {
435  // Case 2: scalar problem with dropping => record the column indices of undropped entries, but still use original
436  // graph's map information, e.g., whether index is local
437  // OR a matrix without a CrsGraph
438 
439  // allocate space for the local graph
440  typename LWGraph::row_type::non_const_type rows("rows", A->getLocalNumRows() + 1);
441  typename LWGraph::entries_type::non_const_type columns("columns", A->getLocalNumEntries());
442 
443  using MT = typename STS::magnitudeType;
444  RCP<Vector> ghostedDiag;
445  ArrayRCP<const SC> ghostedDiagVals;
446  ArrayRCP<const SC> negMaxOffDiagonal;
447  // RS style needs the max negative off-diagonal, SA style needs the diagonal
448  if (useSignedClassicalRS) {
449  if (ghostedBlockNumber.is_null()) {
451  negMaxOffDiagonal = negMaxOffDiagonalVec->getData(0);
452  if (GetVerbLevel() & Statistics1)
453  GetOStream(Statistics1) << "Calculated max point off-diagonal" << std::endl;
454  } else {
455  auto negMaxOffDiagonalVec = MueLu::Utilities<SC, LO, GO, NO>::GetMatrixMaxMinusOffDiagonal(*A, *ghostedBlockNumber);
456  negMaxOffDiagonal = negMaxOffDiagonalVec->getData(0);
457  if (GetVerbLevel() & Statistics1)
458  GetOStream(Statistics1) << "Calculating max block off-diagonal" << std::endl;
459  }
460  } else {
462  if (classicalAlgo == defaultAlgo) {
463  ghostedDiagVals = ghostedDiag->getData(0);
464  }
465  }
466  auto boundaryNodes = MueLu::Utilities<SC, LO, GO, NO>::DetectDirichletRows_kokkos_host(*A, dirichletThreshold);
467  if (rowSumTol > 0.) {
468  if (ghostedBlockNumber.is_null()) {
469  if (GetVerbLevel() & Statistics1)
470  GetOStream(Statistics1) << "Applying point row sum criterion." << std::endl;
471  Utilities::ApplyRowSumCriterionHost(*A, rowSumTol, boundaryNodes);
472  } else {
473  if (GetVerbLevel() & Statistics1)
474  GetOStream(Statistics1) << "Applying block row sum criterion." << std::endl;
475  Utilities::ApplyRowSumCriterionHost(*A, *ghostedBlockNumber, rowSumTol, boundaryNodes);
476  }
477  }
478 
479  ArrayRCP<const LO> g_block_id;
480  if (!ghostedBlockNumber.is_null())
481  g_block_id = ghostedBlockNumber->getData(0);
482 
483  LO realnnz = 0;
484  rows(0) = 0;
485  if (classicalAlgo == defaultAlgo) {
486  SubFactoryMonitor m1(*this, "Classical RS/SA", currentLevel);
487  for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
488  size_t nnz = A->getNumEntriesInLocalRow(row);
489  bool rowIsDirichlet = boundaryNodes[row];
490  ArrayView<const LO> indices;
491  ArrayView<const SC> vals;
492  A->getLocalRowView(row, indices, vals);
493 
494  // FIXME the current predrop function uses the following
495  // FIXME if(std::abs(vals[k]) > std::abs(threshold_) || grow == gcid )
496  // FIXME but the threshold doesn't take into account the rows' diagonal entries
497  // FIXME For now, hardwiring the dropping in here
498 
499  LO rownnz = 0;
500  if (useSignedClassicalRS) {
501  // Signed classical RS style
502  for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
503  LO col = indices[colID];
504  MT max_neg_aik = realThreshold * STS::real(negMaxOffDiagonal[row]);
505  MT neg_aij = -STS::real(vals[colID]);
506  /* if(row==1326) printf("A(%d,%d) = %6.4e, block = (%d,%d) neg_aij = %6.4e max_neg_aik = %6.4e\n",row,col,vals[colID],
507  g_block_id.is_null() ? -1 : g_block_id[row],
508  g_block_id.is_null() ? -1 : g_block_id[col],
509  neg_aij, max_neg_aik);*/
510  if ((!rowIsDirichlet && (g_block_id.is_null() || g_block_id[row] == g_block_id[col]) && neg_aij > max_neg_aik) || row == col) {
511  columns[realnnz++] = col;
512  rownnz++;
513  } else
514  numDropped++;
515  }
516  rows(row + 1) = realnnz;
517  } else if (useSignedClassicalSA) {
518  // Signed classical SA style
519  for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
520  LO col = indices[colID];
521 
522  bool is_nonpositive = STS::real(vals[colID]) <= 0;
523  MT aiiajj = STS::magnitude(threshold * threshold * ghostedDiagVals[col] * ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj|
524  MT aij = is_nonpositive ? STS::magnitude(vals[colID] * vals[colID]) : (-STS::magnitude(vals[colID] * vals[colID])); // + |a_ij|^2, if a_ij < 0, - |a_ij|^2 if a_ij >=0
525  /*
526  if(row==1326) printf("A(%d,%d) = %6.4e, raw_aij = %6.4e aij = %6.4e aiiajj = %6.4e\n",row,col,vals[colID],
527  vals[colID],aij, aiiajj);
528  */
529 
530  if ((!rowIsDirichlet && aij > aiiajj) || row == col) {
531  columns(realnnz++) = col;
532  rownnz++;
533  } else
534  numDropped++;
535  }
536  rows[row + 1] = realnnz;
537  } else {
538  // Standard abs classical
539  for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
540  LO col = indices[colID];
541  MT aiiajj = STS::magnitude(threshold * threshold * ghostedDiagVals[col] * ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj|
542  MT aij = STS::magnitude(vals[colID] * vals[colID]); // |a_ij|^2
543 
544  if ((!rowIsDirichlet && aij > aiiajj) || row == col) {
545  columns(realnnz++) = col;
546  rownnz++;
547  } else
548  numDropped++;
549  }
550  rows(row + 1) = realnnz;
551  }
552  } // end for row
553  } else {
554  /* Cut Algorithm */
555  SubFactoryMonitor m1(*this, "Cut Drop", currentLevel);
556  using ExecSpace = typename Node::execution_space;
557  using TeamPol = Kokkos::TeamPolicy<ExecSpace>;
558  using TeamMem = typename TeamPol::member_type;
559 #if KOKKOS_VERSION >= 40799
560  using ATS = KokkosKernels::ArithTraits<Scalar>;
561 #else
562  using ATS = Kokkos::ArithTraits<Scalar>;
563 #endif
564  using impl_scalar_type = typename ATS::val_type;
565 #if KOKKOS_VERSION >= 40799
566  using implATS = KokkosKernels::ArithTraits<impl_scalar_type>;
567 #else
568  using implATS = Kokkos::ArithTraits<impl_scalar_type>;
569 #endif
570 
571  // move from host to device
572  auto ghostedDiagValsView = Kokkos::subview(ghostedDiag->getLocalViewDevice(Xpetra::Access::ReadOnly), Kokkos::ALL(), 0);
573  auto thresholdKokkos = static_cast<impl_scalar_type>(threshold);
574  auto realThresholdKokkos = implATS::magnitude(thresholdKokkos);
575  auto columnsDevice = Kokkos::create_mirror_view(ExecSpace(), columns);
576 
577  auto A_device = A->getLocalMatrixDevice();
578  RCP<LWGraph> graph = rcp(new LWGraph(A->getCrsGraph(), "graph of A"));
579  RCP<const Import> importer = A->getCrsGraph()->getImporter();
581  RCP<LocalOrdinalVector> boundaryColumnVector;
582  for (size_t i = 0; i < graph->GetNodeNumVertices(); i++) {
583  boundaryNodesVector->getDataNonConst(0)[i] = boundaryNodes[i];
584  }
585  if (!importer.is_null()) {
586  boundaryColumnVector = Xpetra::VectorFactory<LO, LO, GO, NO>::Build(graph->GetImportMap());
587  boundaryColumnVector->doImport(*boundaryNodesVector, *importer, Xpetra::INSERT);
588  } else {
589  boundaryColumnVector = boundaryNodesVector;
590  }
591  auto boundaryColumn = boundaryColumnVector->getLocalViewDevice(Xpetra::Access::ReadOnly);
592  auto boundary = Kokkos::subview(boundaryColumn, Kokkos::ALL(), 0);
593 
594  Kokkos::View<LO*, ExecSpace> rownnzView("rownnzView", A_device.numRows());
595  auto drop_views = Kokkos::View<bool*, ExecSpace>("drop_views", A_device.nnz());
596  auto index_views = Kokkos::View<size_t*, ExecSpace>("index_views", A_device.nnz());
597 
598  Kokkos::parallel_reduce(
599  "classical_cut", TeamPol(A_device.numRows(), Kokkos::AUTO), KOKKOS_LAMBDA(const TeamMem& teamMember, LO& globalnnz, GO& totalDropped) {
600  LO row = teamMember.league_rank();
601  auto rowView = A_device.rowConst(row);
602  size_t nnz = rowView.length;
603 
604  auto drop_view = Kokkos::subview(drop_views, Kokkos::make_pair(A_device.graph.row_map(row), A_device.graph.row_map(row + 1)));
605  auto index_view = Kokkos::subview(index_views, Kokkos::make_pair(A_device.graph.row_map(row), A_device.graph.row_map(row + 1)));
606 
607  // find magnitudes
608  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, (LO)nnz), [&](const LO colID) {
609  index_view(colID) = colID;
610  LO col = rowView.colidx(colID);
611  // ignore diagonals for now, they are checked again later
612  // Don't aggregate boundaries
613  if (row == col || boundary(col)) {
614  drop_view(colID) = true;
615  } else {
616  drop_view(colID) = false;
617  }
618  });
619 
620  size_t dropStart = nnz;
621  if (classicalAlgo == unscaled_cut) {
622  // push diagonals and boundaries to the right, sort everything else by aij on the left
623  Kokkos::Experimental::sort_team(teamMember, index_view, [=](size_t& x, size_t& y) -> bool {
624  if (drop_view(x) || drop_view(y)) {
625  return drop_view(x) < drop_view(y);
626  } else {
627  auto x_aij = implATS::magnitude(rowView.value(x) * rowView.value(x));
628  auto y_aij = implATS::magnitude(rowView.value(y) * rowView.value(y));
629  return x_aij > y_aij;
630  }
631  });
632 
633  // find index where dropping starts
634  Kokkos::parallel_reduce(
635  Kokkos::TeamThreadRange(teamMember, 1, nnz), [=](size_t i, size_t& min) {
636  auto const& x = index_view(i - 1);
637  auto const& y = index_view(i);
638  typename implATS::magnitudeType x_aij = 0;
639  typename implATS::magnitudeType y_aij = 0;
640  if (!drop_view(x)) {
641  x_aij = implATS::magnitude(rowView.value(x) * rowView.value(x));
642  }
643  if (!drop_view(y)) {
644  y_aij = implATS::magnitude(rowView.value(y) * rowView.value(y));
645  }
646 
647  if (realThresholdKokkos * realThresholdKokkos * x_aij > y_aij) {
648  if (i < min) {
649  min = i;
650  }
651  }
652  },
653  Kokkos::Min<size_t>(dropStart));
654  } else if (classicalAlgo == scaled_cut) {
655  // push diagonals and boundaries to the right, sort everything else by aij/aiiajj on the left
656  Kokkos::Experimental::sort_team(teamMember, index_view, [=](size_t& x, size_t& y) -> bool {
657  if (drop_view(x) || drop_view(y)) {
658  return drop_view(x) < drop_view(y);
659  } else {
660  auto x_aij = implATS::magnitude(rowView.value(x) * rowView.value(x));
661  auto y_aij = implATS::magnitude(rowView.value(y) * rowView.value(y));
662  auto x_aiiajj = implATS::magnitude(ghostedDiagValsView(rowView.colidx(x)) * ghostedDiagValsView(row));
663  auto y_aiiajj = implATS::magnitude(ghostedDiagValsView(rowView.colidx(y)) * ghostedDiagValsView(row));
664  return (x_aij / x_aiiajj) > (y_aij / y_aiiajj);
665  }
666  });
667 
668  // find index where dropping starts
669  Kokkos::parallel_reduce(
670  Kokkos::TeamThreadRange(teamMember, 1, nnz), [=](size_t i, size_t& min) {
671  auto const& x = index_view(i - 1);
672  auto const& y = index_view(i);
673  typename implATS::magnitudeType x_val = 0;
674  typename implATS::magnitudeType y_val = 0;
675  if (!drop_view(x)) {
676  typename implATS::magnitudeType x_aij = implATS::magnitude(rowView.value(x) * rowView.value(x));
677  typename implATS::magnitudeType x_aiiajj = implATS::magnitude(ghostedDiagValsView(rowView.colidx(x)) * ghostedDiagValsView(row));
678  x_val = x_aij / x_aiiajj;
679  }
680  if (!drop_view(y)) {
681  typename implATS::magnitudeType y_aij = implATS::magnitude(rowView.value(y) * rowView.value(y));
682  typename implATS::magnitudeType y_aiiajj = implATS::magnitude(ghostedDiagValsView(rowView.colidx(y)) * ghostedDiagValsView(row));
683  y_val = y_aij / y_aiiajj;
684  }
685 
686  if (realThresholdKokkos * realThresholdKokkos * x_val > y_val) {
687  if (i < min) {
688  min = i;
689  }
690  }
691  },
692  Kokkos::Min<size_t>(dropStart));
693  }
694 
695  // drop everything to the right of where values stop passing threshold
696  if (dropStart < nnz) {
697  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, dropStart, nnz), [=](size_t i) {
698  drop_view(index_view(i)) = true;
699  });
700  }
701 
702  LO rownnz = 0;
703  GO rowDropped = 0;
704  Kokkos::parallel_reduce(
705  Kokkos::TeamThreadRange(teamMember, nnz), [=](const size_t idxID, LO& keep, GO& drop) {
706  LO col = rowView.colidx(idxID);
707  // don't drop diagonal
708  if (row == col || !drop_view(idxID)) {
709  columnsDevice(A_device.graph.row_map(row) + idxID) = col;
710  keep++;
711  } else {
712  columnsDevice(A_device.graph.row_map(row) + idxID) = -1;
713  drop++;
714  }
715  },
716  rownnz, rowDropped);
717 
718  globalnnz += rownnz;
719  totalDropped += rowDropped;
720  rownnzView(row) = rownnz;
721  },
722  realnnz, numDropped);
723 
724  // update column indices so that kept indices are aligned to the left for subview that happens later on
725  Kokkos::Experimental::remove(ExecSpace(), columnsDevice, -1);
726  Kokkos::deep_copy(columns, columnsDevice);
727 
728  // update row indices by adding up new # of nnz in each row
729  auto rowsDevice = Kokkos::create_mirror_view(ExecSpace(), rows);
730  Kokkos::parallel_scan(
731  Kokkos::RangePolicy<ExecSpace>(0, A_device.numRows()), KOKKOS_LAMBDA(const int i, LO& partial_sum, bool is_final) {
732  partial_sum += rownnzView(i);
733  if (is_final) rowsDevice(i + 1) = partial_sum;
734  });
735  Kokkos::deep_copy(rows, rowsDevice);
736  }
737 
738  numTotal = A->getLocalNumEntries();
739 
740  if (aggregationMayCreateDirichlet) {
741  // If the only element remaining after filtering is diagonal, mark node as boundary
742  for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
743  if (rows[row + 1] - rows[row] <= 1)
744  boundaryNodes[row] = true;
745  }
746  }
747 
748  RCP<LWGraph> graph = rcp(new LWGraph(rows, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), A->getRowMap(), A->getColMap(), "thresholded graph of A"));
749  graph->SetBoundaryNodeMap(boundaryNodes);
750  if (GetVerbLevel() & Statistics1) {
751  GO numLocalBoundaryNodes = 0;
752  GO numGlobalBoundaryNodes = 0;
753  for (size_t i = 0; i < boundaryNodes.size(); ++i)
754  if (boundaryNodes(i))
755  numLocalBoundaryNodes++;
756  RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
757  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
758  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
759  }
760  Set(currentLevel, "Graph", graph);
761  Set(currentLevel, "DofsPerNode", 1);
762 
763  // If we're doing signed classical, we might want to block-diagonalize *after* the dropping
764  if (generateColoringGraph) {
765  RCP<LWGraph> colorGraph;
766  RCP<const Import> importer = A->getCrsGraph()->getImporter();
767  BlockDiagonalizeGraph(graph, ghostedBlockNumber, colorGraph, importer);
768  Set(currentLevel, "Coloring Graph", colorGraph);
769  // #define CMS_DUMP
770 #ifdef CMS_DUMP
771  {
772  Xpetra::IO<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("m_regular_graph." + std::to_string(currentLevel.GetLevelID()), *rcp_dynamic_cast<LWGraph>(graph)->GetCrsGraph());
773  Xpetra::IO<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("m_color_graph." + std::to_string(currentLevel.GetLevelID()), *rcp_dynamic_cast<LWGraph>(colorGraph)->GetCrsGraph());
774  // int rank = graph->GetDomainMap()->getComm()->getRank();
775  // {
776  // std::ofstream ofs(std::string("m_color_graph_") + std::to_string(currentLevel.GetLevelID())+std::string("_") + std::to_string(rank) + std::string(".dat"),std::ofstream::out);
777  // RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(ofs));
778  // colorGraph->print(*fancy,Debug);
779  // }
780  // {
781  // std::ofstream ofs(std::string("m_regular_graph_") + std::to_string(currentLevel.GetLevelID())+std::string("_") + std::to_string(rank) + std::string(".dat"),std::ofstream::out);
782  // RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(ofs));
783  // graph->print(*fancy,Debug);
784  // }
785  }
786 #endif
787  } // end generateColoringGraph
788  } else if (BlockSize > 1 && threshold == STS::zero()) {
789  // Case 3: Multiple DOF/node problem without dropping
790  const RCP<const Map> rowMap = A->getRowMap();
791  const RCP<const Map> colMap = A->getColMap();
792 
793  graphType = "amalgamated";
794 
795  // build node row map (uniqueMap) and node column map (nonUniqueMap)
796  // the arrays rowTranslation and colTranslation contain the local node id
797  // given a local dof id. The data is calculated by the AmalgamationFactory and
798  // stored in the variable container "UnAmalgamationInfo"
799  RCP<const Map> uniqueMap = amalInfo->getNodeRowMap();
800  RCP<const Map> nonUniqueMap = amalInfo->getNodeColMap();
801  Array<LO> rowTranslation = *(amalInfo->getRowTranslation());
802  Array<LO> colTranslation = *(amalInfo->getColTranslation());
803 
804  // get number of local nodes
805  LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
806 
807  // Allocate space for the local graph
808  typename LWGraph::row_type::non_const_type rows("rows", numRows + 1);
809  typename LWGraph::entries_type::non_const_type columns("columns", A->getLocalNumEntries());
810 
811  typename LWGraph::boundary_nodes_type amalgBoundaryNodes("amalgBoundaryNodes", numRows);
812  Kokkos::deep_copy(amalgBoundaryNodes, false);
813 
814  // Detect and record rows that correspond to Dirichlet boundary conditions
815  // TODO If we use ArrayRCP<LO>, then we can record boundary nodes as usual. Size
816  // TODO the array one bigger than the number of local rows, and the last entry can
817  // TODO hold the actual number of boundary nodes. Clever, huh?
818  ArrayRCP<bool> pointBoundaryNodes;
819  pointBoundaryNodes = Teuchos::arcp_const_cast<bool>(MueLu::Utilities<SC, LO, GO, NO>::DetectDirichletRows(*A, dirichletThreshold));
820  if (rowSumTol > 0.)
821  Utilities::ApplyRowSumCriterion(*A, rowSumTol, pointBoundaryNodes);
822 
823  // extract striding information
824  LO blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map)
825  LO blkId = -1; //< the block id within the strided map (or -1 if it is a full block map)
826  LO blkPartSize = A->GetFixedBlockSize(); //< stores the size of the block within the strided map
827  if (A->IsView("stridedMaps") == true) {
828  Teuchos::RCP<const Map> myMap = A->getRowMap("stridedMaps");
829  Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
830  TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
831  blkSize = Teuchos::as<const LO>(strMap->getFixedBlockSize());
832  blkId = strMap->getStridedBlockId();
833  if (blkId > -1)
834  blkPartSize = Teuchos::as<LO>(strMap->getStridingData()[blkId]);
835  }
836 
837  // loop over all local nodes
838  LO realnnz = 0;
839  rows(0) = 0;
840  Array<LO> indicesExtra;
841  for (LO row = 0; row < numRows; row++) {
842  ArrayView<const LO> indices;
843  indicesExtra.resize(0);
844 
845  // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet
846  // Note, that pointBoundaryNodes lives on the dofmap (and not the node map).
847  // Therefore, looping over all dofs is fine here. We use blkPartSize as we work
848  // with local ids.
849  // TODO: Here we have different options of how to define a node to be a boundary (or Dirichlet)
850  // node.
851  bool isBoundary = false;
852  if (pL.get<bool>("aggregation: greedy Dirichlet") == true) {
853  for (LO j = 0; j < blkPartSize; j++) {
854  if (pointBoundaryNodes[row * blkPartSize + j]) {
855  isBoundary = true;
856  break;
857  }
858  }
859  } else {
860  isBoundary = true;
861  for (LO j = 0; j < blkPartSize; j++) {
862  if (!pointBoundaryNodes[row * blkPartSize + j]) {
863  isBoundary = false;
864  break;
865  }
866  }
867  }
868 
869  // Merge rows of A
870  // The array indicesExtra contains local column node ids for the current local node "row"
871  if (!isBoundary)
872  MergeRows(*A, row, indicesExtra, colTranslation);
873  else
874  indicesExtra.push_back(row);
875  indices = indicesExtra;
876  numTotal += indices.size();
877 
878  // add the local column node ids to the full columns array which
879  // contains the local column node ids for all local node rows
880  LO nnz = indices.size(), rownnz = 0;
881  for (LO colID = 0; colID < nnz; colID++) {
882  LO col = indices[colID];
883  columns(realnnz++) = col;
884  rownnz++;
885  }
886 
887  if (rownnz == 1) {
888  // If the only element remaining after filtering is diagonal, mark node as boundary
889  // FIXME: this should really be replaced by the following
890  // if (indices.size() == 1 && indices[0] == row)
891  // boundaryNodes[row] = true;
892  // We do not do it this way now because there is no framework for distinguishing isolated
893  // and boundary nodes in the aggregation algorithms
894  amalgBoundaryNodes[row] = true;
895  }
896  rows(row + 1) = realnnz;
897  } // for (LO row = 0; row < numRows; row++)
898 
899  RCP<LWGraph> graph = rcp(new LWGraph(rows, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), uniqueMap, nonUniqueMap, "amalgamated graph of A"));
900  graph->SetBoundaryNodeMap(amalgBoundaryNodes);
901 
902  if (GetVerbLevel() & Statistics1) {
903  GO numLocalBoundaryNodes = 0;
904  GO numGlobalBoundaryNodes = 0;
905 
906  for (size_t i = 0; i < amalgBoundaryNodes.size(); ++i)
907  if (amalgBoundaryNodes(i))
908  numLocalBoundaryNodes++;
909 
910  RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
911  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
912  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes
913  << " agglomerated Dirichlet nodes" << std::endl;
914  }
915 
916  Set(currentLevel, "Graph", graph);
917  Set(currentLevel, "DofsPerNode", blkSize); // full block size
918 
919  } else if (BlockSize > 1 && threshold != STS::zero()) {
920  // Case 4: Multiple DOF/node problem with dropping
921  const RCP<const Map> rowMap = A->getRowMap();
922  const RCP<const Map> colMap = A->getColMap();
923  graphType = "amalgamated";
924 
925  // build node row map (uniqueMap) and node column map (nonUniqueMap)
926  // the arrays rowTranslation and colTranslation contain the local node id
927  // given a local dof id. The data is calculated by the AmalgamationFactory and
928  // stored in the variable container "UnAmalgamationInfo"
929  RCP<const Map> uniqueMap = amalInfo->getNodeRowMap();
930  RCP<const Map> nonUniqueMap = amalInfo->getNodeColMap();
931  Array<LO> rowTranslation = *(amalInfo->getRowTranslation());
932  Array<LO> colTranslation = *(amalInfo->getColTranslation());
933 
934  // get number of local nodes
935  LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
936 
937  // Allocate space for the local graph
938  typename LWGraph::row_type::non_const_type rows("rows", numRows + 1);
939  typename LWGraph::entries_type::non_const_type columns("columns", A->getLocalNumEntries());
940 
941  typename LWGraph::boundary_nodes_type amalgBoundaryNodes("amalgBoundaryNodes", numRows);
942  Kokkos::deep_copy(amalgBoundaryNodes, false);
943 
944  // Detect and record rows that correspond to Dirichlet boundary conditions
945  // TODO If we use ArrayRCP<LO>, then we can record boundary nodes as usual. Size
946  // TODO the array one bigger than the number of local rows, and the last entry can
947  // TODO hold the actual number of boundary nodes. Clever, huh?
948  auto pointBoundaryNodes = MueLu::Utilities<SC, LO, GO, NO>::DetectDirichletRows_kokkos_host(*A, dirichletThreshold);
949  if (rowSumTol > 0.)
950  Utilities::ApplyRowSumCriterionHost(*A, rowSumTol, pointBoundaryNodes);
951 
952  // extract striding information
953  LO blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map)
954  LO blkId = -1; //< the block id within the strided map (or -1 if it is a full block map)
955  LO blkPartSize = A->GetFixedBlockSize(); //< stores the size of the block within the strided map
956  if (A->IsView("stridedMaps") == true) {
957  Teuchos::RCP<const Map> myMap = A->getRowMap("stridedMaps");
958  Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
959  TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
960  blkSize = Teuchos::as<const LO>(strMap->getFixedBlockSize());
961  blkId = strMap->getStridedBlockId();
962  if (blkId > -1)
963  blkPartSize = Teuchos::as<LO>(strMap->getStridingData()[blkId]);
964  }
965 
966  // extract diagonal data for dropping strategy
968  const ArrayRCP<const SC> ghostedDiagVals = ghostedDiag->getData(0);
969 
970  // loop over all local nodes
971  LO realnnz = 0;
972  rows[0] = 0;
973  Array<LO> indicesExtra;
974  for (LO row = 0; row < numRows; row++) {
975  ArrayView<const LO> indices;
976  indicesExtra.resize(0);
977 
978  // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet
979  // Note, that pointBoundaryNodes lives on the dofmap (and not the node map).
980  // Therefore, looping over all dofs is fine here. We use blkPartSize as we work
981  // with local ids.
982  // TODO: Here we have different options of how to define a node to be a boundary (or Dirichlet)
983  // node.
984  bool isBoundary = false;
985  if (pL.get<bool>("aggregation: greedy Dirichlet") == true) {
986  for (LO j = 0; j < blkPartSize; j++) {
987  if (pointBoundaryNodes[row * blkPartSize + j]) {
988  isBoundary = true;
989  break;
990  }
991  }
992  } else {
993  isBoundary = true;
994  for (LO j = 0; j < blkPartSize; j++) {
995  if (!pointBoundaryNodes[row * blkPartSize + j]) {
996  isBoundary = false;
997  break;
998  }
999  }
1000  }
1001 
1002  // Merge rows of A
1003  // The array indicesExtra contains local column node ids for the current local node "row"
1004  if (!isBoundary)
1005  MergeRowsWithDropping(*A, row, ghostedDiagVals, threshold, indicesExtra, colTranslation);
1006  else
1007  indicesExtra.push_back(row);
1008  indices = indicesExtra;
1009  numTotal += indices.size();
1010 
1011  // add the local column node ids to the full columns array which
1012  // contains the local column node ids for all local node rows
1013  LO nnz = indices.size(), rownnz = 0;
1014  for (LO colID = 0; colID < nnz; colID++) {
1015  LO col = indices[colID];
1016  columns[realnnz++] = col;
1017  rownnz++;
1018  }
1019 
1020  if (rownnz == 1) {
1021  // If the only element remaining after filtering is diagonal, mark node as boundary
1022  // FIXME: this should really be replaced by the following
1023  // if (indices.size() == 1 && indices[0] == row)
1024  // boundaryNodes[row] = true;
1025  // We do not do it this way now because there is no framework for distinguishing isolated
1026  // and boundary nodes in the aggregation algorithms
1027  amalgBoundaryNodes[row] = true;
1028  }
1029  rows[row + 1] = realnnz;
1030  } // for (LO row = 0; row < numRows; row++)
1031  // columns.resize(realnnz);
1032 
1033  RCP<LWGraph> graph = rcp(new LWGraph(rows, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), uniqueMap, nonUniqueMap, "amalgamated graph of A"));
1034  graph->SetBoundaryNodeMap(amalgBoundaryNodes);
1035 
1036  if (GetVerbLevel() & Statistics1) {
1037  GO numLocalBoundaryNodes = 0;
1038  GO numGlobalBoundaryNodes = 0;
1039 
1040  for (size_t i = 0; i < amalgBoundaryNodes.size(); ++i)
1041  if (amalgBoundaryNodes(i))
1042  numLocalBoundaryNodes++;
1043 
1044  RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
1045  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1046  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes
1047  << " agglomerated Dirichlet nodes" << std::endl;
1048  }
1049 
1050  Set(currentLevel, "Graph", graph);
1051  Set(currentLevel, "DofsPerNode", blkSize); // full block size
1052  }
1053 
1054  } else if (algo == "distance laplacian") {
1055  LO blkSize = A->GetFixedBlockSize();
1056  GO indexBase = A->getRowMap()->getIndexBase();
1057  // [*0*] : FIXME
1058  // ap: somehow, if I move this line to [*1*], Belos throws an error
1059  // I'm not sure what's going on. Do we always have to Get data, if we did
1060  // DeclareInput for it?
1061  // RCP<RealValuedMultiVector> Coords = Get< RCP<RealValuedMultiVector > >(currentLevel, "Coordinates");
1062 
1063  // Detect and record rows that correspond to Dirichlet boundary conditions
1064  // TODO If we use ArrayRCP<LO>, then we can record boundary nodes as usual. Size
1065  // TODO the array one bigger than the number of local rows, and the last entry can
1066  // TODO hold the actual number of boundary nodes. Clever, huh?
1067  auto pointBoundaryNodes = MueLu::Utilities<SC, LO, GO, NO>::DetectDirichletRows_kokkos_host(*A, dirichletThreshold);
1068  if (rowSumTol > 0.)
1069  Utilities::ApplyRowSumCriterionHost(*A, rowSumTol, pointBoundaryNodes);
1070 
1071  if ((blkSize == 1) && (threshold == STS::zero())) {
1072  // Trivial case: scalar problem, no dropping. Can return original graph
1073  RCP<LWGraph> graph = rcp(new LWGraph(A->getCrsGraph(), "graph of A"));
1074  graph->SetBoundaryNodeMap(pointBoundaryNodes);
1075  graphType = "unamalgamated";
1076  numTotal = A->getLocalNumEntries();
1077 
1078  if (GetVerbLevel() & Statistics1) {
1079  GO numLocalBoundaryNodes = 0;
1080  GO numGlobalBoundaryNodes = 0;
1081  for (size_t i = 0; i < pointBoundaryNodes.size(); ++i)
1082  if (pointBoundaryNodes(i))
1083  numLocalBoundaryNodes++;
1084  RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
1085  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1086  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
1087  }
1088 
1089  Set(currentLevel, "DofsPerNode", blkSize);
1090  Set(currentLevel, "Graph", graph);
1091 
1092  } else {
1093  // ap: We make quite a few assumptions here; general case may be a lot different,
1094  // but much much harder to implement. We assume that:
1095  // 1) all maps are standard maps, not strided maps
1096  // 2) global indices of dofs in A are related to dofs in coordinates in a simple arithmetic
1097  // way: rows i*blkSize, i*blkSize+1, ..., i*blkSize + (blkSize-1) correspond to node i
1098  //
1099  // NOTE: Potentially, some of the code below could be simplified with UnAmalgamationInfo,
1100  // but as I totally don't understand that code, here is my solution
1101 
1102  // [*1*]: see [*0*]
1103 
1104  // Check that the number of local coordinates is consistent with the #rows in A
1105  TEUCHOS_TEST_FOR_EXCEPTION(A->getRowMap()->getLocalNumElements() / blkSize != Coords->getLocalLength(), Exceptions::Incompatible,
1106  "Coordinate vector length (" << Coords->getLocalLength() << ") is incompatible with number of rows in A (" << A->getRowMap()->getLocalNumElements() << ") by modulo block size (" << blkSize << ").");
1107 
1108  const RCP<const Map> colMap = A->getColMap();
1109  RCP<const Map> uniqueMap, nonUniqueMap;
1110  Array<LO> colTranslation;
1111  if (blkSize == 1) {
1112  uniqueMap = A->getRowMap();
1113  nonUniqueMap = A->getColMap();
1114  graphType = "unamalgamated";
1115 
1116  } else {
1117  uniqueMap = Coords->getMap();
1118  TEUCHOS_TEST_FOR_EXCEPTION(uniqueMap->getIndexBase() != indexBase, Exceptions::Incompatible,
1119  "Different index bases for matrix and coordinates");
1120 
1121  AmalgamationFactory::AmalgamateMap(*(A->getColMap()), *A, nonUniqueMap, colTranslation);
1122 
1123  graphType = "amalgamated";
1124  }
1125  LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
1126 
1127  RCP<RealValuedMultiVector> ghostedCoords;
1128  RCP<Vector> ghostedLaplDiag;
1129  Teuchos::ArrayRCP<SC> ghostedLaplDiagData;
1130  if (threshold != STS::zero()) {
1131  // Get ghost coordinates
1132  RCP<const Import> importer;
1133  {
1134  SubFactoryMonitor m1(*this, "Import construction", currentLevel);
1135  if (blkSize == 1 && realA->getCrsGraph()->getImporter() != Teuchos::null) {
1136  GetOStream(Warnings1) << "Using existing importer from matrix graph" << std::endl;
1137  importer = realA->getCrsGraph()->getImporter();
1138  } else {
1139  GetOStream(Warnings0) << "Constructing new importer instance" << std::endl;
1140  importer = ImportFactory::Build(uniqueMap, nonUniqueMap);
1141  }
1142  } // subtimer
1143  ghostedCoords = Xpetra::MultiVectorFactory<real_type, LO, GO, NO>::Build(nonUniqueMap, Coords->getNumVectors());
1144  {
1145  SubFactoryMonitor m1(*this, "Coordinate import", currentLevel);
1146  ghostedCoords->doImport(*Coords, *importer, Xpetra::INSERT);
1147  } // subtimer
1148 
1149  // Construct Distance Laplacian diagonal
1150  RCP<Vector> localLaplDiag = VectorFactory::Build(uniqueMap);
1151  Array<LO> indicesExtra;
1153  if (threshold != STS::zero()) {
1154  const size_t numVectors = ghostedCoords->getNumVectors();
1155  coordData.reserve(numVectors);
1156  for (size_t j = 0; j < numVectors; j++) {
1157  Teuchos::ArrayRCP<const real_type> tmpData = ghostedCoords->getData(j);
1158  coordData.push_back(tmpData);
1159  }
1160  }
1161  {
1162  SubFactoryMonitor m1(*this, "Laplacian local diagonal", currentLevel);
1163  ArrayRCP<SC> localLaplDiagData = localLaplDiag->getDataNonConst(0);
1164  for (LO row = 0; row < numRows; row++) {
1165  ArrayView<const LO> indices;
1166 
1167  if (blkSize == 1) {
1168  ArrayView<const SC> vals;
1169  A->getLocalRowView(row, indices, vals);
1170 
1171  } else {
1172  // Merge rows of A
1173  indicesExtra.resize(0);
1174  MergeRows(*A, row, indicesExtra, colTranslation);
1175  indices = indicesExtra;
1176  }
1177 
1178  LO nnz = indices.size();
1179  bool haveAddedToDiag = false;
1180  for (LO colID = 0; colID < nnz; colID++) {
1181  const LO col = indices[colID];
1182 
1183  if (row != col) {
1184  if (use_dlap_weights == SINGLE_WEIGHTS) {
1185  /*printf("[%d,%d] Unweighted Distance = %6.4e Weighted Distance = %6.4e\n",row,col,
1186  MueLu::Utilities<real_type,LO,GO,NO>::Distance2(coordData, row, col),
1187  MueLu::Utilities<real_type,LO,GO,NO>::Distance2(dlap_weights(),coordData, row, col));*/
1188  localLaplDiagData[row] += STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(dlap_weights(), coordData, row, col);
1189  } else if (use_dlap_weights == BLOCK_WEIGHTS) {
1190  int block_id = row % interleaved_blocksize;
1191  int block_start = block_id * interleaved_blocksize;
1192  localLaplDiagData[row] += STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(dlap_weights(block_start, interleaved_blocksize), coordData, row, col);
1193  } else {
1194  // printf("[%d,%d] Unweighted Distance = %6.4e\n",row,col,MueLu::Utilities<real_type,LO,GO,NO>::Distance2(coordData, row, col));
1195  localLaplDiagData[row] += STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(coordData, row, col);
1196  }
1197  haveAddedToDiag = true;
1198  }
1199  }
1200  // Deal with the situation where boundary conditions have only been enforced on rows, but not on columns.
1201  // We enforce dropping of these entries by assigning a very large number to the diagonal entries corresponding to BCs.
1202  if (!haveAddedToDiag)
1203  localLaplDiagData[row] = STS::squareroot(STS::rmax());
1204  }
1205  } // subtimer
1206  {
1207  SubFactoryMonitor m1(*this, "Laplacian distributed diagonal", currentLevel);
1208  ghostedLaplDiag = VectorFactory::Build(nonUniqueMap);
1209  ghostedLaplDiag->doImport(*localLaplDiag, *importer, Xpetra::INSERT);
1210  ghostedLaplDiagData = ghostedLaplDiag->getDataNonConst(0);
1211  } // subtimer
1212 
1213  } else {
1214  GetOStream(Runtime0) << "Skipping distance laplacian construction due to 0 threshold" << std::endl;
1215  }
1216 
1217  // NOTE: ghostedLaplDiagData might be zero if we don't actually calculate the laplacian
1218 
1219  // allocate space for the local graph
1220  typename LWGraph::row_type::non_const_type rows("rows", numRows + 1);
1221  typename LWGraph::entries_type::non_const_type columns("columns", A->getLocalNumEntries());
1222 
1223 #ifdef HAVE_MUELU_DEBUG
1224  // DEBUGGING
1225  for (LO i = 0; i < (LO)columns.size(); i++) columns[i] = -666;
1226 #endif
1227 
1228  // Extra array for if we're allowing symmetrization with cutting
1229  ArrayRCP<LO> rows_stop;
1230  bool use_stop_array = threshold != STS::zero() && distanceLaplacianAlgo == scaled_cut_symmetric;
1231  if (use_stop_array)
1232  // rows_stop = typename LWGraph::row_type::non_const_type("rows_stop", numRows);
1233  rows_stop.resize(numRows);
1234 
1235  typename LWGraph::boundary_nodes_type amalgBoundaryNodes("amalgBoundaryNodes", numRows);
1236  Kokkos::deep_copy(amalgBoundaryNodes, false);
1237 
1238  LO realnnz = 0;
1239  rows(0) = 0;
1240 
1241  Array<LO> indicesExtra;
1242  {
1243  SubFactoryMonitor m1(*this, "Laplacian dropping", currentLevel);
1245  if (threshold != STS::zero()) {
1246  const size_t numVectors = ghostedCoords->getNumVectors();
1247  coordData.reserve(numVectors);
1248  for (size_t j = 0; j < numVectors; j++) {
1249  Teuchos::ArrayRCP<const real_type> tmpData = ghostedCoords->getData(j);
1250  coordData.push_back(tmpData);
1251  }
1252  }
1253 
1254  ArrayView<const SC> vals; // CMS hackery
1255  for (LO row = 0; row < numRows; row++) {
1256  ArrayView<const LO> indices;
1257  indicesExtra.resize(0);
1258  bool isBoundary = false;
1259 
1260  if (blkSize == 1) {
1261  // ArrayView<const SC> vals;//CMS uncomment
1262  A->getLocalRowView(row, indices, vals);
1263  isBoundary = pointBoundaryNodes[row];
1264  } else {
1265  // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet
1266  isBoundary = true;
1267  for (LO j = 0; j < blkSize; j++) {
1268  if (!pointBoundaryNodes[row * blkSize + j]) {
1269  isBoundary = false;
1270  break;
1271  }
1272  }
1273 
1274  // Merge rows of A
1275  if (!isBoundary)
1276  MergeRows(*A, row, indicesExtra, colTranslation);
1277  else
1278  indicesExtra.push_back(row);
1279  indices = indicesExtra;
1280  }
1281  numTotal += indices.size();
1282 
1283  LO nnz = indices.size(), rownnz = 0;
1284 
1285  if (use_stop_array) {
1286  rows(row + 1) = rows(row) + nnz;
1287  realnnz = rows(row);
1288  }
1289 
1290  if (threshold != STS::zero()) {
1291  // default
1292  if (distanceLaplacianAlgo == defaultAlgo) {
1293  /* Standard Distance Laplacian */
1294  for (LO colID = 0; colID < nnz; colID++) {
1295  LO col = indices[colID];
1296 
1297  if (row == col) {
1298  columns(realnnz++) = col;
1299  rownnz++;
1300  continue;
1301  }
1302 
1303  // We do not want the distance Laplacian aggregating boundary nodes
1304  if (isBoundary) continue;
1305 
1306  SC laplVal;
1307  if (use_dlap_weights == SINGLE_WEIGHTS) {
1308  laplVal = STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(dlap_weights(), coordData, row, col);
1309  } else if (use_dlap_weights == BLOCK_WEIGHTS) {
1310  int block_id = row % interleaved_blocksize;
1311  int block_start = block_id * interleaved_blocksize;
1312  laplVal = STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(dlap_weights(block_start, interleaved_blocksize), coordData, row, col);
1313  } else {
1314  laplVal = STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(coordData, row, col);
1315  }
1316  real_type aiiajj = STS::magnitude(realThreshold * realThreshold * ghostedLaplDiagData[row] * ghostedLaplDiagData[col]);
1317  real_type aij = STS::magnitude(laplVal * laplVal);
1318 
1319  if (aij > aiiajj) {
1320  columns(realnnz++) = col;
1321  rownnz++;
1322  } else {
1323  numDropped++;
1324  }
1325  }
1326  } else {
1327  /* Cut Algorithm */
1328  using DropTol = Details::DropTol<real_type, LO>;
1329  std::vector<DropTol> drop_vec;
1330  drop_vec.reserve(nnz);
1331  const real_type zero = Teuchos::ScalarTraits<real_type>::zero();
1332  const real_type one = Teuchos::ScalarTraits<real_type>::one();
1333 
1334  // find magnitudes
1335  for (LO colID = 0; colID < nnz; colID++) {
1336  LO col = indices[colID];
1337 
1338  if (row == col) {
1339  drop_vec.emplace_back(zero, one, colID, false);
1340  continue;
1341  }
1342  // We do not want the distance Laplacian aggregating boundary nodes
1343  if (isBoundary) continue;
1344 
1345  SC laplVal;
1346  if (use_dlap_weights == SINGLE_WEIGHTS) {
1347  laplVal = STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(dlap_weights(), coordData, row, col);
1348  } else if (use_dlap_weights == BLOCK_WEIGHTS) {
1349  int block_id = row % interleaved_blocksize;
1350  int block_start = block_id * interleaved_blocksize;
1351  laplVal = STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(dlap_weights(block_start, interleaved_blocksize), coordData, row, col);
1352  } else {
1353  laplVal = STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(coordData, row, col);
1354  }
1355 
1356  real_type aiiajj = STS::magnitude(ghostedLaplDiagData[row] * ghostedLaplDiagData[col]);
1357  real_type aij = STS::magnitude(laplVal * laplVal);
1358 
1359  drop_vec.emplace_back(aij, aiiajj, colID, false);
1360  }
1361 
1362  const size_t n = drop_vec.size();
1363 
1364  if (distanceLaplacianAlgo == unscaled_cut) {
1365  std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol const& a, DropTol const& b) {
1366  return a.val > b.val;
1367  });
1368 
1369  bool drop = false;
1370  for (size_t i = 1; i < n; ++i) {
1371  if (!drop) {
1372  auto const& x = drop_vec[i - 1];
1373  auto const& y = drop_vec[i];
1374  auto a = x.val;
1375  auto b = y.val;
1376  if (realThreshold * realThreshold * a > b) {
1377  drop = true;
1378 #ifdef HAVE_MUELU_DEBUG
1379  if (distanceLaplacianCutVerbose) {
1380  std::cout << "DJS: KEEP, N, ROW: " << i + 1 << ", " << n << ", " << row << std::endl;
1381  }
1382 #endif
1383  }
1384  }
1385  drop_vec[i].drop = drop;
1386  }
1387  } else if (distanceLaplacianAlgo == scaled_cut || distanceLaplacianAlgo == scaled_cut_symmetric) {
1388  std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol const& a, DropTol const& b) {
1389  return a.val / a.diag > b.val / b.diag;
1390  });
1391 
1392  bool drop = false;
1393  for (size_t i = 1; i < n; ++i) {
1394  if (!drop) {
1395  auto const& x = drop_vec[i - 1];
1396  auto const& y = drop_vec[i];
1397  auto a = x.val / x.diag;
1398  auto b = y.val / y.diag;
1399  if (realThreshold * realThreshold * a > b) {
1400  drop = true;
1401 #ifdef HAVE_MUELU_DEBUG
1402  if (distanceLaplacianCutVerbose) {
1403  std::cout << "DJS: KEEP, N, ROW: " << i + 1 << ", " << n << ", " << row << std::endl;
1404  }
1405 #endif
1406  }
1407  }
1408  drop_vec[i].drop = drop;
1409  }
1410  }
1411 
1412  std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol const& a, DropTol const& b) {
1413  return a.col < b.col;
1414  });
1415 
1416  for (LO idxID = 0; idxID < (LO)drop_vec.size(); idxID++) {
1417  LO col = indices[drop_vec[idxID].col];
1418 
1419  // don't drop diagonal
1420  if (row == col) {
1421  columns(realnnz++) = col;
1422  rownnz++;
1423  // printf("(%d,%d) KEEP %13s matrix = %6.4e\n",row,row,"DIAGONAL",drop_vec[idxID].aux_val);
1424  continue;
1425  }
1426 
1427  if (!drop_vec[idxID].drop) {
1428  columns(realnnz++) = col;
1429  // printf("(%d,%d) KEEP dlap = %6.4e matrix = %6.4e\n",row,col,drop_vec[idxID].val/drop_vec[idxID].diag,drop_vec[idxID].aux_val);
1430  rownnz++;
1431  } else {
1432  // printf("(%d,%d) DROP dlap = %6.4e matrix = %6.4e\n",row,col,drop_vec[idxID].val/drop_vec[idxID].diag,drop_vec[idxID].aux_val);
1433  numDropped++;
1434  }
1435  }
1436  }
1437  } else {
1438  // Skip laplace calculation and threshold comparison for zero threshold
1439  for (LO colID = 0; colID < nnz; colID++) {
1440  LO col = indices[colID];
1441  columns(realnnz++) = col;
1442  rownnz++;
1443  }
1444  }
1445 
1446  if (rownnz == 1) {
1447  // If the only element remaining after filtering is diagonal, mark node as boundary
1448  // FIXME: this should really be replaced by the following
1449  // if (indices.size() == 1 && indices[0] == row)
1450  // boundaryNodes[row] = true;
1451  // We do not do it this way now because there is no framework for distinguishing isolated
1452  // and boundary nodes in the aggregation algorithms
1453  amalgBoundaryNodes[row] = true;
1454  }
1455 
1456  if (use_stop_array)
1457  rows_stop[row] = rownnz + rows[row];
1458  else
1459  rows[row + 1] = realnnz;
1460  } // for (LO row = 0; row < numRows; row++)
1461 
1462  } // subtimer
1463 
1464  if (use_stop_array) {
1465  // Do symmetrization of the cut matrix
1466  // NOTE: We assume nested row/column maps here
1467  for (LO row = 0; row < numRows; row++) {
1468  for (LO colidx = rows[row]; colidx < rows_stop[row]; colidx++) {
1469  LO col = columns[colidx];
1470  if (col >= numRows) continue;
1471 
1472  bool found = false;
1473  for (LO t_col = rows(col); !found && t_col < rows_stop[col]; t_col++) {
1474  if (columns[t_col] == row)
1475  found = true;
1476  }
1477  // We didn't find the transpose buddy, so let's symmetrize, unless we'd be symmetrizing
1478  // into a Dirichlet unknown. In that case don't.
1479  if (!found && !pointBoundaryNodes[col] && Teuchos::as<typename LWGraph::row_type::value_type>(rows_stop[col]) < rows[col + 1]) {
1480  LO new_idx = rows_stop[col];
1481  // printf("(%d,%d) SYMADD entry\n",col,row);
1482  columns[new_idx] = row;
1483  rows_stop[col]++;
1484  numDropped--;
1485  }
1486  }
1487  }
1488 
1489  // Condense everything down
1490  LO current_start = 0;
1491  for (LO row = 0; row < numRows; row++) {
1492  LO old_start = current_start;
1493  for (LO col = rows(row); col < rows_stop[row]; col++) {
1494  if (current_start != col) {
1495  columns(current_start) = columns(col);
1496  }
1497  current_start++;
1498  }
1499  rows[row] = old_start;
1500  }
1501  rows(numRows) = realnnz = current_start;
1502  }
1503 
1504  RCP<LWGraph> graph;
1505  {
1506  SubFactoryMonitor m1(*this, "Build amalgamated graph", currentLevel);
1507  graph = rcp(new LWGraph(rows, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), uniqueMap, nonUniqueMap, "amalgamated graph of A"));
1508  graph->SetBoundaryNodeMap(amalgBoundaryNodes);
1509  } // subtimer
1510 
1511  if (GetVerbLevel() & Statistics1) {
1512  GO numLocalBoundaryNodes = 0;
1513  GO numGlobalBoundaryNodes = 0;
1514 
1515  for (size_t i = 0; i < amalgBoundaryNodes.size(); ++i)
1516  if (amalgBoundaryNodes(i))
1517  numLocalBoundaryNodes++;
1518 
1519  RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
1520  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1521  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " agglomerated Dirichlet nodes"
1522  << " using threshold " << dirichletThreshold << std::endl;
1523  }
1524 
1525  Set(currentLevel, "Graph", graph);
1526  Set(currentLevel, "DofsPerNode", blkSize);
1527  }
1528  }
1529 
1530  if (GetVerbLevel() & Statistics1) {
1531  RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
1532  GO numGlobalTotal, numGlobalDropped;
1533  MueLu_sumAll(comm, numTotal, numGlobalTotal);
1534  MueLu_sumAll(comm, numDropped, numGlobalDropped);
1535  GetOStream(Statistics1) << "Number of dropped entries in " << graphType << " matrix graph: " << numGlobalDropped << "/" << numGlobalTotal;
1536  if (numGlobalTotal != 0)
1537  GetOStream(Statistics1) << " (" << 100 * Teuchos::as<double>(numGlobalDropped) / Teuchos::as<double>(numGlobalTotal) << "%)";
1538  GetOStream(Statistics1) << std::endl;
1539  }
1540 
1541  } else {
1542  // what Tobias has implemented
1543 
1544  SC threshold = as<SC>(pL.get<double>("aggregation: drop tol"));
1545  // GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
1546  GetOStream(Runtime0) << "algorithm = \""
1547  << "failsafe"
1548  << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
1549  Set<bool>(currentLevel, "Filtering", (threshold != STS::zero()));
1550 
1551  RCP<const Map> rowMap = A->getRowMap();
1552  RCP<const Map> colMap = A->getColMap();
1553 
1554  LO blockdim = 1; // block dim for fixed size blocks
1555  GO indexBase = rowMap->getIndexBase(); // index base of maps
1556  GO offset = 0;
1557 
1558  // 1) check for blocking/striding information
1559  if (A->IsView("stridedMaps") &&
1560  Teuchos::rcp_dynamic_cast<const StridedMap>(A->getRowMap("stridedMaps")) != Teuchos::null) {
1561  Xpetra::viewLabel_t oldView = A->SwitchToView("stridedMaps"); // note: "stridedMaps are always non-overlapping (correspond to range and domain maps!)
1562  RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(A->getRowMap());
1563  TEUCHOS_TEST_FOR_EXCEPTION(strMap == Teuchos::null, Exceptions::BadCast, "MueLu::CoalesceFactory::Build: cast to strided row map failed.");
1564  blockdim = strMap->getFixedBlockSize();
1565  offset = strMap->getOffset();
1566  oldView = A->SwitchToView(oldView);
1567  GetOStream(Statistics1) << "CoalesceDropFactory::Build():"
1568  << " found blockdim=" << blockdim << " from strided maps. offset=" << offset << std::endl;
1569  } else
1570  GetOStream(Statistics1) << "CoalesceDropFactory::Build(): no striding information available. Use blockdim=1 with offset=0" << std::endl;
1571 
1572  // 2) get row map for amalgamated matrix (graph of A)
1573  // with same distribution over all procs as row map of A
1574  RCP<const Map> nodeMap = amalInfo->getNodeRowMap();
1575  GetOStream(Statistics1) << "CoalesceDropFactory: nodeMap " << nodeMap->getLocalNumElements() << "/" << nodeMap->getGlobalNumElements() << " elements" << std::endl;
1576 
1577  // 3) create graph of amalgamated matrix
1578  RCP<CrsGraph> crsGraph = CrsGraphFactory::Build(nodeMap, A->getLocalMaxNumRowEntries() * blockdim);
1579 
1580  LO numRows = A->getRowMap()->getLocalNumElements();
1581  LO numNodes = nodeMap->getLocalNumElements();
1582  typename LWGraph::boundary_nodes_type amalgBoundaryNodes("amalgBoundaryNodes", numNodes);
1583  Kokkos::deep_copy(amalgBoundaryNodes, false);
1584  const ArrayRCP<int> numberDirichletRowsPerNode(numNodes, 0); // helper array counting the number of Dirichlet nodes associated with node
1585  bool bIsDiagonalEntry = false; // boolean flag stating that grid==gcid
1586 
1587  // 4) do amalgamation. generate graph of amalgamated matrix
1588  // Note, this code is much more inefficient than the leightwight implementation
1589  // Most of the work has already been done in the AmalgamationFactory
1590  for (LO row = 0; row < numRows; row++) {
1591  // get global DOF id
1592  GO grid = rowMap->getGlobalElement(row);
1593 
1594  // reinitialize boolean helper variable
1595  bIsDiagonalEntry = false;
1596 
1597  // translate grid to nodeid
1598  GO nodeId = AmalgamationFactory::DOFGid2NodeId(grid, blockdim, offset, indexBase);
1599 
1600  size_t nnz = A->getNumEntriesInLocalRow(row);
1603  A->getLocalRowView(row, indices, vals);
1604 
1605  RCP<std::vector<GO>> cnodeIds = Teuchos::rcp(new std::vector<GO>); // global column block ids
1606  LO realnnz = 0;
1607  for (LO col = 0; col < Teuchos::as<LO>(nnz); col++) {
1608  GO gcid = colMap->getGlobalElement(indices[col]); // global column id
1609 
1610  if (vals[col] != STS::zero()) {
1611  GO cnodeId = AmalgamationFactory::DOFGid2NodeId(gcid, blockdim, offset, indexBase);
1612  cnodeIds->push_back(cnodeId);
1613  realnnz++; // increment number of nnz in matrix row
1614  if (grid == gcid) bIsDiagonalEntry = true;
1615  }
1616  }
1617 
1618  if (realnnz == 1 && bIsDiagonalEntry == true) {
1619  LO lNodeId = nodeMap->getLocalElement(nodeId);
1620  numberDirichletRowsPerNode[lNodeId] += 1; // increment Dirichlet row counter associated with lNodeId
1621  if (numberDirichletRowsPerNode[lNodeId] == blockdim) // mark full Dirichlet nodes
1622  amalgBoundaryNodes[lNodeId] = true;
1623  }
1624 
1625  Teuchos::ArrayRCP<GO> arr_cnodeIds = Teuchos::arcp(cnodeIds);
1626 
1627  if (arr_cnodeIds.size() > 0)
1628  crsGraph->insertGlobalIndices(nodeId, arr_cnodeIds());
1629  }
1630  // fill matrix graph
1631  crsGraph->fillComplete(nodeMap, nodeMap);
1632 
1633  // 5) create MueLu Graph object
1634  RCP<LWGraph> graph = rcp(new LWGraph(crsGraph, "amalgamated graph of A"));
1635 
1636  // Detect and record rows that correspond to Dirichlet boundary conditions
1637  graph->SetBoundaryNodeMap(amalgBoundaryNodes);
1638 
1639  if (GetVerbLevel() & Statistics1) {
1640  GO numLocalBoundaryNodes = 0;
1641  GO numGlobalBoundaryNodes = 0;
1642  for (size_t i = 0; i < amalgBoundaryNodes.size(); ++i)
1643  if (amalgBoundaryNodes(i))
1644  numLocalBoundaryNodes++;
1645  RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
1646  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1647  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
1648  }
1649 
1650  // 6) store results in Level
1651  // graph->SetBoundaryNodeMap(gBoundaryNodeMap);
1652  Set(currentLevel, "DofsPerNode", blockdim);
1653  Set(currentLevel, "Graph", graph);
1654 
1655  } // if (doExperimentalWrap) ... else ...
1656 
1657 } // Build
1658 
1659 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1660 void CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::MergeRows(const Matrix& A, const LO row, Array<LO>& cols, const Array<LO>& translation) const {
1661  typedef typename ArrayView<const LO>::size_type size_type;
1662 
1663  // extract striding information
1664  LO blkSize = A.GetFixedBlockSize(); //< stores the size of the block within the strided map
1665  if (A.IsView("stridedMaps") == true) {
1666  Teuchos::RCP<const Map> myMap = A.getRowMap("stridedMaps");
1667  Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
1668  TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
1669  if (strMap->getStridedBlockId() > -1)
1670  blkSize = Teuchos::as<LO>(strMap->getStridingData()[strMap->getStridedBlockId()]);
1671  }
1672 
1673  // count nonzero entries in all dof rows associated with node row
1674  size_t nnz = 0, pos = 0;
1675  for (LO j = 0; j < blkSize; j++)
1676  nnz += A.getNumEntriesInLocalRow(row * blkSize + j);
1677 
1678  if (nnz == 0) {
1679  cols.resize(0);
1680  return;
1681  }
1682 
1683  cols.resize(nnz);
1684 
1685  // loop over all local dof rows associated with local node "row"
1686  ArrayView<const LO> inds;
1687  ArrayView<const SC> vals;
1688  for (LO j = 0; j < blkSize; j++) {
1689  A.getLocalRowView(row * blkSize + j, inds, vals);
1690  size_type numIndices = inds.size();
1691 
1692  if (numIndices == 0) // skip empty dof rows
1693  continue;
1694 
1695  // cols: stores all local node ids for current local node id "row"
1696  cols[pos++] = translation[inds[0]];
1697  for (size_type k = 1; k < numIndices; k++) {
1698  LO nodeID = translation[inds[k]];
1699  // Here we try to speed up the process by reducing the size of an array
1700  // to sort. This works if the column nonzeros belonging to the same
1701  // node are stored consequently.
1702  if (nodeID != cols[pos - 1])
1703  cols[pos++] = nodeID;
1704  }
1705  }
1706  cols.resize(pos);
1707  nnz = pos;
1708 
1709  // Sort and remove duplicates
1710  std::sort(cols.begin(), cols.end());
1711  pos = 0;
1712  for (size_t j = 1; j < nnz; j++)
1713  if (cols[j] != cols[pos])
1714  cols[++pos] = cols[j];
1715  cols.resize(pos + 1);
1716 }
1717 
1718 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1719 void CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::MergeRowsWithDropping(const Matrix& A, const LO row, const ArrayRCP<const SC>& ghostedDiagVals, SC threshold, Array<LO>& cols, const Array<LO>& translation) const {
1720  typedef typename ArrayView<const LO>::size_type size_type;
1721  typedef Teuchos::ScalarTraits<SC> STS;
1722 
1723  // extract striding information
1724  LO blkSize = A.GetFixedBlockSize(); //< stores the size of the block within the strided map
1725  if (A.IsView("stridedMaps") == true) {
1726  Teuchos::RCP<const Map> myMap = A.getRowMap("stridedMaps");
1727  Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
1728  TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
1729  if (strMap->getStridedBlockId() > -1)
1730  blkSize = Teuchos::as<LO>(strMap->getStridingData()[strMap->getStridedBlockId()]);
1731  }
1732 
1733  // count nonzero entries in all dof rows associated with node row
1734  size_t nnz = 0, pos = 0;
1735  for (LO j = 0; j < blkSize; j++)
1736  nnz += A.getNumEntriesInLocalRow(row * blkSize + j);
1737 
1738  if (nnz == 0) {
1739  cols.resize(0);
1740  return;
1741  }
1742 
1743  cols.resize(nnz);
1744 
1745  // loop over all local dof rows associated with local node "row"
1746  ArrayView<const LO> inds;
1747  ArrayView<const SC> vals;
1748  for (LO j = 0; j < blkSize; j++) {
1749  A.getLocalRowView(row * blkSize + j, inds, vals);
1750  size_type numIndices = inds.size();
1751 
1752  if (numIndices == 0) // skip empty dof rows
1753  continue;
1754 
1755  // cols: stores all local node ids for current local node id "row"
1756  LO prevNodeID = -1;
1757  for (size_type k = 0; k < numIndices; k++) {
1758  LO dofID = inds[k];
1759  LO nodeID = translation[inds[k]];
1760 
1761  // we avoid a square root by using squared values
1762  typename STS::magnitudeType aiiajj = STS::magnitude(threshold * threshold * ghostedDiagVals[dofID] * ghostedDiagVals[row * blkSize + j]); // eps^2 * |a_ii| * |a_jj|
1763  typename STS::magnitudeType aij = STS::magnitude(vals[k] * vals[k]);
1764 
1765  // check dropping criterion
1766  if (aij > aiiajj || (row * blkSize + j == dofID)) {
1767  // accept entry in graph
1768 
1769  // Here we try to speed up the process by reducing the size of an array
1770  // to sort. This works if the column nonzeros belonging to the same
1771  // node are stored consequently.
1772  if (nodeID != prevNodeID) {
1773  cols[pos++] = nodeID;
1774  prevNodeID = nodeID;
1775  }
1776  }
1777  }
1778  }
1779  cols.resize(pos);
1780  nnz = pos;
1781 
1782  // Sort and remove duplicates
1783  std::sort(cols.begin(), cols.end());
1784  pos = 0;
1785  for (size_t j = 1; j < nnz; j++)
1786  if (cols[j] != cols[pos])
1787  cols[++pos] = cols[j];
1788  cols.resize(pos + 1);
1789 
1790  return;
1791 }
1792 
1793 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1795  typedef Teuchos::ScalarTraits<SC> STS;
1796 
1797  const ParameterList& pL = GetParameterList();
1798  const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
1799  const typename STS::magnitudeType rowSumTol = as<typename STS::magnitudeType>(pL.get<double>("aggregation: row sum drop tol"));
1800 
1801  RCP<LocalOrdinalVector> BlockNumber = Get<RCP<LocalOrdinalVector>>(currentLevel, "BlockNumber");
1802  RCP<LocalOrdinalVector> ghostedBlockNumber;
1803  GetOStream(Statistics1) << "Using BlockDiagonal Graph before dropping (with provided blocking)" << std::endl;
1804 
1805  // Ghost the column block numbers if we need to
1806  RCP<const Import> importer = A->getCrsGraph()->getImporter();
1807  if (!importer.is_null()) {
1808  SubFactoryMonitor m1(*this, "Block Number import", currentLevel);
1809  ghostedBlockNumber = Xpetra::VectorFactory<LO, LO, GO, NO>::Build(importer->getTargetMap());
1810  ghostedBlockNumber->doImport(*BlockNumber, *importer, Xpetra::INSERT);
1811  } else {
1812  ghostedBlockNumber = BlockNumber;
1813  }
1814 
1815  // Accessors for block numbers
1816  Teuchos::ArrayRCP<const LO> row_block_number = BlockNumber->getData(0);
1817  Teuchos::ArrayRCP<const LO> col_block_number = ghostedBlockNumber->getData(0);
1818 
1819  // allocate space for the local graph
1820  typename CrsMatrix::local_matrix_type::row_map_type::host_mirror_type::non_const_type rows_mat;
1821  typename LWGraph::row_type::non_const_type rows_graph;
1822  typename LWGraph::entries_type::non_const_type columns;
1823  typename CrsMatrix::local_matrix_type::values_type::host_mirror_type::non_const_type values;
1824  RCP<CrsMatrixWrap> crs_matrix_wrap;
1825 
1826  if (generate_matrix) {
1827  crs_matrix_wrap = rcp(new CrsMatrixWrap(A->getRowMap(), A->getColMap(), 0));
1828  rows_mat = typename CrsMatrix::local_matrix_type::row_map_type::host_mirror_type::non_const_type("rows_mat", A->getLocalNumRows() + 1);
1829  } else {
1830  rows_graph = typename LWGraph::row_type::non_const_type("rows_graph", A->getLocalNumRows() + 1);
1831  }
1832  columns = typename LWGraph::entries_type::non_const_type("columns", A->getLocalNumEntries());
1833  values = typename CrsMatrix::local_matrix_type::values_type::host_mirror_type::non_const_type("values", A->getLocalNumEntries());
1834 
1835  LO realnnz = 0;
1836  GO numDropped = 0, numTotal = 0;
1837  for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
1838  LO row_block = row_block_number[row];
1839  size_t nnz = A->getNumEntriesInLocalRow(row);
1840  ArrayView<const LO> indices;
1841  ArrayView<const SC> vals;
1842  A->getLocalRowView(row, indices, vals);
1843 
1844  LO rownnz = 0;
1845  for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
1846  LO col = indices[colID];
1847  LO col_block = col_block_number[col];
1848 
1849  if (row_block == col_block) {
1850  if (generate_matrix) values[realnnz] = vals[colID];
1851  columns[realnnz++] = col;
1852  rownnz++;
1853  } else
1854  numDropped++;
1855  }
1856  if (generate_matrix)
1857  rows_mat[row + 1] = realnnz;
1858  else
1859  rows_graph[row + 1] = realnnz;
1860  }
1861 
1862  auto boundaryNodes = MueLu::Utilities<SC, LO, GO, NO>::DetectDirichletRows_kokkos_host(*A, dirichletThreshold);
1863  if (rowSumTol > 0.)
1864  Utilities::ApplyRowSumCriterionHost(*A, rowSumTol, boundaryNodes);
1865 
1866  numTotal = A->getLocalNumEntries();
1867 
1868  if (GetVerbLevel() & Statistics1) {
1869  GO numLocalBoundaryNodes = 0;
1870  GO numGlobalBoundaryNodes = 0;
1871  for (size_t i = 0; i < boundaryNodes.size(); ++i)
1872  if (boundaryNodes(i))
1873  numLocalBoundaryNodes++;
1874  RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
1875  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1876  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
1877 
1878  GO numGlobalTotal, numGlobalDropped;
1879  MueLu_sumAll(comm, numTotal, numGlobalTotal);
1880  MueLu_sumAll(comm, numDropped, numGlobalDropped);
1881  GetOStream(Statistics1) << "Number of dropped entries in block-diagonalized matrix graph: " << numGlobalDropped << "/" << numGlobalTotal;
1882  if (numGlobalTotal != 0)
1883  GetOStream(Statistics1) << " (" << 100 * Teuchos::as<double>(numGlobalDropped) / Teuchos::as<double>(numGlobalTotal) << "%)";
1884  GetOStream(Statistics1) << std::endl;
1885  }
1886 
1887  Set(currentLevel, "Filtering", true);
1888 
1889  if (generate_matrix) {
1890  // NOTE: Trying to use A's Import/Export objects will cause the code to segfault back in Build() with errors on the Import
1891  // if you're using Epetra. I'm not really sure why. By using the Col==Domain and Row==Range maps, we get null Import/Export objects
1892  // here, which is legit, because we never use them anyway.
1893  if constexpr (std::is_same<typename LWGraph::row_type,
1894  typename CrsMatrix::local_matrix_type::row_map_type>::value) {
1895  crs_matrix_wrap->getCrsMatrix()->setAllValues(rows_mat, columns, values);
1896  } else {
1897  auto rows_mat2 = typename CrsMatrix::local_matrix_type::row_map_type::non_const_type("rows_mat2", rows_mat.extent(0));
1898  Kokkos::deep_copy(rows_mat2, rows_mat);
1899  auto columns2 = typename CrsMatrix::local_graph_type::entries_type::non_const_type("columns2", columns.extent(0));
1900  Kokkos::deep_copy(columns2, columns);
1901  auto values2 = typename CrsMatrix::local_matrix_type::values_type::non_const_type("values2", values.extent(0));
1902  Kokkos::deep_copy(values2, values);
1903  crs_matrix_wrap->getCrsMatrix()->setAllValues(rows_mat2, columns2, values2);
1904  }
1905  crs_matrix_wrap->getCrsMatrix()->expertStaticFillComplete(A->getColMap(), A->getRowMap());
1906  } else {
1907  RCP<LWGraph> graph = rcp(new LWGraph(rows_graph, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), A->getRowMap(), A->getColMap(), "block-diagonalized graph of A"));
1908  graph->SetBoundaryNodeMap(boundaryNodes);
1909  Set(currentLevel, "Graph", graph);
1910  }
1911 
1912  Set(currentLevel, "DofsPerNode", 1);
1913  return crs_matrix_wrap;
1914 }
1915 
1916 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1918  TEUCHOS_TEST_FOR_EXCEPTION(ghostedBlockNumber.is_null(), Exceptions::RuntimeError, "BlockDiagonalizeGraph(): ghostedBlockNumber is null.");
1919  const ParameterList& pL = GetParameterList();
1920 
1921  const bool localizeColoringGraph = pL.get<bool>("aggregation: coloring: localize color graph");
1922 
1923  GetOStream(Statistics1) << "Using BlockDiagonal Graph after Dropping (with provided blocking)";
1924  if (localizeColoringGraph)
1925  GetOStream(Statistics1) << ", with localization" << std::endl;
1926  else
1927  GetOStream(Statistics1) << ", without localization" << std::endl;
1928 
1929  // Accessors for block numbers
1930  Teuchos::ArrayRCP<const LO> row_block_number = ghostedBlockNumber->getData(0);
1931  Teuchos::ArrayRCP<const LO> col_block_number = ghostedBlockNumber->getData(0);
1932 
1933  // allocate space for the local graph
1934  ArrayRCP<size_t> rows_mat;
1935  typename LWGraph::row_type::non_const_type rows_graph("rows_graph", inputGraph->GetNodeNumVertices() + 1);
1936  typename LWGraph::entries_type::non_const_type columns("columns", inputGraph->GetNodeNumEdges());
1937 
1938  LO realnnz = 0;
1939  GO numDropped = 0, numTotal = 0;
1940  const LO numRows = Teuchos::as<LO>(inputGraph->GetDomainMap()->getLocalNumElements());
1941  if (localizeColoringGraph) {
1942  for (LO row = 0; row < numRows; ++row) {
1943  LO row_block = row_block_number[row];
1944  auto indices = inputGraph->getNeighborVertices(row);
1945 
1946  LO rownnz = 0;
1947  for (LO colID = 0; colID < Teuchos::as<LO>(indices.length); colID++) {
1948  LO col = indices(colID);
1949  LO col_block = col_block_number[col];
1950 
1951  if ((row_block == col_block) && (col < numRows)) {
1952  columns(realnnz++) = col;
1953  rownnz++;
1954  } else
1955  numDropped++;
1956  }
1957  rows_graph(row + 1) = realnnz;
1958  }
1959  } else {
1960  // ghosting of boundary node map
1961  auto boundaryNodes = inputGraph->GetBoundaryNodeMap();
1963  for (size_t i = 0; i < inputGraph->GetNodeNumVertices(); i++)
1964  boundaryNodesVector->getDataNonConst(0)[i] = boundaryNodes[i];
1965  // Xpetra::IO<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Write("boundary",*boundaryNodesVector);
1967  boundaryColumnVector->doImport(*boundaryNodesVector, *importer, Xpetra::INSERT);
1968  auto boundaryColumn = boundaryColumnVector->getData(0);
1969 
1970  for (LO row = 0; row < numRows; ++row) {
1971  LO row_block = row_block_number[row];
1972  auto indices = inputGraph->getNeighborVertices(row);
1973 
1974  LO rownnz = 0;
1975  for (LO colID = 0; colID < Teuchos::as<LO>(indices.length); colID++) {
1976  LO col = indices(colID);
1977  LO col_block = col_block_number[col];
1978 
1979  if ((row_block == col_block) && ((row == col) || (boundaryColumn[col] == 0))) {
1980  columns(realnnz++) = col;
1981  rownnz++;
1982  } else
1983  numDropped++;
1984  }
1985  rows_graph(row + 1) = realnnz;
1986  }
1987  }
1988 
1989  numTotal = inputGraph->GetNodeNumEdges();
1990 
1991  if (GetVerbLevel() & Statistics1) {
1992  RCP<const Teuchos::Comm<int>> comm = inputGraph->GetDomainMap()->getComm();
1993  GO numGlobalTotal, numGlobalDropped;
1994  MueLu_sumAll(comm, numTotal, numGlobalTotal);
1995  MueLu_sumAll(comm, numDropped, numGlobalDropped);
1996  GetOStream(Statistics1) << "Number of dropped entries in block-diagonalized matrix graph: " << numGlobalDropped << "/" << numGlobalTotal;
1997  if (numGlobalTotal != 0)
1998  GetOStream(Statistics1) << " (" << 100 * Teuchos::as<double>(numGlobalDropped) / Teuchos::as<double>(numGlobalTotal) << "%)";
1999  GetOStream(Statistics1) << std::endl;
2000  }
2001 
2002  if (localizeColoringGraph) {
2003  outputGraph = rcp(new LWGraph(rows_graph, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), inputGraph->GetDomainMap(), inputGraph->GetImportMap(), "block-diagonalized graph of A"));
2004  outputGraph->SetBoundaryNodeMap(inputGraph->GetBoundaryNodeMap());
2005  } else {
2006  TEUCHOS_ASSERT(inputGraph->GetDomainMap()->lib() == Xpetra::UseTpetra);
2007 #ifdef HAVE_XPETRA_TPETRA
2008  auto outputGraph2 = rcp(new LWGraph(rows_graph, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), inputGraph->GetDomainMap(), inputGraph->GetImportMap(), "block-diagonalized graph of A"));
2009 
2010  auto tpGraph = Xpetra::toTpetra(rcp_const_cast<const CrsGraph>(outputGraph2->GetCrsGraph()));
2012  auto tpGraphSym = sym->symmetrize();
2013  auto lclGraphSym = tpGraphSym->getLocalGraphHost();
2014  auto colIndsSym = lclGraphSym.entries;
2015 
2016  auto rowsSym = tpGraphSym->getLocalRowPtrsHost();
2017  typename LWGraph::row_type::non_const_type rows_graphSym("rows_graphSym", rowsSym.size());
2018  for (size_t row = 0; row < rowsSym.size(); row++)
2019  rows_graphSym(row) = rowsSym(row);
2020  outputGraph = rcp(new LWGraph(rows_graphSym, colIndsSym, inputGraph->GetDomainMap(), Xpetra::toXpetra(tpGraphSym->getColMap()), "block-diagonalized graph of A"));
2021  outputGraph->SetBoundaryNodeMap(inputGraph->GetBoundaryNodeMap());
2022 #endif
2023  }
2024 }
2025 
2026 } // namespace MueLu
2027 
2028 #endif // MUELU_COALESCEDROPFACTORY_DEF_HPP
Important warning messages (one line)
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
Exception indicating invalid cast attempted.
void reserve(size_type n)
void BlockDiagonalizeGraph(const RCP< LWGraph > &inputGraph, const RCP< LocalOrdinalVector > &ghostedBlockNumber, RCP< LWGraph > &outputGraph, RCP< const Import > &importer) const
static Teuchos::RCP< Vector > GetMatrixMaxMinusOffDiagonal(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
Return vector containing: max_{i=k}(-a_ik), for each for i in the matrix.
#define MueLu_sumAll(rcpComm, in, out)
static void ApplyRowSumCriterionHost(const Matrix &A, const typename Teuchos::ScalarTraits< Scalar >::magnitudeType rowSumTol, Kokkos::View< bool *, Kokkos::HostSpace > &dirichletRows)
KOKKOS_INLINE_FUNCTION void SetBoundaryNodeMap(const boundary_nodes_type bndry)
Set boolean array indicating which rows correspond to Dirichlet boundaries.
static Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Build(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node >> &map, size_t NumVectors, bool zeroOut=true)
void setValidator(RCP< const ParameterEntryValidator > const &validator)
GlobalOrdinal GO
T & get(const std::string &name, T def_value)
static void ApplyRowSumCriterion(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Magnitude rowSumTol, Teuchos::ArrayRCP< bool > &dirichletRows)
Apply Rowsum Criterion.
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
static void Write(const std::string &fileName, const Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > &M)
Print more statistics.
size_type size() const
DropTol & operator=(DropTol const &)=default
Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > BlockDiagonalize(Level &currentLevel, const RCP< Matrix > &A, bool generate_matrix) const
LocalOrdinal LO
One-liner description of what is happening.
ParameterList & set(std::string const &name, T &&value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const
Return number of graph vertices.
KOKKOS_INLINE_FUNCTION size_type GetNodeNumEdges() const
Return number of graph edges.
KOKKOS_INLINE_FUNCTION const boundary_nodes_type GetBoundaryNodeMap() const
Returns map with global ids of boundary nodes.
size_type size() const
Exception throws to report incompatible objects (like maps).
static const GlobalOrdinal DOFGid2NodeId(GlobalOrdinal gid, LocalOrdinal blockSize, const GlobalOrdinal offset, const GlobalOrdinal indexBase)
Translate global (row/column) id to global amalgamation block id.
Scalar GetThreshold() const
Return threshold value.
Additional warnings.
void resize(const size_type n, const T &val=T())
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:63
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
static Teuchos::ArrayRCP< const bool > DetectDirichletRows(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Magnitude &tol=Teuchos::ScalarTraits< Magnitude >::zero(), bool count_twos_as_dirichlet=false)
Detect Dirichlet rows.
#define SET_VALID_ENTRY(name)
static void AmalgamateMap(const Map &sourceMap, const Matrix &A, RCP< const Map > &amalgamatedMap, Array< LO > &translation)
Method to create merged map for systems of PDEs.
static RCP< Vector > GetMatrixOverlappedDiagonal(const Matrix &A)
Extract Overlapped Matrix Diagonal.
void resize(size_type new_size, const value_type &x=value_type())
void Build(Level &currentLevel) const
Build an object with this factory.
std::string viewLabel_t
static RCP< Vector > Build(const Teuchos::RCP< const Map > &map, bool zeroOut=true)
iterator end()
void DeclareInput(Level &currentLevel) const
Input.
KOKKOS_INLINE_FUNCTION neighbor_vertices_type getNeighborVertices(LO i) const
Return the list of vertices adjacent to the vertex &#39;v&#39;.
Print class parameters.
const RCP< const Map > GetImportMap() const
Return overlapping import map (nodes).
RCP< const Tpetra::CrsGraph< LocalOrdinal, GlobalOrdinal, Node > > toTpetra(const RCP< const CrsGraph< LocalOrdinal, GlobalOrdinal, Node > > &graph)
void push_back(const value_type &x)
void MergeRows(const Matrix &A, const LO row, Array< LO > &cols, const Array< LO > &translation) const
Method to merge rows of matrix for systems of PDEs.
RCP< const CrsGraph< int, GlobalOrdinal, Node > > toXpetra(const Epetra_CrsGraph &g)
size_type size() const
Scalar SC
const RCP< const Map > GetDomainMap() const
Lightweight MueLu representation of a compressed row storage graph.
DropTol(real_type val_, real_type diag_, LO col_, bool drop_)
Exception throws to report errors in the internal logical of the program.
#define TEUCHOS_ASSERT(assertion_test)
static Kokkos::View< bool *, typename Kokkos::HostSpace > DetectDirichletRows_kokkos_host(const Matrix &A, const Magnitude &tol=Teuchos::ScalarTraits< typename Teuchos::ScalarTraits< SC >::magnitudeType >::zero(), const bool count_twos_as_dirichlet=false)
static Teuchos::ScalarTraits< Scalar >::magnitudeType Distance2(const Teuchos::Array< Teuchos::ArrayRCP< const Scalar >> &v, LocalOrdinal i0, LocalOrdinal i1)
Squared distance between two rows in a multivector.
ParameterEntry & getEntry(const std::string &name)
bool is_null() const
iterator begin()
void MergeRowsWithDropping(const Matrix &A, const LO row, const ArrayRCP< const SC > &ghostedDiagVals, SC threshold, Array< LO > &cols, const Array< LO > &translation) const
bool is_null() const