MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_CoalesceDropFactory_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // MueLu: A package for multigrid based preconditioning
4 //
5 // Copyright 2012 NTESS and the MueLu contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef MUELU_COALESCEDROPFACTORY_DEF_HPP
11 #define MUELU_COALESCEDROPFACTORY_DEF_HPP
12 
14 #include <Xpetra_CrsGraph.hpp>
15 #include <Xpetra_ImportFactory.hpp>
16 #include <Xpetra_ExportFactory.hpp>
17 #include <Xpetra_MapFactory.hpp>
18 #include <Xpetra_Map.hpp>
19 #include <Xpetra_Matrix.hpp>
20 #include <Xpetra_MultiVectorFactory.hpp>
21 #include <Xpetra_MultiVector.hpp>
22 #include <Xpetra_StridedMap.hpp>
23 #include <Xpetra_VectorFactory.hpp>
24 #include <Xpetra_Vector.hpp>
25 
26 #include <Xpetra_IO.hpp>
27 
28 #include <Kokkos_NestedSort.hpp>
29 #include <Kokkos_StdAlgorithms.hpp>
31 
32 #include "MueLu_AmalgamationFactory.hpp"
33 #include "MueLu_AmalgamationInfo.hpp"
34 #include "MueLu_Exceptions.hpp"
35 #include "MueLu_LWGraph.hpp"
36 
37 #include "MueLu_Level.hpp"
38 #include "MueLu_LWGraph.hpp"
39 #include "MueLu_MasterList.hpp"
40 #include "MueLu_Monitor.hpp"
41 #include "MueLu_PreDropFunctionConstVal.hpp"
42 #include "MueLu_Utilities.hpp"
43 
44 #ifdef HAVE_XPETRA_TPETRA
45 #include "Tpetra_CrsGraphTransposer.hpp"
46 #endif
47 
48 #include <algorithm>
49 #include <cstdlib>
50 #include <string>
51 
52 // If defined, read environment variables.
53 // Should be removed once we are confident that this works.
54 //#define DJS_READ_ENV_VARIABLES
55 
56 namespace MueLu {
57 
58 namespace Details {
59 template <class real_type, class LO>
60 struct DropTol {
61  DropTol() = default;
62  DropTol(DropTol const&) = default;
63  DropTol(DropTol&&) = default;
64 
65  DropTol& operator=(DropTol const&) = default;
66  DropTol& operator=(DropTol&&) = default;
67 
68  DropTol(real_type val_, real_type diag_, LO col_, bool drop_)
69  : val{val_}
70  , diag{diag_}
71  , col{col_}
72  , drop{drop_} {}
73 
77  bool drop{true};
78 
79  // CMS: Auxillary information for debugging info
80  // real_type aux_val {Teuchos::ScalarTraits<real_type>::nan()};
81 };
82 } // namespace Details
83 
88 
89 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
91  RCP<ParameterList> validParamList = rcp(new ParameterList());
92 
93 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
94  SET_VALID_ENTRY("aggregation: drop tol");
95  SET_VALID_ENTRY("aggregation: use ml scaling of drop tol");
96  SET_VALID_ENTRY("aggregation: Dirichlet threshold");
97  SET_VALID_ENTRY("aggregation: greedy Dirichlet");
98  SET_VALID_ENTRY("aggregation: row sum drop tol");
99  SET_VALID_ENTRY("aggregation: drop scheme");
100  SET_VALID_ENTRY("aggregation: block diagonal: interleaved blocksize");
101  SET_VALID_ENTRY("aggregation: distance laplacian directional weights");
102  SET_VALID_ENTRY("aggregation: dropping may create Dirichlet");
103 
104  {
105  // "signed classical" is the Ruge-Stuben style (relative to max off-diagonal), "sign classical sa" is the signed version of the sa criterion (relative to the diagonal values)
106  validParamList->getEntry("aggregation: drop scheme").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("signed classical sa", "classical", "distance laplacian", "signed classical", "block diagonal", "block diagonal classical", "block diagonal distance laplacian", "block diagonal signed classical", "block diagonal colored signed classical"))));
107  }
108  SET_VALID_ENTRY("aggregation: distance laplacian algo");
109  SET_VALID_ENTRY("aggregation: classical algo");
110  SET_VALID_ENTRY("aggregation: coloring: localize color graph");
111 #undef SET_VALID_ENTRY
112  validParamList->set<bool>("lightweight wrap", true, "Experimental option for lightweight graph access");
113 
114  validParamList->set<RCP<const FactoryBase>>("A", Teuchos::null, "Generating factory of the matrix A");
115  validParamList->set<RCP<const FactoryBase>>("UnAmalgamationInfo", Teuchos::null, "Generating factory for UnAmalgamationInfo");
116  validParamList->set<RCP<const FactoryBase>>("Coordinates", Teuchos::null, "Generating factory for Coordinates");
117  validParamList->set<RCP<const FactoryBase>>("BlockNumber", Teuchos::null, "Generating factory for BlockNUmber");
118 
119  return validParamList;
120 }
121 
122 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
124  : predrop_(Teuchos::null) {}
125 
126 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
128  Input(currentLevel, "A");
129  Input(currentLevel, "UnAmalgamationInfo");
130 
131  const ParameterList& pL = GetParameterList();
132  if (pL.get<bool>("lightweight wrap") == true) {
133  std::string algo = pL.get<std::string>("aggregation: drop scheme");
134  if (algo == "distance laplacian" || algo == "block diagonal distance laplacian") {
135  Input(currentLevel, "Coordinates");
136  }
137  if (algo == "signed classical sa")
138  ;
139  else if (algo.find("block diagonal") != std::string::npos || algo.find("signed classical") != std::string::npos) {
140  Input(currentLevel, "BlockNumber");
141  }
142  }
143 }
144 
145 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
147  FactoryMonitor m(*this, "Build", currentLevel);
148 
149  typedef Teuchos::ScalarTraits<SC> STS;
150  typedef typename STS::magnitudeType real_type;
151  typedef Xpetra::MultiVector<real_type, LO, GO, NO> RealValuedMultiVector;
152  typedef Xpetra::MultiVectorFactory<real_type, LO, GO, NO> RealValuedMultiVectorFactory;
153 
154  if (predrop_ != Teuchos::null)
155  GetOStream(Parameters0) << predrop_->description();
156 
157  RCP<Matrix> realA = Get<RCP<Matrix>>(currentLevel, "A");
158  RCP<AmalgamationInfo> amalInfo = Get<RCP<AmalgamationInfo>>(currentLevel, "UnAmalgamationInfo");
159  const ParameterList& pL = GetParameterList();
160  bool doExperimentalWrap = pL.get<bool>("lightweight wrap");
161 
162  GetOStream(Parameters0) << "lightweight wrap = " << doExperimentalWrap << std::endl;
163  std::string algo = pL.get<std::string>("aggregation: drop scheme");
164  const bool aggregationMayCreateDirichlet = pL.get<bool>("aggregation: dropping may create Dirichlet");
165 
167  RCP<Matrix> A;
168 
169  bool use_block_algorithm = false;
170  LO interleaved_blocksize = as<LO>(pL.get<int>("aggregation: block diagonal: interleaved blocksize"));
171  bool useSignedClassicalRS = false;
172  bool useSignedClassicalSA = false;
173  bool generateColoringGraph = false;
174 
175  // NOTE: If we're doing blockDiagonal, we'll not want to do rowSum twice (we'll do it
176  // in the block diagonalization). So we'll clobber the rowSumTol with -1.0 in this case
177  typename STS::magnitudeType rowSumTol = as<typename STS::magnitudeType>(pL.get<double>("aggregation: row sum drop tol"));
178 
179  RCP<LocalOrdinalVector> ghostedBlockNumber;
180 
181  if (algo == "distance laplacian") {
182  // Grab the coordinates for distance laplacian
183  Coords = Get<RCP<RealValuedMultiVector>>(currentLevel, "Coordinates");
184  A = realA;
185  } else if (algo == "signed classical sa") {
186  useSignedClassicalSA = true;
187  algo = "classical";
188  A = realA;
189  } else if (algo == "signed classical" || algo == "block diagonal colored signed classical" || algo == "block diagonal signed classical") {
190  useSignedClassicalRS = true;
191  // if(realA->GetFixedBlockSize() > 1) {
192  RCP<LocalOrdinalVector> BlockNumber = Get<RCP<LocalOrdinalVector>>(currentLevel, "BlockNumber");
193  // Ghost the column block numbers if we need to
194  RCP<const Import> importer = realA->getCrsGraph()->getImporter();
195  if (!importer.is_null()) {
196  SubFactoryMonitor m1(*this, "Block Number import", currentLevel);
197  ghostedBlockNumber = Xpetra::VectorFactory<LO, LO, GO, NO>::Build(importer->getTargetMap());
198  ghostedBlockNumber->doImport(*BlockNumber, *importer, Xpetra::INSERT);
199  } else {
200  ghostedBlockNumber = BlockNumber;
201  }
202  // }
203  if (algo == "block diagonal colored signed classical")
204  generateColoringGraph = true;
205  algo = "classical";
206  A = realA;
207 
208  } else if (algo == "block diagonal") {
209  // Handle the "block diagonal" filtering and then leave
210  BlockDiagonalize(currentLevel, realA, false);
211  return;
212  } else if (algo == "block diagonal classical" || algo == "block diagonal distance laplacian") {
213  // Handle the "block diagonal" filtering, and then continue onward
214  use_block_algorithm = true;
215  RCP<Matrix> filteredMatrix = BlockDiagonalize(currentLevel, realA, true);
216  if (algo == "block diagonal distance laplacian") {
217  // We now need to expand the coordinates by the interleaved blocksize
218  RCP<RealValuedMultiVector> OldCoords = Get<RCP<RealValuedMultiVector>>(currentLevel, "Coordinates");
219  if (OldCoords->getLocalLength() != realA->getLocalNumRows()) {
220  LO dim = (LO)OldCoords->getNumVectors();
221  Coords = RealValuedMultiVectorFactory::Build(realA->getRowMap(), dim);
222  for (LO k = 0; k < dim; k++) {
223  ArrayRCP<const real_type> old_vec = OldCoords->getData(k);
224  ArrayRCP<real_type> new_vec = Coords->getDataNonConst(k);
225  for (LO i = 0; i < (LO)OldCoords->getLocalLength(); i++) {
226  LO new_base = i * dim;
227  for (LO j = 0; j < interleaved_blocksize; j++)
228  new_vec[new_base + j] = old_vec[i];
229  }
230  }
231  } else {
232  Coords = OldCoords;
233  }
234  algo = "distance laplacian";
235  } else if (algo == "block diagonal classical") {
236  algo = "classical";
237  }
238  // All cases
239  A = filteredMatrix;
240  rowSumTol = -1.0;
241  } else {
242  A = realA;
243  }
244 
245  // Distance Laplacian weights
246  Array<double> dlap_weights = pL.get<Array<double>>("aggregation: distance laplacian directional weights");
247  enum { NO_WEIGHTS = 0,
248  SINGLE_WEIGHTS,
249  BLOCK_WEIGHTS };
250  int use_dlap_weights = NO_WEIGHTS;
251  if (algo == "distance laplacian") {
252  LO dim = (LO)Coords->getNumVectors();
253  // If anything isn't 1.0 we need to turn on the weighting
254  bool non_unity = false;
255  for (LO i = 0; !non_unity && i < (LO)dlap_weights.size(); i++) {
256  if (dlap_weights[i] != 1.0) {
257  non_unity = true;
258  }
259  }
260  if (non_unity) {
261  LO blocksize = use_block_algorithm ? as<LO>(pL.get<int>("aggregation: block diagonal: interleaved blocksize")) : 1;
262  if ((LO)dlap_weights.size() == dim)
263  use_dlap_weights = SINGLE_WEIGHTS;
264  else if ((LO)dlap_weights.size() == blocksize * dim)
265  use_dlap_weights = BLOCK_WEIGHTS;
266  else {
268  "length of 'aggregation: distance laplacian directional weights' must equal the coordinate dimension OR the coordinate dimension times the blocksize");
269  }
270  if (GetVerbLevel() & Statistics1)
271  GetOStream(Statistics1) << "Using distance laplacian weights: " << dlap_weights << std::endl;
272  }
273  }
274 
275  // decide wether to use the fast-track code path for standard maps or the somewhat slower
276  // code path for non-standard maps
277  /*bool bNonStandardMaps = false;
278  if (A->IsView("stridedMaps") == true) {
279  Teuchos::RCP<const Map> myMap = A->getRowMap("stridedMaps");
280  Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
281  TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
282  if (strMap->getStridedBlockId() != -1 || strMap->getOffset() > 0)
283  bNonStandardMaps = true;
284  }*/
285 
286  if (doExperimentalWrap) {
287  TEUCHOS_TEST_FOR_EXCEPTION(predrop_ != null && algo != "classical", Exceptions::RuntimeError, "Dropping function must not be provided for \"" << algo << "\" algorithm");
288  TEUCHOS_TEST_FOR_EXCEPTION(algo != "classical" && algo != "distance laplacian" && algo != "signed classical", Exceptions::RuntimeError, "\"algorithm\" must be one of (classical|distance laplacian|signed classical)");
289 
290  SC threshold;
291  // If we're doing the ML-style halving of the drop tol at each level, we do that here.
292  if (pL.get<bool>("aggregation: use ml scaling of drop tol"))
293  threshold = pL.get<double>("aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID());
294  else
295  threshold = as<SC>(pL.get<double>("aggregation: drop tol"));
296 
297  std::string distanceLaplacianAlgoStr = pL.get<std::string>("aggregation: distance laplacian algo");
298  std::string classicalAlgoStr = pL.get<std::string>("aggregation: classical algo");
299  real_type realThreshold = STS::magnitude(threshold); // CMS: Rename this to "magnitude threshold" sometime
300 
302  // Remove this bit once we are confident that cut-based dropping works.
303 #ifdef HAVE_MUELU_DEBUG
304  int distanceLaplacianCutVerbose = 0;
305 #endif
306 #ifdef DJS_READ_ENV_VARIABLES
307  if (getenv("MUELU_DROP_TOLERANCE_MODE")) {
308  distanceLaplacianAlgoStr = std::string(getenv("MUELU_DROP_TOLERANCE_MODE"));
309  }
310 
311  if (getenv("MUELU_DROP_TOLERANCE_THRESHOLD")) {
312  auto tmp = atoi(getenv("MUELU_DROP_TOLERANCE_THRESHOLD"));
313  realThreshold = 1e-4 * tmp;
314  }
315 
316 #ifdef HAVE_MUELU_DEBUG
317  if (getenv("MUELU_DROP_TOLERANCE_VERBOSE")) {
318  distanceLaplacianCutVerbose = atoi(getenv("MUELU_DROP_TOLERANCE_VERBOSE"));
319  }
320 #endif
321 #endif
322 
324  decisionAlgoType distanceLaplacianAlgo = defaultAlgo;
325  decisionAlgoType classicalAlgo = defaultAlgo;
326  if (algo == "distance laplacian") {
327  if (distanceLaplacianAlgoStr == "default")
328  distanceLaplacianAlgo = defaultAlgo;
329  else if (distanceLaplacianAlgoStr == "unscaled cut")
330  distanceLaplacianAlgo = unscaled_cut;
331  else if (distanceLaplacianAlgoStr == "scaled cut")
332  distanceLaplacianAlgo = scaled_cut;
333  else if (distanceLaplacianAlgoStr == "scaled cut symmetric")
334  distanceLaplacianAlgo = scaled_cut_symmetric;
335  else
336  TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: distance laplacian algo\" must be one of (default|unscaled cut|scaled cut), not \"" << distanceLaplacianAlgoStr << "\"");
337  GetOStream(Runtime0) << "algorithm = \"" << algo << "\" distance laplacian algorithm = \"" << distanceLaplacianAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
338  } else if (algo == "classical") {
339  if (classicalAlgoStr == "default")
340  classicalAlgo = defaultAlgo;
341  else if (classicalAlgoStr == "unscaled cut")
342  classicalAlgo = unscaled_cut;
343  else if (classicalAlgoStr == "scaled cut")
344  classicalAlgo = scaled_cut;
345  else
346  TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: classical algo\" must be one of (default|unscaled cut|scaled cut), not \"" << classicalAlgoStr << "\"");
347  GetOStream(Runtime0) << "algorithm = \"" << algo << "\" classical algorithm = \"" << classicalAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
348 
349  } else
350  GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
351  Set<bool>(currentLevel, "Filtering", (threshold != STS::zero()));
352 
353  const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
354 
355  // NOTE: We don't support signed classical RS or SA with cut drop at present
356  TEUCHOS_TEST_FOR_EXCEPTION(useSignedClassicalRS && classicalAlgo != defaultAlgo, Exceptions::RuntimeError, "\"aggregation: classical algo\" != default is not supported for scalled classical aggregation");
357  TEUCHOS_TEST_FOR_EXCEPTION(useSignedClassicalSA && classicalAlgo != defaultAlgo, Exceptions::RuntimeError, "\"aggregation: classical algo\" != default is not supported for scalled classical sa aggregation");
358 
359  GO numDropped = 0, numTotal = 0;
360  std::string graphType = "unamalgamated"; // for description purposes only
361 
362  /* NOTE: storageblocksize (from GetStorageBlockSize()) is the size of a block in the chosen storage scheme.
363  BlockSize is the number of storage blocks that must kept together during the amalgamation process.
364 
365  Both of these quantities may be different than numPDEs (from GetFixedBlockSize()), but the following must always hold:
366 
367  numPDEs = BlockSize * storageblocksize.
368 
369  If numPDEs==1
370  Matrix is point storage (classical CRS storage). storageblocksize=1 and BlockSize=1
371  No other values makes sense.
372 
373  If numPDEs>1
374  If matrix uses point storage, then storageblocksize=1 and BlockSize=numPDEs.
375  If matrix uses block storage, with block size of n, then storageblocksize=n, and BlockSize=numPDEs/n.
376  Thus far, only storageblocksize=numPDEs and BlockSize=1 has been tested.
377  */
378  TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() % A->GetStorageBlockSize() != 0, Exceptions::RuntimeError, "A->GetFixedBlockSize() needs to be a multiple of A->GetStorageBlockSize()");
379  const LO BlockSize = A->GetFixedBlockSize() / A->GetStorageBlockSize();
380 
381  /************************** RS or SA-style Classical Dropping (and variants) **************************/
382  if (algo == "classical") {
383  if (predrop_ == null) {
384  // ap: this is a hack: had to declare predrop_ as mutable
385  predrop_ = rcp(new PreDropFunctionConstVal(threshold));
386  }
387 
388  if (predrop_ != null) {
389  RCP<PreDropFunctionConstVal> predropConstVal = rcp_dynamic_cast<PreDropFunctionConstVal>(predrop_);
390  TEUCHOS_TEST_FOR_EXCEPTION(predropConstVal == Teuchos::null, Exceptions::BadCast,
391  "MueLu::CoalesceFactory::Build: cast to PreDropFunctionConstVal failed.");
392  // If a user provided a predrop function, it overwrites the XML threshold parameter
393  SC newt = predropConstVal->GetThreshold();
394  if (newt != threshold) {
395  GetOStream(Warnings0) << "switching threshold parameter from " << threshold << " (list) to " << newt << " (user function" << std::endl;
396  threshold = newt;
397  }
398  }
399  // At this points we either have
400  // (predrop_ != null)
401  // Therefore, it is sufficient to check only threshold
402  if (BlockSize == 1 && threshold == STS::zero() && !useSignedClassicalRS && !useSignedClassicalSA && A->hasCrsGraph()) {
403  // Case 1: scalar problem, no dropping => just use matrix graph
404  RCP<LWGraph> graph = rcp(new LWGraph(A->getCrsGraph(), "graph of A"));
405  // Detect and record rows that correspond to Dirichlet boundary conditions
406  auto boundaryNodes = MueLu::Utilities<SC, LO, GO, NO>::DetectDirichletRows_kokkos_host(*A, dirichletThreshold);
407  if (rowSumTol > 0.)
408  Utilities::ApplyRowSumCriterionHost(*A, rowSumTol, boundaryNodes);
409 
410  graph->SetBoundaryNodeMap(boundaryNodes);
411  numTotal = A->getLocalNumEntries();
412 
413  if (GetVerbLevel() & Statistics1) {
414  GO numLocalBoundaryNodes = 0;
415  GO numGlobalBoundaryNodes = 0;
416  for (size_t i = 0; i < boundaryNodes.size(); ++i)
417  if (boundaryNodes[i])
418  numLocalBoundaryNodes++;
419  RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
420  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
421  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
422  }
423 
424  Set(currentLevel, "DofsPerNode", 1);
425  Set(currentLevel, "Graph", graph);
426 
427  } else if ((BlockSize == 1 && threshold != STS::zero()) ||
428  (BlockSize == 1 && threshold == STS::zero() && !A->hasCrsGraph()) ||
429  (BlockSize == 1 && useSignedClassicalRS) ||
430  (BlockSize == 1 && useSignedClassicalSA)) {
431  // Case 2: scalar problem with dropping => record the column indices of undropped entries, but still use original
432  // graph's map information, e.g., whether index is local
433  // OR a matrix without a CrsGraph
434 
435  // allocate space for the local graph
436  typename LWGraph::row_type::non_const_type rows("rows", A->getLocalNumRows() + 1);
437  typename LWGraph::entries_type::non_const_type columns("columns", A->getLocalNumEntries());
438 
439  using MT = typename STS::magnitudeType;
440  RCP<Vector> ghostedDiag;
441  ArrayRCP<const SC> ghostedDiagVals;
442  ArrayRCP<const MT> negMaxOffDiagonal;
443  // RS style needs the max negative off-diagonal, SA style needs the diagonal
444  if (useSignedClassicalRS) {
445  if (ghostedBlockNumber.is_null()) {
447  negMaxOffDiagonal = negMaxOffDiagonalVec->getData(0);
448  if (GetVerbLevel() & Statistics1)
449  GetOStream(Statistics1) << "Calculated max point off-diagonal" << std::endl;
450  } else {
451  auto negMaxOffDiagonalVec = MueLu::Utilities<SC, LO, GO, NO>::GetMatrixMaxMinusOffDiagonal(*A, *ghostedBlockNumber);
452  negMaxOffDiagonal = negMaxOffDiagonalVec->getData(0);
453  if (GetVerbLevel() & Statistics1)
454  GetOStream(Statistics1) << "Calculating max block off-diagonal" << std::endl;
455  }
456  } else {
458  if (classicalAlgo == defaultAlgo) {
459  ghostedDiagVals = ghostedDiag->getData(0);
460  }
461  }
462  auto boundaryNodes = MueLu::Utilities<SC, LO, GO, NO>::DetectDirichletRows_kokkos_host(*A, dirichletThreshold);
463  if (rowSumTol > 0.) {
464  if (ghostedBlockNumber.is_null()) {
465  if (GetVerbLevel() & Statistics1)
466  GetOStream(Statistics1) << "Applying point row sum criterion." << std::endl;
467  Utilities::ApplyRowSumCriterionHost(*A, rowSumTol, boundaryNodes);
468  } else {
469  if (GetVerbLevel() & Statistics1)
470  GetOStream(Statistics1) << "Applying block row sum criterion." << std::endl;
471  Utilities::ApplyRowSumCriterionHost(*A, *ghostedBlockNumber, rowSumTol, boundaryNodes);
472  }
473  }
474 
475  ArrayRCP<const LO> g_block_id;
476  if (!ghostedBlockNumber.is_null())
477  g_block_id = ghostedBlockNumber->getData(0);
478 
479  LO realnnz = 0;
480  rows(0) = 0;
481  if (classicalAlgo == defaultAlgo) {
482  SubFactoryMonitor m1(*this, "Classical RS/SA", currentLevel);
483  for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
484  size_t nnz = A->getNumEntriesInLocalRow(row);
485  bool rowIsDirichlet = boundaryNodes[row];
486  ArrayView<const LO> indices;
487  ArrayView<const SC> vals;
488  A->getLocalRowView(row, indices, vals);
489 
490  // FIXME the current predrop function uses the following
491  // FIXME if(std::abs(vals[k]) > std::abs(threshold_) || grow == gcid )
492  // FIXME but the threshold doesn't take into account the rows' diagonal entries
493  // FIXME For now, hardwiring the dropping in here
494 
495  LO rownnz = 0;
496  if (useSignedClassicalRS) {
497  // Signed classical RS style
498  for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
499  LO col = indices[colID];
500  MT max_neg_aik = realThreshold * STS::real(negMaxOffDiagonal[row]);
501  MT neg_aij = -STS::real(vals[colID]);
502  /* if(row==1326) printf("A(%d,%d) = %6.4e, block = (%d,%d) neg_aij = %6.4e max_neg_aik = %6.4e\n",row,col,vals[colID],
503  g_block_id.is_null() ? -1 : g_block_id[row],
504  g_block_id.is_null() ? -1 : g_block_id[col],
505  neg_aij, max_neg_aik);*/
506  if ((!rowIsDirichlet && (g_block_id.is_null() || g_block_id[row] == g_block_id[col]) && neg_aij > max_neg_aik) || row == col) {
507  columns[realnnz++] = col;
508  rownnz++;
509  } else
510  numDropped++;
511  }
512  rows(row + 1) = realnnz;
513  } else if (useSignedClassicalSA) {
514  // Signed classical SA style
515  for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
516  LO col = indices[colID];
517 
518  bool is_nonpositive = STS::real(vals[colID]) <= 0;
519  MT aiiajj = STS::magnitude(threshold * threshold * ghostedDiagVals[col] * ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj|
520  MT aij = is_nonpositive ? STS::magnitude(vals[colID] * vals[colID]) : (-STS::magnitude(vals[colID] * vals[colID])); // + |a_ij|^2, if a_ij < 0, - |a_ij|^2 if a_ij >=0
521  /*
522  if(row==1326) printf("A(%d,%d) = %6.4e, raw_aij = %6.4e aij = %6.4e aiiajj = %6.4e\n",row,col,vals[colID],
523  vals[colID],aij, aiiajj);
524  */
525 
526  if ((!rowIsDirichlet && aij > aiiajj) || row == col) {
527  columns(realnnz++) = col;
528  rownnz++;
529  } else
530  numDropped++;
531  }
532  rows[row + 1] = realnnz;
533  } else {
534  // Standard abs classical
535  for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
536  LO col = indices[colID];
537  MT aiiajj = STS::magnitude(threshold * threshold * ghostedDiagVals[col] * ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj|
538  MT aij = STS::magnitude(vals[colID] * vals[colID]); // |a_ij|^2
539 
540  if ((!rowIsDirichlet && aij > aiiajj) || row == col) {
541  columns(realnnz++) = col;
542  rownnz++;
543  } else
544  numDropped++;
545  }
546  rows(row + 1) = realnnz;
547  }
548  } // end for row
549  } else {
550  /* Cut Algorithm */
551  SubFactoryMonitor m1(*this, "Cut Drop", currentLevel);
552  using ExecSpace = typename Node::execution_space;
553  using TeamPol = Kokkos::TeamPolicy<ExecSpace>;
554  using TeamMem = typename TeamPol::member_type;
555  using ATS = Kokkos::ArithTraits<Scalar>;
556  using impl_scalar_type = typename ATS::val_type;
557  using implATS = Kokkos::ArithTraits<impl_scalar_type>;
558 
559  // move from host to device
560  auto ghostedDiagValsView = Kokkos::subview(ghostedDiag->getDeviceLocalView(Xpetra::Access::ReadOnly), Kokkos::ALL(), 0);
561  auto thresholdKokkos = static_cast<impl_scalar_type>(threshold);
562  auto realThresholdKokkos = implATS::magnitude(thresholdKokkos);
563  auto columnsDevice = Kokkos::create_mirror_view(ExecSpace(), columns);
564 
565  auto A_device = A->getLocalMatrixDevice();
566  RCP<LWGraph> graph = rcp(new LWGraph(A->getCrsGraph(), "graph of A"));
567  RCP<const Import> importer = A->getCrsGraph()->getImporter();
569  RCP<LocalOrdinalVector> boundaryColumnVector;
570  for (size_t i = 0; i < graph->GetNodeNumVertices(); i++) {
571  boundaryNodesVector->getDataNonConst(0)[i] = boundaryNodes[i];
572  }
573  if (!importer.is_null()) {
574  boundaryColumnVector = Xpetra::VectorFactory<LO, LO, GO, NO>::Build(graph->GetImportMap());
575  boundaryColumnVector->doImport(*boundaryNodesVector, *importer, Xpetra::INSERT);
576  } else {
577  boundaryColumnVector = boundaryNodesVector;
578  }
579  auto boundaryColumn = boundaryColumnVector->getDeviceLocalView(Xpetra::Access::ReadOnly);
580  auto boundary = Kokkos::subview(boundaryColumn, Kokkos::ALL(), 0);
581 
582  Kokkos::View<LO*, ExecSpace> rownnzView("rownnzView", A_device.numRows());
583  auto drop_views = Kokkos::View<bool*, ExecSpace>("drop_views", A_device.nnz());
584  auto index_views = Kokkos::View<size_t*, ExecSpace>("index_views", A_device.nnz());
585 
586  Kokkos::parallel_reduce(
587  "classical_cut", TeamPol(A_device.numRows(), Kokkos::AUTO), KOKKOS_LAMBDA(const TeamMem& teamMember, LO& globalnnz, GO& totalDropped) {
588  LO row = teamMember.league_rank();
589  auto rowView = A_device.rowConst(row);
590  size_t nnz = rowView.length;
591 
592  auto drop_view = Kokkos::subview(drop_views, Kokkos::make_pair(A_device.graph.row_map(row), A_device.graph.row_map(row + 1)));
593  auto index_view = Kokkos::subview(index_views, Kokkos::make_pair(A_device.graph.row_map(row), A_device.graph.row_map(row + 1)));
594 
595  // find magnitudes
596  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, (LO)nnz), [&](const LO colID) {
597  index_view(colID) = colID;
598  LO col = rowView.colidx(colID);
599  // ignore diagonals for now, they are checked again later
600  // Don't aggregate boundaries
601  if (row == col || boundary(col)) {
602  drop_view(colID) = true;
603  } else {
604  drop_view(colID) = false;
605  }
606  });
607 
608  size_t dropStart = nnz;
609  if (classicalAlgo == unscaled_cut) {
610  // push diagonals and boundaries to the right, sort everything else by aij on the left
611  Kokkos::Experimental::sort_team(teamMember, index_view, [=](size_t& x, size_t& y) -> bool {
612  if (drop_view(x) || drop_view(y)) {
613  return drop_view(x) < drop_view(y);
614  } else {
615  auto x_aij = implATS::magnitude(rowView.value(x) * rowView.value(x));
616  auto y_aij = implATS::magnitude(rowView.value(y) * rowView.value(y));
617  return x_aij > y_aij;
618  }
619  });
620 
621  // find index where dropping starts
622  Kokkos::parallel_reduce(
623  Kokkos::TeamThreadRange(teamMember, 1, nnz), [=](size_t i, size_t& min) {
624  auto const& x = index_view(i - 1);
625  auto const& y = index_view(i);
626  typename implATS::magnitudeType x_aij = 0;
627  typename implATS::magnitudeType y_aij = 0;
628  if (!drop_view(x)) {
629  x_aij = implATS::magnitude(rowView.value(x) * rowView.value(x));
630  }
631  if (!drop_view(y)) {
632  y_aij = implATS::magnitude(rowView.value(y) * rowView.value(y));
633  }
634 
635  if (realThresholdKokkos * realThresholdKokkos * x_aij > y_aij) {
636  if (i < min) {
637  min = i;
638  }
639  }
640  },
641  Kokkos::Min<size_t>(dropStart));
642  } else if (classicalAlgo == scaled_cut) {
643  // push diagonals and boundaries to the right, sort everything else by aij/aiiajj on the left
644  Kokkos::Experimental::sort_team(teamMember, index_view, [=](size_t& x, size_t& y) -> bool {
645  if (drop_view(x) || drop_view(y)) {
646  return drop_view(x) < drop_view(y);
647  } else {
648  auto x_aij = implATS::magnitude(rowView.value(x) * rowView.value(x));
649  auto y_aij = implATS::magnitude(rowView.value(y) * rowView.value(y));
650  auto x_aiiajj = implATS::magnitude(ghostedDiagValsView(rowView.colidx(x)) * ghostedDiagValsView(row));
651  auto y_aiiajj = implATS::magnitude(ghostedDiagValsView(rowView.colidx(y)) * ghostedDiagValsView(row));
652  return (x_aij / x_aiiajj) > (y_aij / y_aiiajj);
653  }
654  });
655 
656  // find index where dropping starts
657  Kokkos::parallel_reduce(
658  Kokkos::TeamThreadRange(teamMember, 1, nnz), [=](size_t i, size_t& min) {
659  auto const& x = index_view(i - 1);
660  auto const& y = index_view(i);
661  typename implATS::magnitudeType x_val = 0;
662  typename implATS::magnitudeType y_val = 0;
663  if (!drop_view(x)) {
664  typename implATS::magnitudeType x_aij = implATS::magnitude(rowView.value(x) * rowView.value(x));
665  typename implATS::magnitudeType x_aiiajj = implATS::magnitude(ghostedDiagValsView(rowView.colidx(x)) * ghostedDiagValsView(row));
666  x_val = x_aij / x_aiiajj;
667  }
668  if (!drop_view(y)) {
669  typename implATS::magnitudeType y_aij = implATS::magnitude(rowView.value(y) * rowView.value(y));
670  typename implATS::magnitudeType y_aiiajj = implATS::magnitude(ghostedDiagValsView(rowView.colidx(y)) * ghostedDiagValsView(row));
671  y_val = y_aij / y_aiiajj;
672  }
673 
674  if (realThresholdKokkos * realThresholdKokkos * x_val > y_val) {
675  if (i < min) {
676  min = i;
677  }
678  }
679  },
680  Kokkos::Min<size_t>(dropStart));
681  }
682 
683  // drop everything to the right of where values stop passing threshold
684  if (dropStart < nnz) {
685  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, dropStart, nnz), [=](size_t i) {
686  drop_view(index_view(i)) = true;
687  });
688  }
689 
690  LO rownnz = 0;
691  GO rowDropped = 0;
692  Kokkos::parallel_reduce(
693  Kokkos::TeamThreadRange(teamMember, nnz), [=](const size_t idxID, LO& keep, GO& drop) {
694  LO col = rowView.colidx(idxID);
695  // don't drop diagonal
696  if (row == col || !drop_view(idxID)) {
697  columnsDevice(A_device.graph.row_map(row) + idxID) = col;
698  keep++;
699  } else {
700  columnsDevice(A_device.graph.row_map(row) + idxID) = -1;
701  drop++;
702  }
703  },
704  rownnz, rowDropped);
705 
706  globalnnz += rownnz;
707  totalDropped += rowDropped;
708  rownnzView(row) = rownnz;
709  },
710  realnnz, numDropped);
711 
712  // update column indices so that kept indices are aligned to the left for subview that happens later on
713  Kokkos::Experimental::remove(ExecSpace(), columnsDevice, -1);
714  Kokkos::deep_copy(columns, columnsDevice);
715 
716  // update row indices by adding up new # of nnz in each row
717  auto rowsDevice = Kokkos::create_mirror_view(ExecSpace(), rows);
718  Kokkos::parallel_scan(
719  Kokkos::RangePolicy<ExecSpace>(0, A_device.numRows()), KOKKOS_LAMBDA(const int i, LO& partial_sum, bool is_final) {
720  partial_sum += rownnzView(i);
721  if (is_final) rowsDevice(i + 1) = partial_sum;
722  });
723  Kokkos::deep_copy(rows, rowsDevice);
724  }
725 
726  numTotal = A->getLocalNumEntries();
727 
728  if (aggregationMayCreateDirichlet) {
729  // If the only element remaining after filtering is diagonal, mark node as boundary
730  for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
731  if (rows[row + 1] - rows[row] <= 1)
732  boundaryNodes[row] = true;
733  }
734  }
735 
736  RCP<LWGraph> graph = rcp(new LWGraph(rows, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), A->getRowMap(), A->getColMap(), "thresholded graph of A"));
737  graph->SetBoundaryNodeMap(boundaryNodes);
738  if (GetVerbLevel() & Statistics1) {
739  GO numLocalBoundaryNodes = 0;
740  GO numGlobalBoundaryNodes = 0;
741  for (size_t i = 0; i < boundaryNodes.size(); ++i)
742  if (boundaryNodes(i))
743  numLocalBoundaryNodes++;
744  RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
745  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
746  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
747  }
748  Set(currentLevel, "Graph", graph);
749  Set(currentLevel, "DofsPerNode", 1);
750 
751  // If we're doing signed classical, we might want to block-diagonalize *after* the dropping
752  if (generateColoringGraph) {
753  RCP<LWGraph> colorGraph;
754  RCP<const Import> importer = A->getCrsGraph()->getImporter();
755  BlockDiagonalizeGraph(graph, ghostedBlockNumber, colorGraph, importer);
756  Set(currentLevel, "Coloring Graph", colorGraph);
757  // #define CMS_DUMP
758 #ifdef CMS_DUMP
759  {
760  Xpetra::IO<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("m_regular_graph." + std::to_string(currentLevel.GetLevelID()), *rcp_dynamic_cast<LWGraph>(graph)->GetCrsGraph());
761  Xpetra::IO<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("m_color_graph." + std::to_string(currentLevel.GetLevelID()), *rcp_dynamic_cast<LWGraph>(colorGraph)->GetCrsGraph());
762  // int rank = graph->GetDomainMap()->getComm()->getRank();
763  // {
764  // std::ofstream ofs(std::string("m_color_graph_") + std::to_string(currentLevel.GetLevelID())+std::string("_") + std::to_string(rank) + std::string(".dat"),std::ofstream::out);
765  // RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(ofs));
766  // colorGraph->print(*fancy,Debug);
767  // }
768  // {
769  // std::ofstream ofs(std::string("m_regular_graph_") + std::to_string(currentLevel.GetLevelID())+std::string("_") + std::to_string(rank) + std::string(".dat"),std::ofstream::out);
770  // RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(ofs));
771  // graph->print(*fancy,Debug);
772  // }
773  }
774 #endif
775  } // end generateColoringGraph
776  } else if (BlockSize > 1 && threshold == STS::zero()) {
777  // Case 3: Multiple DOF/node problem without dropping
778  const RCP<const Map> rowMap = A->getRowMap();
779  const RCP<const Map> colMap = A->getColMap();
780 
781  graphType = "amalgamated";
782 
783  // build node row map (uniqueMap) and node column map (nonUniqueMap)
784  // the arrays rowTranslation and colTranslation contain the local node id
785  // given a local dof id. The data is calculated by the AmalgamationFactory and
786  // stored in the variable container "UnAmalgamationInfo"
787  RCP<const Map> uniqueMap = amalInfo->getNodeRowMap();
788  RCP<const Map> nonUniqueMap = amalInfo->getNodeColMap();
789  Array<LO> rowTranslation = *(amalInfo->getRowTranslation());
790  Array<LO> colTranslation = *(amalInfo->getColTranslation());
791 
792  // get number of local nodes
793  LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
794 
795  // Allocate space for the local graph
796  typename LWGraph::row_type::non_const_type rows("rows", numRows + 1);
797  typename LWGraph::entries_type::non_const_type columns("columns", A->getLocalNumEntries());
798 
799  typename LWGraph::boundary_nodes_type amalgBoundaryNodes("amalgBoundaryNodes", numRows);
800  Kokkos::deep_copy(amalgBoundaryNodes, false);
801 
802  // Detect and record rows that correspond to Dirichlet boundary conditions
803  // TODO If we use ArrayRCP<LO>, then we can record boundary nodes as usual. Size
804  // TODO the array one bigger than the number of local rows, and the last entry can
805  // TODO hold the actual number of boundary nodes. Clever, huh?
806  ArrayRCP<bool> pointBoundaryNodes;
807  pointBoundaryNodes = Teuchos::arcp_const_cast<bool>(MueLu::Utilities<SC, LO, GO, NO>::DetectDirichletRows(*A, dirichletThreshold));
808  if (rowSumTol > 0.)
809  Utilities::ApplyRowSumCriterion(*A, rowSumTol, pointBoundaryNodes);
810 
811  // extract striding information
812  LO blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map)
813  LO blkId = -1; //< the block id within the strided map (or -1 if it is a full block map)
814  LO blkPartSize = A->GetFixedBlockSize(); //< stores the size of the block within the strided map
815  if (A->IsView("stridedMaps") == true) {
816  Teuchos::RCP<const Map> myMap = A->getRowMap("stridedMaps");
817  Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
818  TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
819  blkSize = Teuchos::as<const LO>(strMap->getFixedBlockSize());
820  blkId = strMap->getStridedBlockId();
821  if (blkId > -1)
822  blkPartSize = Teuchos::as<LO>(strMap->getStridingData()[blkId]);
823  }
824 
825  // loop over all local nodes
826  LO realnnz = 0;
827  rows(0) = 0;
828  Array<LO> indicesExtra;
829  for (LO row = 0; row < numRows; row++) {
830  ArrayView<const LO> indices;
831  indicesExtra.resize(0);
832 
833  // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet
834  // Note, that pointBoundaryNodes lives on the dofmap (and not the node map).
835  // Therefore, looping over all dofs is fine here. We use blkPartSize as we work
836  // with local ids.
837  // TODO: Here we have different options of how to define a node to be a boundary (or Dirichlet)
838  // node.
839  bool isBoundary = false;
840  if (pL.get<bool>("aggregation: greedy Dirichlet") == true) {
841  for (LO j = 0; j < blkPartSize; j++) {
842  if (pointBoundaryNodes[row * blkPartSize + j]) {
843  isBoundary = true;
844  break;
845  }
846  }
847  } else {
848  isBoundary = true;
849  for (LO j = 0; j < blkPartSize; j++) {
850  if (!pointBoundaryNodes[row * blkPartSize + j]) {
851  isBoundary = false;
852  break;
853  }
854  }
855  }
856 
857  // Merge rows of A
858  // The array indicesExtra contains local column node ids for the current local node "row"
859  if (!isBoundary)
860  MergeRows(*A, row, indicesExtra, colTranslation);
861  else
862  indicesExtra.push_back(row);
863  indices = indicesExtra;
864  numTotal += indices.size();
865 
866  // add the local column node ids to the full columns array which
867  // contains the local column node ids for all local node rows
868  LO nnz = indices.size(), rownnz = 0;
869  for (LO colID = 0; colID < nnz; colID++) {
870  LO col = indices[colID];
871  columns(realnnz++) = col;
872  rownnz++;
873  }
874 
875  if (rownnz == 1) {
876  // If the only element remaining after filtering is diagonal, mark node as boundary
877  // FIXME: this should really be replaced by the following
878  // if (indices.size() == 1 && indices[0] == row)
879  // boundaryNodes[row] = true;
880  // We do not do it this way now because there is no framework for distinguishing isolated
881  // and boundary nodes in the aggregation algorithms
882  amalgBoundaryNodes[row] = true;
883  }
884  rows(row + 1) = realnnz;
885  } // for (LO row = 0; row < numRows; row++)
886 
887  RCP<LWGraph> graph = rcp(new LWGraph(rows, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), uniqueMap, nonUniqueMap, "amalgamated graph of A"));
888  graph->SetBoundaryNodeMap(amalgBoundaryNodes);
889 
890  if (GetVerbLevel() & Statistics1) {
891  GO numLocalBoundaryNodes = 0;
892  GO numGlobalBoundaryNodes = 0;
893 
894  for (size_t i = 0; i < amalgBoundaryNodes.size(); ++i)
895  if (amalgBoundaryNodes(i))
896  numLocalBoundaryNodes++;
897 
898  RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
899  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
900  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes
901  << " agglomerated Dirichlet nodes" << std::endl;
902  }
903 
904  Set(currentLevel, "Graph", graph);
905  Set(currentLevel, "DofsPerNode", blkSize); // full block size
906 
907  } else if (BlockSize > 1 && threshold != STS::zero()) {
908  // Case 4: Multiple DOF/node problem with dropping
909  const RCP<const Map> rowMap = A->getRowMap();
910  const RCP<const Map> colMap = A->getColMap();
911  graphType = "amalgamated";
912 
913  // build node row map (uniqueMap) and node column map (nonUniqueMap)
914  // the arrays rowTranslation and colTranslation contain the local node id
915  // given a local dof id. The data is calculated by the AmalgamationFactory and
916  // stored in the variable container "UnAmalgamationInfo"
917  RCP<const Map> uniqueMap = amalInfo->getNodeRowMap();
918  RCP<const Map> nonUniqueMap = amalInfo->getNodeColMap();
919  Array<LO> rowTranslation = *(amalInfo->getRowTranslation());
920  Array<LO> colTranslation = *(amalInfo->getColTranslation());
921 
922  // get number of local nodes
923  LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
924 
925  // Allocate space for the local graph
926  typename LWGraph::row_type::non_const_type rows("rows", numRows + 1);
927  typename LWGraph::entries_type::non_const_type columns("columns", A->getLocalNumEntries());
928 
929  typename LWGraph::boundary_nodes_type amalgBoundaryNodes("amalgBoundaryNodes", numRows);
930  Kokkos::deep_copy(amalgBoundaryNodes, false);
931 
932  // Detect and record rows that correspond to Dirichlet boundary conditions
933  // TODO If we use ArrayRCP<LO>, then we can record boundary nodes as usual. Size
934  // TODO the array one bigger than the number of local rows, and the last entry can
935  // TODO hold the actual number of boundary nodes. Clever, huh?
936  auto pointBoundaryNodes = MueLu::Utilities<SC, LO, GO, NO>::DetectDirichletRows_kokkos_host(*A, dirichletThreshold);
937  if (rowSumTol > 0.)
938  Utilities::ApplyRowSumCriterionHost(*A, rowSumTol, pointBoundaryNodes);
939 
940  // extract striding information
941  LO blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map)
942  LO blkId = -1; //< the block id within the strided map (or -1 if it is a full block map)
943  LO blkPartSize = A->GetFixedBlockSize(); //< stores the size of the block within the strided map
944  if (A->IsView("stridedMaps") == true) {
945  Teuchos::RCP<const Map> myMap = A->getRowMap("stridedMaps");
946  Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
947  TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
948  blkSize = Teuchos::as<const LO>(strMap->getFixedBlockSize());
949  blkId = strMap->getStridedBlockId();
950  if (blkId > -1)
951  blkPartSize = Teuchos::as<LO>(strMap->getStridingData()[blkId]);
952  }
953 
954  // extract diagonal data for dropping strategy
956  const ArrayRCP<const SC> ghostedDiagVals = ghostedDiag->getData(0);
957 
958  // loop over all local nodes
959  LO realnnz = 0;
960  rows[0] = 0;
961  Array<LO> indicesExtra;
962  for (LO row = 0; row < numRows; row++) {
963  ArrayView<const LO> indices;
964  indicesExtra.resize(0);
965 
966  // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet
967  // Note, that pointBoundaryNodes lives on the dofmap (and not the node map).
968  // Therefore, looping over all dofs is fine here. We use blkPartSize as we work
969  // with local ids.
970  // TODO: Here we have different options of how to define a node to be a boundary (or Dirichlet)
971  // node.
972  bool isBoundary = false;
973  if (pL.get<bool>("aggregation: greedy Dirichlet") == true) {
974  for (LO j = 0; j < blkPartSize; j++) {
975  if (pointBoundaryNodes[row * blkPartSize + j]) {
976  isBoundary = true;
977  break;
978  }
979  }
980  } else {
981  isBoundary = true;
982  for (LO j = 0; j < blkPartSize; j++) {
983  if (!pointBoundaryNodes[row * blkPartSize + j]) {
984  isBoundary = false;
985  break;
986  }
987  }
988  }
989 
990  // Merge rows of A
991  // The array indicesExtra contains local column node ids for the current local node "row"
992  if (!isBoundary)
993  MergeRowsWithDropping(*A, row, ghostedDiagVals, threshold, indicesExtra, colTranslation);
994  else
995  indicesExtra.push_back(row);
996  indices = indicesExtra;
997  numTotal += indices.size();
998 
999  // add the local column node ids to the full columns array which
1000  // contains the local column node ids for all local node rows
1001  LO nnz = indices.size(), rownnz = 0;
1002  for (LO colID = 0; colID < nnz; colID++) {
1003  LO col = indices[colID];
1004  columns[realnnz++] = col;
1005  rownnz++;
1006  }
1007 
1008  if (rownnz == 1) {
1009  // If the only element remaining after filtering is diagonal, mark node as boundary
1010  // FIXME: this should really be replaced by the following
1011  // if (indices.size() == 1 && indices[0] == row)
1012  // boundaryNodes[row] = true;
1013  // We do not do it this way now because there is no framework for distinguishing isolated
1014  // and boundary nodes in the aggregation algorithms
1015  amalgBoundaryNodes[row] = true;
1016  }
1017  rows[row + 1] = realnnz;
1018  } // for (LO row = 0; row < numRows; row++)
1019  // columns.resize(realnnz);
1020 
1021  RCP<LWGraph> graph = rcp(new LWGraph(rows, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), uniqueMap, nonUniqueMap, "amalgamated graph of A"));
1022  graph->SetBoundaryNodeMap(amalgBoundaryNodes);
1023 
1024  if (GetVerbLevel() & Statistics1) {
1025  GO numLocalBoundaryNodes = 0;
1026  GO numGlobalBoundaryNodes = 0;
1027 
1028  for (size_t i = 0; i < amalgBoundaryNodes.size(); ++i)
1029  if (amalgBoundaryNodes(i))
1030  numLocalBoundaryNodes++;
1031 
1032  RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
1033  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1034  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes
1035  << " agglomerated Dirichlet nodes" << std::endl;
1036  }
1037 
1038  Set(currentLevel, "Graph", graph);
1039  Set(currentLevel, "DofsPerNode", blkSize); // full block size
1040  }
1041 
1042  } else if (algo == "distance laplacian") {
1043  LO blkSize = A->GetFixedBlockSize();
1044  GO indexBase = A->getRowMap()->getIndexBase();
1045  // [*0*] : FIXME
1046  // ap: somehow, if I move this line to [*1*], Belos throws an error
1047  // I'm not sure what's going on. Do we always have to Get data, if we did
1048  // DeclareInput for it?
1049  // RCP<RealValuedMultiVector> Coords = Get< RCP<RealValuedMultiVector > >(currentLevel, "Coordinates");
1050 
1051  // Detect and record rows that correspond to Dirichlet boundary conditions
1052  // TODO If we use ArrayRCP<LO>, then we can record boundary nodes as usual. Size
1053  // TODO the array one bigger than the number of local rows, and the last entry can
1054  // TODO hold the actual number of boundary nodes. Clever, huh?
1055  auto pointBoundaryNodes = MueLu::Utilities<SC, LO, GO, NO>::DetectDirichletRows_kokkos_host(*A, dirichletThreshold);
1056  if (rowSumTol > 0.)
1057  Utilities::ApplyRowSumCriterionHost(*A, rowSumTol, pointBoundaryNodes);
1058 
1059  if ((blkSize == 1) && (threshold == STS::zero())) {
1060  // Trivial case: scalar problem, no dropping. Can return original graph
1061  RCP<LWGraph> graph = rcp(new LWGraph(A->getCrsGraph(), "graph of A"));
1062  graph->SetBoundaryNodeMap(pointBoundaryNodes);
1063  graphType = "unamalgamated";
1064  numTotal = A->getLocalNumEntries();
1065 
1066  if (GetVerbLevel() & Statistics1) {
1067  GO numLocalBoundaryNodes = 0;
1068  GO numGlobalBoundaryNodes = 0;
1069  for (size_t i = 0; i < pointBoundaryNodes.size(); ++i)
1070  if (pointBoundaryNodes(i))
1071  numLocalBoundaryNodes++;
1072  RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
1073  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1074  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
1075  }
1076 
1077  Set(currentLevel, "DofsPerNode", blkSize);
1078  Set(currentLevel, "Graph", graph);
1079 
1080  } else {
1081  // ap: We make quite a few assumptions here; general case may be a lot different,
1082  // but much much harder to implement. We assume that:
1083  // 1) all maps are standard maps, not strided maps
1084  // 2) global indices of dofs in A are related to dofs in coordinates in a simple arithmetic
1085  // way: rows i*blkSize, i*blkSize+1, ..., i*blkSize + (blkSize-1) correspond to node i
1086  //
1087  // NOTE: Potentially, some of the code below could be simplified with UnAmalgamationInfo,
1088  // but as I totally don't understand that code, here is my solution
1089 
1090  // [*1*]: see [*0*]
1091 
1092  // Check that the number of local coordinates is consistent with the #rows in A
1093  TEUCHOS_TEST_FOR_EXCEPTION(A->getRowMap()->getLocalNumElements() / blkSize != Coords->getLocalLength(), Exceptions::Incompatible,
1094  "Coordinate vector length (" << Coords->getLocalLength() << ") is incompatible with number of rows in A (" << A->getRowMap()->getLocalNumElements() << ") by modulo block size (" << blkSize << ").");
1095 
1096  const RCP<const Map> colMap = A->getColMap();
1097  RCP<const Map> uniqueMap, nonUniqueMap;
1098  Array<LO> colTranslation;
1099  if (blkSize == 1) {
1100  uniqueMap = A->getRowMap();
1101  nonUniqueMap = A->getColMap();
1102  graphType = "unamalgamated";
1103 
1104  } else {
1105  uniqueMap = Coords->getMap();
1106  TEUCHOS_TEST_FOR_EXCEPTION(uniqueMap->getIndexBase() != indexBase, Exceptions::Incompatible,
1107  "Different index bases for matrix and coordinates");
1108 
1109  AmalgamationFactory::AmalgamateMap(*(A->getColMap()), *A, nonUniqueMap, colTranslation);
1110 
1111  graphType = "amalgamated";
1112  }
1113  LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
1114 
1115  RCP<RealValuedMultiVector> ghostedCoords;
1116  RCP<Vector> ghostedLaplDiag;
1117  Teuchos::ArrayRCP<SC> ghostedLaplDiagData;
1118  if (threshold != STS::zero()) {
1119  // Get ghost coordinates
1120  RCP<const Import> importer;
1121  {
1122  SubFactoryMonitor m1(*this, "Import construction", currentLevel);
1123  if (blkSize == 1 && realA->getCrsGraph()->getImporter() != Teuchos::null) {
1124  GetOStream(Warnings1) << "Using existing importer from matrix graph" << std::endl;
1125  importer = realA->getCrsGraph()->getImporter();
1126  } else {
1127  GetOStream(Warnings0) << "Constructing new importer instance" << std::endl;
1128  importer = ImportFactory::Build(uniqueMap, nonUniqueMap);
1129  }
1130  } // subtimer
1131  ghostedCoords = Xpetra::MultiVectorFactory<real_type, LO, GO, NO>::Build(nonUniqueMap, Coords->getNumVectors());
1132  {
1133  SubFactoryMonitor m1(*this, "Coordinate import", currentLevel);
1134  ghostedCoords->doImport(*Coords, *importer, Xpetra::INSERT);
1135  } // subtimer
1136 
1137  // Construct Distance Laplacian diagonal
1138  RCP<Vector> localLaplDiag = VectorFactory::Build(uniqueMap);
1139  Array<LO> indicesExtra;
1141  if (threshold != STS::zero()) {
1142  const size_t numVectors = ghostedCoords->getNumVectors();
1143  coordData.reserve(numVectors);
1144  for (size_t j = 0; j < numVectors; j++) {
1145  Teuchos::ArrayRCP<const real_type> tmpData = ghostedCoords->getData(j);
1146  coordData.push_back(tmpData);
1147  }
1148  }
1149  {
1150  SubFactoryMonitor m1(*this, "Laplacian local diagonal", currentLevel);
1151  ArrayRCP<SC> localLaplDiagData = localLaplDiag->getDataNonConst(0);
1152  for (LO row = 0; row < numRows; row++) {
1153  ArrayView<const LO> indices;
1154 
1155  if (blkSize == 1) {
1156  ArrayView<const SC> vals;
1157  A->getLocalRowView(row, indices, vals);
1158 
1159  } else {
1160  // Merge rows of A
1161  indicesExtra.resize(0);
1162  MergeRows(*A, row, indicesExtra, colTranslation);
1163  indices = indicesExtra;
1164  }
1165 
1166  LO nnz = indices.size();
1167  bool haveAddedToDiag = false;
1168  for (LO colID = 0; colID < nnz; colID++) {
1169  const LO col = indices[colID];
1170 
1171  if (row != col) {
1172  if (use_dlap_weights == SINGLE_WEIGHTS) {
1173  /*printf("[%d,%d] Unweighted Distance = %6.4e Weighted Distance = %6.4e\n",row,col,
1174  MueLu::Utilities<real_type,LO,GO,NO>::Distance2(coordData, row, col),
1175  MueLu::Utilities<real_type,LO,GO,NO>::Distance2(dlap_weights(),coordData, row, col));*/
1176  localLaplDiagData[row] += STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(dlap_weights(), coordData, row, col);
1177  } else if (use_dlap_weights == BLOCK_WEIGHTS) {
1178  int block_id = row % interleaved_blocksize;
1179  int block_start = block_id * interleaved_blocksize;
1180  localLaplDiagData[row] += STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(dlap_weights(block_start, interleaved_blocksize), coordData, row, col);
1181  } else {
1182  // printf("[%d,%d] Unweighted Distance = %6.4e\n",row,col,MueLu::Utilities<real_type,LO,GO,NO>::Distance2(coordData, row, col));
1183  localLaplDiagData[row] += STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(coordData, row, col);
1184  }
1185  haveAddedToDiag = true;
1186  }
1187  }
1188  // Deal with the situation where boundary conditions have only been enforced on rows, but not on columns.
1189  // We enforce dropping of these entries by assigning a very large number to the diagonal entries corresponding to BCs.
1190  if (!haveAddedToDiag)
1191  localLaplDiagData[row] = STS::rmax();
1192  }
1193  } // subtimer
1194  {
1195  SubFactoryMonitor m1(*this, "Laplacian distributed diagonal", currentLevel);
1196  ghostedLaplDiag = VectorFactory::Build(nonUniqueMap);
1197  ghostedLaplDiag->doImport(*localLaplDiag, *importer, Xpetra::INSERT);
1198  ghostedLaplDiagData = ghostedLaplDiag->getDataNonConst(0);
1199  } // subtimer
1200 
1201  } else {
1202  GetOStream(Runtime0) << "Skipping distance laplacian construction due to 0 threshold" << std::endl;
1203  }
1204 
1205  // NOTE: ghostedLaplDiagData might be zero if we don't actually calculate the laplacian
1206 
1207  // allocate space for the local graph
1208  typename LWGraph::row_type::non_const_type rows("rows", numRows + 1);
1209  typename LWGraph::entries_type::non_const_type columns("columns", A->getLocalNumEntries());
1210 
1211 #ifdef HAVE_MUELU_DEBUG
1212  // DEBUGGING
1213  for (LO i = 0; i < (LO)columns.size(); i++) columns[i] = -666;
1214 #endif
1215 
1216  // Extra array for if we're allowing symmetrization with cutting
1217  ArrayRCP<LO> rows_stop;
1218  bool use_stop_array = threshold != STS::zero() && distanceLaplacianAlgo == scaled_cut_symmetric;
1219  if (use_stop_array)
1220  // rows_stop = typename LWGraph::row_type::non_const_type("rows_stop", numRows);
1221  rows_stop.resize(numRows);
1222 
1223  typename LWGraph::boundary_nodes_type amalgBoundaryNodes("amalgBoundaryNodes", numRows);
1224  Kokkos::deep_copy(amalgBoundaryNodes, false);
1225 
1226  LO realnnz = 0;
1227  rows(0) = 0;
1228 
1229  Array<LO> indicesExtra;
1230  {
1231  SubFactoryMonitor m1(*this, "Laplacian dropping", currentLevel);
1233  if (threshold != STS::zero()) {
1234  const size_t numVectors = ghostedCoords->getNumVectors();
1235  coordData.reserve(numVectors);
1236  for (size_t j = 0; j < numVectors; j++) {
1237  Teuchos::ArrayRCP<const real_type> tmpData = ghostedCoords->getData(j);
1238  coordData.push_back(tmpData);
1239  }
1240  }
1241 
1242  ArrayView<const SC> vals; // CMS hackery
1243  for (LO row = 0; row < numRows; row++) {
1244  ArrayView<const LO> indices;
1245  indicesExtra.resize(0);
1246  bool isBoundary = false;
1247 
1248  if (blkSize == 1) {
1249  // ArrayView<const SC> vals;//CMS uncomment
1250  A->getLocalRowView(row, indices, vals);
1251  isBoundary = pointBoundaryNodes[row];
1252  } else {
1253  // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet
1254  for (LO j = 0; j < blkSize; j++) {
1255  if (!pointBoundaryNodes[row * blkSize + j]) {
1256  isBoundary = false;
1257  break;
1258  }
1259  }
1260 
1261  // Merge rows of A
1262  if (!isBoundary)
1263  MergeRows(*A, row, indicesExtra, colTranslation);
1264  else
1265  indicesExtra.push_back(row);
1266  indices = indicesExtra;
1267  }
1268  numTotal += indices.size();
1269 
1270  LO nnz = indices.size(), rownnz = 0;
1271 
1272  if (use_stop_array) {
1273  rows(row + 1) = rows(row) + nnz;
1274  realnnz = rows(row);
1275  }
1276 
1277  if (threshold != STS::zero()) {
1278  // default
1279  if (distanceLaplacianAlgo == defaultAlgo) {
1280  /* Standard Distance Laplacian */
1281  for (LO colID = 0; colID < nnz; colID++) {
1282  LO col = indices[colID];
1283 
1284  if (row == col) {
1285  columns(realnnz++) = col;
1286  rownnz++;
1287  continue;
1288  }
1289 
1290  // We do not want the distance Laplacian aggregating boundary nodes
1291  if (isBoundary) continue;
1292 
1293  SC laplVal;
1294  if (use_dlap_weights == SINGLE_WEIGHTS) {
1295  laplVal = STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(dlap_weights(), coordData, row, col);
1296  } else if (use_dlap_weights == BLOCK_WEIGHTS) {
1297  int block_id = row % interleaved_blocksize;
1298  int block_start = block_id * interleaved_blocksize;
1299  laplVal = STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(dlap_weights(block_start, interleaved_blocksize), coordData, row, col);
1300  } else {
1301  laplVal = STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(coordData, row, col);
1302  }
1303  real_type aiiajj = STS::magnitude(realThreshold * realThreshold * ghostedLaplDiagData[row] * ghostedLaplDiagData[col]);
1304  real_type aij = STS::magnitude(laplVal * laplVal);
1305 
1306  if (aij > aiiajj) {
1307  columns(realnnz++) = col;
1308  rownnz++;
1309  } else {
1310  numDropped++;
1311  }
1312  }
1313  } else {
1314  /* Cut Algorithm */
1315  using DropTol = Details::DropTol<real_type, LO>;
1316  std::vector<DropTol> drop_vec;
1317  drop_vec.reserve(nnz);
1318  const real_type zero = Teuchos::ScalarTraits<real_type>::zero();
1319  const real_type one = Teuchos::ScalarTraits<real_type>::one();
1320 
1321  // find magnitudes
1322  for (LO colID = 0; colID < nnz; colID++) {
1323  LO col = indices[colID];
1324 
1325  if (row == col) {
1326  drop_vec.emplace_back(zero, one, colID, false);
1327  continue;
1328  }
1329  // We do not want the distance Laplacian aggregating boundary nodes
1330  if (isBoundary) continue;
1331 
1332  SC laplVal;
1333  if (use_dlap_weights == SINGLE_WEIGHTS) {
1334  laplVal = STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(dlap_weights(), coordData, row, col);
1335  } else if (use_dlap_weights == BLOCK_WEIGHTS) {
1336  int block_id = row % interleaved_blocksize;
1337  int block_start = block_id * interleaved_blocksize;
1338  laplVal = STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(dlap_weights(block_start, interleaved_blocksize), coordData, row, col);
1339  } else {
1340  laplVal = STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(coordData, row, col);
1341  }
1342 
1343  real_type aiiajj = STS::magnitude(ghostedLaplDiagData[row] * ghostedLaplDiagData[col]);
1344  real_type aij = STS::magnitude(laplVal * laplVal);
1345 
1346  drop_vec.emplace_back(aij, aiiajj, colID, false);
1347  }
1348 
1349  const size_t n = drop_vec.size();
1350 
1351  if (distanceLaplacianAlgo == unscaled_cut) {
1352  std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol const& a, DropTol const& b) {
1353  return a.val > b.val;
1354  });
1355 
1356  bool drop = false;
1357  for (size_t i = 1; i < n; ++i) {
1358  if (!drop) {
1359  auto const& x = drop_vec[i - 1];
1360  auto const& y = drop_vec[i];
1361  auto a = x.val;
1362  auto b = y.val;
1363  if (realThreshold * realThreshold * a > b) {
1364  drop = true;
1365 #ifdef HAVE_MUELU_DEBUG
1366  if (distanceLaplacianCutVerbose) {
1367  std::cout << "DJS: KEEP, N, ROW: " << i + 1 << ", " << n << ", " << row << std::endl;
1368  }
1369 #endif
1370  }
1371  }
1372  drop_vec[i].drop = drop;
1373  }
1374  } else if (distanceLaplacianAlgo == scaled_cut || distanceLaplacianAlgo == scaled_cut_symmetric) {
1375  std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol const& a, DropTol const& b) {
1376  return a.val / a.diag > b.val / b.diag;
1377  });
1378 
1379  bool drop = false;
1380  for (size_t i = 1; i < n; ++i) {
1381  if (!drop) {
1382  auto const& x = drop_vec[i - 1];
1383  auto const& y = drop_vec[i];
1384  auto a = x.val / x.diag;
1385  auto b = y.val / y.diag;
1386  if (realThreshold * realThreshold * a > b) {
1387  drop = true;
1388 #ifdef HAVE_MUELU_DEBUG
1389  if (distanceLaplacianCutVerbose) {
1390  std::cout << "DJS: KEEP, N, ROW: " << i + 1 << ", " << n << ", " << row << std::endl;
1391  }
1392 #endif
1393  }
1394  }
1395  drop_vec[i].drop = drop;
1396  }
1397  }
1398 
1399  std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol const& a, DropTol const& b) {
1400  return a.col < b.col;
1401  });
1402 
1403  for (LO idxID = 0; idxID < (LO)drop_vec.size(); idxID++) {
1404  LO col = indices[drop_vec[idxID].col];
1405 
1406  // don't drop diagonal
1407  if (row == col) {
1408  columns(realnnz++) = col;
1409  rownnz++;
1410  // printf("(%d,%d) KEEP %13s matrix = %6.4e\n",row,row,"DIAGONAL",drop_vec[idxID].aux_val);
1411  continue;
1412  }
1413 
1414  if (!drop_vec[idxID].drop) {
1415  columns(realnnz++) = col;
1416  // printf("(%d,%d) KEEP dlap = %6.4e matrix = %6.4e\n",row,col,drop_vec[idxID].val/drop_vec[idxID].diag,drop_vec[idxID].aux_val);
1417  rownnz++;
1418  } else {
1419  // printf("(%d,%d) DROP dlap = %6.4e matrix = %6.4e\n",row,col,drop_vec[idxID].val/drop_vec[idxID].diag,drop_vec[idxID].aux_val);
1420  numDropped++;
1421  }
1422  }
1423  }
1424  } else {
1425  // Skip laplace calculation and threshold comparison for zero threshold
1426  for (LO colID = 0; colID < nnz; colID++) {
1427  LO col = indices[colID];
1428  columns(realnnz++) = col;
1429  rownnz++;
1430  }
1431  }
1432 
1433  if (rownnz == 1) {
1434  // If the only element remaining after filtering is diagonal, mark node as boundary
1435  // FIXME: this should really be replaced by the following
1436  // if (indices.size() == 1 && indices[0] == row)
1437  // boundaryNodes[row] = true;
1438  // We do not do it this way now because there is no framework for distinguishing isolated
1439  // and boundary nodes in the aggregation algorithms
1440  amalgBoundaryNodes[row] = true;
1441  }
1442 
1443  if (use_stop_array)
1444  rows_stop[row] = rownnz + rows[row];
1445  else
1446  rows[row + 1] = realnnz;
1447  } // for (LO row = 0; row < numRows; row++)
1448 
1449  } // subtimer
1450 
1451  if (use_stop_array) {
1452  // Do symmetrization of the cut matrix
1453  // NOTE: We assume nested row/column maps here
1454  for (LO row = 0; row < numRows; row++) {
1455  for (LO colidx = rows[row]; colidx < rows_stop[row]; colidx++) {
1456  LO col = columns[colidx];
1457  if (col >= numRows) continue;
1458 
1459  bool found = false;
1460  for (LO t_col = rows(col); !found && t_col < rows_stop[col]; t_col++) {
1461  if (columns[t_col] == row)
1462  found = true;
1463  }
1464  // We didn't find the transpose buddy, so let's symmetrize, unless we'd be symmetrizing
1465  // into a Dirichlet unknown. In that case don't.
1466  if (!found && !pointBoundaryNodes[col] && Teuchos::as<typename LWGraph::row_type::value_type>(rows_stop[col]) < rows[col + 1]) {
1467  LO new_idx = rows_stop[col];
1468  // printf("(%d,%d) SYMADD entry\n",col,row);
1469  columns[new_idx] = row;
1470  rows_stop[col]++;
1471  numDropped--;
1472  }
1473  }
1474  }
1475 
1476  // Condense everything down
1477  LO current_start = 0;
1478  for (LO row = 0; row < numRows; row++) {
1479  LO old_start = current_start;
1480  for (LO col = rows(row); col < rows_stop[row]; col++) {
1481  if (current_start != col) {
1482  columns(current_start) = columns(col);
1483  }
1484  current_start++;
1485  }
1486  rows[row] = old_start;
1487  }
1488  rows(numRows) = realnnz = current_start;
1489  }
1490 
1491  RCP<LWGraph> graph;
1492  {
1493  SubFactoryMonitor m1(*this, "Build amalgamated graph", currentLevel);
1494  graph = rcp(new LWGraph(rows, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), uniqueMap, nonUniqueMap, "amalgamated graph of A"));
1495  graph->SetBoundaryNodeMap(amalgBoundaryNodes);
1496  } // subtimer
1497 
1498  if (GetVerbLevel() & Statistics1) {
1499  GO numLocalBoundaryNodes = 0;
1500  GO numGlobalBoundaryNodes = 0;
1501 
1502  for (size_t i = 0; i < amalgBoundaryNodes.size(); ++i)
1503  if (amalgBoundaryNodes(i))
1504  numLocalBoundaryNodes++;
1505 
1506  RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
1507  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1508  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " agglomerated Dirichlet nodes"
1509  << " using threshold " << dirichletThreshold << std::endl;
1510  }
1511 
1512  Set(currentLevel, "Graph", graph);
1513  Set(currentLevel, "DofsPerNode", blkSize);
1514  }
1515  }
1516 
1517  if ((GetVerbLevel() & Statistics1) && !(A->GetFixedBlockSize() > 1 && threshold != STS::zero())) {
1518  RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
1519  GO numGlobalTotal, numGlobalDropped;
1520  MueLu_sumAll(comm, numTotal, numGlobalTotal);
1521  MueLu_sumAll(comm, numDropped, numGlobalDropped);
1522  GetOStream(Statistics1) << "Number of dropped entries in " << graphType << " matrix graph: " << numGlobalDropped << "/" << numGlobalTotal;
1523  if (numGlobalTotal != 0)
1524  GetOStream(Statistics1) << " (" << 100 * Teuchos::as<double>(numGlobalDropped) / Teuchos::as<double>(numGlobalTotal) << "%)";
1525  GetOStream(Statistics1) << std::endl;
1526  }
1527 
1528  } else {
1529  // what Tobias has implemented
1530 
1531  SC threshold = as<SC>(pL.get<double>("aggregation: drop tol"));
1532  // GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
1533  GetOStream(Runtime0) << "algorithm = \""
1534  << "failsafe"
1535  << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
1536  Set<bool>(currentLevel, "Filtering", (threshold != STS::zero()));
1537 
1538  RCP<const Map> rowMap = A->getRowMap();
1539  RCP<const Map> colMap = A->getColMap();
1540 
1541  LO blockdim = 1; // block dim for fixed size blocks
1542  GO indexBase = rowMap->getIndexBase(); // index base of maps
1543  GO offset = 0;
1544 
1545  // 1) check for blocking/striding information
1546  if (A->IsView("stridedMaps") &&
1547  Teuchos::rcp_dynamic_cast<const StridedMap>(A->getRowMap("stridedMaps")) != Teuchos::null) {
1548  Xpetra::viewLabel_t oldView = A->SwitchToView("stridedMaps"); // note: "stridedMaps are always non-overlapping (correspond to range and domain maps!)
1549  RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(A->getRowMap());
1550  TEUCHOS_TEST_FOR_EXCEPTION(strMap == Teuchos::null, Exceptions::BadCast, "MueLu::CoalesceFactory::Build: cast to strided row map failed.");
1551  blockdim = strMap->getFixedBlockSize();
1552  offset = strMap->getOffset();
1553  oldView = A->SwitchToView(oldView);
1554  GetOStream(Statistics1) << "CoalesceDropFactory::Build():"
1555  << " found blockdim=" << blockdim << " from strided maps. offset=" << offset << std::endl;
1556  } else
1557  GetOStream(Statistics1) << "CoalesceDropFactory::Build(): no striding information available. Use blockdim=1 with offset=0" << std::endl;
1558 
1559  // 2) get row map for amalgamated matrix (graph of A)
1560  // with same distribution over all procs as row map of A
1561  RCP<const Map> nodeMap = amalInfo->getNodeRowMap();
1562  GetOStream(Statistics1) << "CoalesceDropFactory: nodeMap " << nodeMap->getLocalNumElements() << "/" << nodeMap->getGlobalNumElements() << " elements" << std::endl;
1563 
1564  // 3) create graph of amalgamated matrix
1565  RCP<CrsGraph> crsGraph = CrsGraphFactory::Build(nodeMap, A->getLocalMaxNumRowEntries() * blockdim);
1566 
1567  LO numRows = A->getRowMap()->getLocalNumElements();
1568  LO numNodes = nodeMap->getLocalNumElements();
1569  typename LWGraph::boundary_nodes_type amalgBoundaryNodes("amalgBoundaryNodes", numNodes);
1570  Kokkos::deep_copy(amalgBoundaryNodes, false);
1571  const ArrayRCP<int> numberDirichletRowsPerNode(numNodes, 0); // helper array counting the number of Dirichlet nodes associated with node
1572  bool bIsDiagonalEntry = false; // boolean flag stating that grid==gcid
1573 
1574  // 4) do amalgamation. generate graph of amalgamated matrix
1575  // Note, this code is much more inefficient than the leightwight implementation
1576  // Most of the work has already been done in the AmalgamationFactory
1577  for (LO row = 0; row < numRows; row++) {
1578  // get global DOF id
1579  GO grid = rowMap->getGlobalElement(row);
1580 
1581  // reinitialize boolean helper variable
1582  bIsDiagonalEntry = false;
1583 
1584  // translate grid to nodeid
1585  GO nodeId = AmalgamationFactory::DOFGid2NodeId(grid, blockdim, offset, indexBase);
1586 
1587  size_t nnz = A->getNumEntriesInLocalRow(row);
1590  A->getLocalRowView(row, indices, vals);
1591 
1592  RCP<std::vector<GO>> cnodeIds = Teuchos::rcp(new std::vector<GO>); // global column block ids
1593  LO realnnz = 0;
1594  for (LO col = 0; col < Teuchos::as<LO>(nnz); col++) {
1595  GO gcid = colMap->getGlobalElement(indices[col]); // global column id
1596 
1597  if (vals[col] != STS::zero()) {
1598  GO cnodeId = AmalgamationFactory::DOFGid2NodeId(gcid, blockdim, offset, indexBase);
1599  cnodeIds->push_back(cnodeId);
1600  realnnz++; // increment number of nnz in matrix row
1601  if (grid == gcid) bIsDiagonalEntry = true;
1602  }
1603  }
1604 
1605  if (realnnz == 1 && bIsDiagonalEntry == true) {
1606  LO lNodeId = nodeMap->getLocalElement(nodeId);
1607  numberDirichletRowsPerNode[lNodeId] += 1; // increment Dirichlet row counter associated with lNodeId
1608  if (numberDirichletRowsPerNode[lNodeId] == blockdim) // mark full Dirichlet nodes
1609  amalgBoundaryNodes[lNodeId] = true;
1610  }
1611 
1612  Teuchos::ArrayRCP<GO> arr_cnodeIds = Teuchos::arcp(cnodeIds);
1613 
1614  if (arr_cnodeIds.size() > 0)
1615  crsGraph->insertGlobalIndices(nodeId, arr_cnodeIds());
1616  }
1617  // fill matrix graph
1618  crsGraph->fillComplete(nodeMap, nodeMap);
1619 
1620  // 5) create MueLu Graph object
1621  RCP<LWGraph> graph = rcp(new LWGraph(crsGraph, "amalgamated graph of A"));
1622 
1623  // Detect and record rows that correspond to Dirichlet boundary conditions
1624  graph->SetBoundaryNodeMap(amalgBoundaryNodes);
1625 
1626  if (GetVerbLevel() & Statistics1) {
1627  GO numLocalBoundaryNodes = 0;
1628  GO numGlobalBoundaryNodes = 0;
1629  for (size_t i = 0; i < amalgBoundaryNodes.size(); ++i)
1630  if (amalgBoundaryNodes(i))
1631  numLocalBoundaryNodes++;
1632  RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
1633  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1634  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
1635  }
1636 
1637  // 6) store results in Level
1638  // graph->SetBoundaryNodeMap(gBoundaryNodeMap);
1639  Set(currentLevel, "DofsPerNode", blockdim);
1640  Set(currentLevel, "Graph", graph);
1641 
1642  } // if (doExperimentalWrap) ... else ...
1643 
1644 } // Build
1645 
1646 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1647 void CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::MergeRows(const Matrix& A, const LO row, Array<LO>& cols, const Array<LO>& translation) const {
1648  typedef typename ArrayView<const LO>::size_type size_type;
1649 
1650  // extract striding information
1651  LO blkSize = A.GetFixedBlockSize(); //< stores the size of the block within the strided map
1652  if (A.IsView("stridedMaps") == true) {
1653  Teuchos::RCP<const Map> myMap = A.getRowMap("stridedMaps");
1654  Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
1655  TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
1656  if (strMap->getStridedBlockId() > -1)
1657  blkSize = Teuchos::as<LO>(strMap->getStridingData()[strMap->getStridedBlockId()]);
1658  }
1659 
1660  // count nonzero entries in all dof rows associated with node row
1661  size_t nnz = 0, pos = 0;
1662  for (LO j = 0; j < blkSize; j++)
1663  nnz += A.getNumEntriesInLocalRow(row * blkSize + j);
1664 
1665  if (nnz == 0) {
1666  cols.resize(0);
1667  return;
1668  }
1669 
1670  cols.resize(nnz);
1671 
1672  // loop over all local dof rows associated with local node "row"
1673  ArrayView<const LO> inds;
1674  ArrayView<const SC> vals;
1675  for (LO j = 0; j < blkSize; j++) {
1676  A.getLocalRowView(row * blkSize + j, inds, vals);
1677  size_type numIndices = inds.size();
1678 
1679  if (numIndices == 0) // skip empty dof rows
1680  continue;
1681 
1682  // cols: stores all local node ids for current local node id "row"
1683  cols[pos++] = translation[inds[0]];
1684  for (size_type k = 1; k < numIndices; k++) {
1685  LO nodeID = translation[inds[k]];
1686  // Here we try to speed up the process by reducing the size of an array
1687  // to sort. This works if the column nonzeros belonging to the same
1688  // node are stored consequently.
1689  if (nodeID != cols[pos - 1])
1690  cols[pos++] = nodeID;
1691  }
1692  }
1693  cols.resize(pos);
1694  nnz = pos;
1695 
1696  // Sort and remove duplicates
1697  std::sort(cols.begin(), cols.end());
1698  pos = 0;
1699  for (size_t j = 1; j < nnz; j++)
1700  if (cols[j] != cols[pos])
1701  cols[++pos] = cols[j];
1702  cols.resize(pos + 1);
1703 }
1704 
1705 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1706 void CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::MergeRowsWithDropping(const Matrix& A, const LO row, const ArrayRCP<const SC>& ghostedDiagVals, SC threshold, Array<LO>& cols, const Array<LO>& translation) const {
1707  typedef typename ArrayView<const LO>::size_type size_type;
1708  typedef Teuchos::ScalarTraits<SC> STS;
1709 
1710  // extract striding information
1711  LO blkSize = A.GetFixedBlockSize(); //< stores the size of the block within the strided map
1712  if (A.IsView("stridedMaps") == true) {
1713  Teuchos::RCP<const Map> myMap = A.getRowMap("stridedMaps");
1714  Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
1715  TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
1716  if (strMap->getStridedBlockId() > -1)
1717  blkSize = Teuchos::as<LO>(strMap->getStridingData()[strMap->getStridedBlockId()]);
1718  }
1719 
1720  // count nonzero entries in all dof rows associated with node row
1721  size_t nnz = 0, pos = 0;
1722  for (LO j = 0; j < blkSize; j++)
1723  nnz += A.getNumEntriesInLocalRow(row * blkSize + j);
1724 
1725  if (nnz == 0) {
1726  cols.resize(0);
1727  return;
1728  }
1729 
1730  cols.resize(nnz);
1731 
1732  // loop over all local dof rows associated with local node "row"
1733  ArrayView<const LO> inds;
1734  ArrayView<const SC> vals;
1735  for (LO j = 0; j < blkSize; j++) {
1736  A.getLocalRowView(row * blkSize + j, inds, vals);
1737  size_type numIndices = inds.size();
1738 
1739  if (numIndices == 0) // skip empty dof rows
1740  continue;
1741 
1742  // cols: stores all local node ids for current local node id "row"
1743  LO prevNodeID = -1;
1744  for (size_type k = 0; k < numIndices; k++) {
1745  LO dofID = inds[k];
1746  LO nodeID = translation[inds[k]];
1747 
1748  // we avoid a square root by using squared values
1749  typename STS::magnitudeType aiiajj = STS::magnitude(threshold * threshold * ghostedDiagVals[dofID] * ghostedDiagVals[row * blkSize + j]); // eps^2 * |a_ii| * |a_jj|
1750  typename STS::magnitudeType aij = STS::magnitude(vals[k] * vals[k]);
1751 
1752  // check dropping criterion
1753  if (aij > aiiajj || (row * blkSize + j == dofID)) {
1754  // accept entry in graph
1755 
1756  // Here we try to speed up the process by reducing the size of an array
1757  // to sort. This works if the column nonzeros belonging to the same
1758  // node are stored consequently.
1759  if (nodeID != prevNodeID) {
1760  cols[pos++] = nodeID;
1761  prevNodeID = nodeID;
1762  }
1763  }
1764  }
1765  }
1766  cols.resize(pos);
1767  nnz = pos;
1768 
1769  // Sort and remove duplicates
1770  std::sort(cols.begin(), cols.end());
1771  pos = 0;
1772  for (size_t j = 1; j < nnz; j++)
1773  if (cols[j] != cols[pos])
1774  cols[++pos] = cols[j];
1775  cols.resize(pos + 1);
1776 
1777  return;
1778 }
1779 
1780 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1782  typedef Teuchos::ScalarTraits<SC> STS;
1783 
1784  const ParameterList& pL = GetParameterList();
1785  const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
1786  const typename STS::magnitudeType rowSumTol = as<typename STS::magnitudeType>(pL.get<double>("aggregation: row sum drop tol"));
1787 
1788  RCP<LocalOrdinalVector> BlockNumber = Get<RCP<LocalOrdinalVector>>(currentLevel, "BlockNumber");
1789  RCP<LocalOrdinalVector> ghostedBlockNumber;
1790  GetOStream(Statistics1) << "Using BlockDiagonal Graph before dropping (with provided blocking)" << std::endl;
1791 
1792  // Ghost the column block numbers if we need to
1793  RCP<const Import> importer = A->getCrsGraph()->getImporter();
1794  if (!importer.is_null()) {
1795  SubFactoryMonitor m1(*this, "Block Number import", currentLevel);
1796  ghostedBlockNumber = Xpetra::VectorFactory<LO, LO, GO, NO>::Build(importer->getTargetMap());
1797  ghostedBlockNumber->doImport(*BlockNumber, *importer, Xpetra::INSERT);
1798  } else {
1799  ghostedBlockNumber = BlockNumber;
1800  }
1801 
1802  // Accessors for block numbers
1803  Teuchos::ArrayRCP<const LO> row_block_number = BlockNumber->getData(0);
1804  Teuchos::ArrayRCP<const LO> col_block_number = ghostedBlockNumber->getData(0);
1805 
1806  // allocate space for the local graph
1807  typename CrsMatrix::local_matrix_type::row_map_type::HostMirror::non_const_type rows_mat;
1808  typename LWGraph::row_type::non_const_type rows_graph;
1809  typename LWGraph::entries_type::non_const_type columns;
1810  typename CrsMatrix::local_matrix_type::values_type::HostMirror::non_const_type values;
1811  RCP<CrsMatrixWrap> crs_matrix_wrap;
1812 
1813  if (generate_matrix) {
1814  crs_matrix_wrap = rcp(new CrsMatrixWrap(A->getRowMap(), A->getColMap(), 0));
1815  rows_mat = typename CrsMatrix::local_matrix_type::row_map_type::HostMirror::non_const_type("rows_mat", A->getLocalNumRows() + 1);
1816  } else {
1817  rows_graph = typename LWGraph::row_type::non_const_type("rows_graph", A->getLocalNumRows() + 1);
1818  }
1819  columns = typename LWGraph::entries_type::non_const_type("columns", A->getLocalNumEntries());
1820  values = typename CrsMatrix::local_matrix_type::values_type::HostMirror::non_const_type("values", A->getLocalNumEntries());
1821 
1822  LO realnnz = 0;
1823  GO numDropped = 0, numTotal = 0;
1824  for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
1825  LO row_block = row_block_number[row];
1826  size_t nnz = A->getNumEntriesInLocalRow(row);
1827  ArrayView<const LO> indices;
1828  ArrayView<const SC> vals;
1829  A->getLocalRowView(row, indices, vals);
1830 
1831  LO rownnz = 0;
1832  for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
1833  LO col = indices[colID];
1834  LO col_block = col_block_number[col];
1835 
1836  if (row_block == col_block) {
1837  if (generate_matrix) values[realnnz] = vals[colID];
1838  columns[realnnz++] = col;
1839  rownnz++;
1840  } else
1841  numDropped++;
1842  }
1843  if (generate_matrix)
1844  rows_mat[row + 1] = realnnz;
1845  else
1846  rows_graph[row + 1] = realnnz;
1847  }
1848 
1849  auto boundaryNodes = MueLu::Utilities<SC, LO, GO, NO>::DetectDirichletRows_kokkos_host(*A, dirichletThreshold);
1850  if (rowSumTol > 0.)
1851  Utilities::ApplyRowSumCriterionHost(*A, rowSumTol, boundaryNodes);
1852 
1853  numTotal = A->getLocalNumEntries();
1854 
1855  if (GetVerbLevel() & Statistics1) {
1856  GO numLocalBoundaryNodes = 0;
1857  GO numGlobalBoundaryNodes = 0;
1858  for (size_t i = 0; i < boundaryNodes.size(); ++i)
1859  if (boundaryNodes(i))
1860  numLocalBoundaryNodes++;
1861  RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
1862  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1863  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
1864 
1865  GO numGlobalTotal, numGlobalDropped;
1866  MueLu_sumAll(comm, numTotal, numGlobalTotal);
1867  MueLu_sumAll(comm, numDropped, numGlobalDropped);
1868  GetOStream(Statistics1) << "Number of dropped entries in block-diagonalized matrix graph: " << numGlobalDropped << "/" << numGlobalTotal;
1869  if (numGlobalTotal != 0)
1870  GetOStream(Statistics1) << " (" << 100 * Teuchos::as<double>(numGlobalDropped) / Teuchos::as<double>(numGlobalTotal) << "%)";
1871  GetOStream(Statistics1) << std::endl;
1872  }
1873 
1874  Set(currentLevel, "Filtering", true);
1875 
1876  if (generate_matrix) {
1877  // NOTE: Trying to use A's Import/Export objects will cause the code to segfault back in Build() with errors on the Import
1878  // if you're using Epetra. I'm not really sure why. By using the Col==Domain and Row==Range maps, we get null Import/Export objects
1879  // here, which is legit, because we never use them anyway.
1880  if constexpr (std::is_same<typename LWGraph::row_type,
1881  typename CrsMatrix::local_matrix_type::row_map_type>::value) {
1882  crs_matrix_wrap->getCrsMatrix()->setAllValues(rows_mat, columns, values);
1883  } else {
1884  auto rows_mat2 = typename CrsMatrix::local_matrix_type::row_map_type::non_const_type("rows_mat2", rows_mat.extent(0));
1885  Kokkos::deep_copy(rows_mat2, rows_mat);
1886  auto columns2 = typename CrsMatrix::local_graph_type::entries_type::non_const_type("columns2", columns.extent(0));
1887  Kokkos::deep_copy(columns2, columns);
1888  auto values2 = typename CrsMatrix::local_matrix_type::values_type::non_const_type("values2", values.extent(0));
1889  Kokkos::deep_copy(values2, values);
1890  crs_matrix_wrap->getCrsMatrix()->setAllValues(rows_mat2, columns2, values2);
1891  }
1892  crs_matrix_wrap->getCrsMatrix()->expertStaticFillComplete(A->getColMap(), A->getRowMap());
1893  } else {
1894  RCP<LWGraph> graph = rcp(new LWGraph(rows_graph, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), A->getRowMap(), A->getColMap(), "block-diagonalized graph of A"));
1895  graph->SetBoundaryNodeMap(boundaryNodes);
1896  Set(currentLevel, "Graph", graph);
1897  }
1898 
1899  Set(currentLevel, "DofsPerNode", 1);
1900  return crs_matrix_wrap;
1901 }
1902 
1903 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1905  TEUCHOS_TEST_FOR_EXCEPTION(ghostedBlockNumber.is_null(), Exceptions::RuntimeError, "BlockDiagonalizeGraph(): ghostedBlockNumber is null.");
1906  const ParameterList& pL = GetParameterList();
1907 
1908  const bool localizeColoringGraph = pL.get<bool>("aggregation: coloring: localize color graph");
1909 
1910  GetOStream(Statistics1) << "Using BlockDiagonal Graph after Dropping (with provided blocking)";
1911  if (localizeColoringGraph)
1912  GetOStream(Statistics1) << ", with localization" << std::endl;
1913  else
1914  GetOStream(Statistics1) << ", without localization" << std::endl;
1915 
1916  // Accessors for block numbers
1917  Teuchos::ArrayRCP<const LO> row_block_number = ghostedBlockNumber->getData(0);
1918  Teuchos::ArrayRCP<const LO> col_block_number = ghostedBlockNumber->getData(0);
1919 
1920  // allocate space for the local graph
1921  ArrayRCP<size_t> rows_mat;
1922  typename LWGraph::row_type::non_const_type rows_graph("rows_graph", inputGraph->GetNodeNumVertices() + 1);
1923  typename LWGraph::entries_type::non_const_type columns("columns", inputGraph->GetNodeNumEdges());
1924 
1925  LO realnnz = 0;
1926  GO numDropped = 0, numTotal = 0;
1927  const LO numRows = Teuchos::as<LO>(inputGraph->GetDomainMap()->getLocalNumElements());
1928  if (localizeColoringGraph) {
1929  for (LO row = 0; row < numRows; ++row) {
1930  LO row_block = row_block_number[row];
1931  auto indices = inputGraph->getNeighborVertices(row);
1932 
1933  LO rownnz = 0;
1934  for (LO colID = 0; colID < Teuchos::as<LO>(indices.length); colID++) {
1935  LO col = indices(colID);
1936  LO col_block = col_block_number[col];
1937 
1938  if ((row_block == col_block) && (col < numRows)) {
1939  columns(realnnz++) = col;
1940  rownnz++;
1941  } else
1942  numDropped++;
1943  }
1944  rows_graph(row + 1) = realnnz;
1945  }
1946  } else {
1947  // ghosting of boundary node map
1948  auto boundaryNodes = inputGraph->GetBoundaryNodeMap();
1950  for (size_t i = 0; i < inputGraph->GetNodeNumVertices(); i++)
1951  boundaryNodesVector->getDataNonConst(0)[i] = boundaryNodes[i];
1952  // Xpetra::IO<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Write("boundary",*boundaryNodesVector);
1954  boundaryColumnVector->doImport(*boundaryNodesVector, *importer, Xpetra::INSERT);
1955  auto boundaryColumn = boundaryColumnVector->getData(0);
1956 
1957  for (LO row = 0; row < numRows; ++row) {
1958  LO row_block = row_block_number[row];
1959  auto indices = inputGraph->getNeighborVertices(row);
1960 
1961  LO rownnz = 0;
1962  for (LO colID = 0; colID < Teuchos::as<LO>(indices.length); colID++) {
1963  LO col = indices(colID);
1964  LO col_block = col_block_number[col];
1965 
1966  if ((row_block == col_block) && ((row == col) || (boundaryColumn[col] == 0))) {
1967  columns(realnnz++) = col;
1968  rownnz++;
1969  } else
1970  numDropped++;
1971  }
1972  rows_graph(row + 1) = realnnz;
1973  }
1974  }
1975 
1976  numTotal = inputGraph->GetNodeNumEdges();
1977 
1978  if (GetVerbLevel() & Statistics1) {
1979  RCP<const Teuchos::Comm<int>> comm = inputGraph->GetDomainMap()->getComm();
1980  GO numGlobalTotal, numGlobalDropped;
1981  MueLu_sumAll(comm, numTotal, numGlobalTotal);
1982  MueLu_sumAll(comm, numDropped, numGlobalDropped);
1983  GetOStream(Statistics1) << "Number of dropped entries in block-diagonalized matrix graph: " << numGlobalDropped << "/" << numGlobalTotal;
1984  if (numGlobalTotal != 0)
1985  GetOStream(Statistics1) << " (" << 100 * Teuchos::as<double>(numGlobalDropped) / Teuchos::as<double>(numGlobalTotal) << "%)";
1986  GetOStream(Statistics1) << std::endl;
1987  }
1988 
1989  if (localizeColoringGraph) {
1990  outputGraph = rcp(new LWGraph(rows_graph, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), inputGraph->GetDomainMap(), inputGraph->GetImportMap(), "block-diagonalized graph of A"));
1991  outputGraph->SetBoundaryNodeMap(inputGraph->GetBoundaryNodeMap());
1992  } else {
1993  TEUCHOS_ASSERT(inputGraph->GetDomainMap()->lib() == Xpetra::UseTpetra);
1994 #ifdef HAVE_XPETRA_TPETRA
1995  auto outputGraph2 = rcp(new LWGraph(rows_graph, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), inputGraph->GetDomainMap(), inputGraph->GetImportMap(), "block-diagonalized graph of A"));
1996 
1997  auto tpGraph = Xpetra::toTpetra(rcp_const_cast<const CrsGraph>(outputGraph2->GetCrsGraph()));
1999  auto tpGraphSym = sym->symmetrize();
2000  auto lclGraphSym = tpGraphSym->getLocalGraphHost();
2001  auto colIndsSym = lclGraphSym.entries;
2002 
2003  auto rowsSym = tpGraphSym->getLocalRowPtrsHost();
2004  typename LWGraph::row_type::non_const_type rows_graphSym("rows_graphSym", rowsSym.size());
2005  for (size_t row = 0; row < rowsSym.size(); row++)
2006  rows_graphSym(row) = rowsSym(row);
2007  outputGraph = rcp(new LWGraph(rows_graphSym, colIndsSym, inputGraph->GetDomainMap(), Xpetra::toXpetra(tpGraphSym->getColMap()), "block-diagonalized graph of A"));
2008  outputGraph->SetBoundaryNodeMap(inputGraph->GetBoundaryNodeMap());
2009 #endif
2010  }
2011 }
2012 
2013 } // namespace MueLu
2014 
2015 #endif // MUELU_COALESCEDROPFACTORY_DEF_HPP
Important warning messages (one line)
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
Exception indicating invalid cast attempted.
void reserve(size_type n)
void BlockDiagonalizeGraph(const RCP< LWGraph > &inputGraph, const RCP< LocalOrdinalVector > &ghostedBlockNumber, RCP< LWGraph > &outputGraph, RCP< const Import > &importer) const
#define MueLu_sumAll(rcpComm, in, out)
static void ApplyRowSumCriterionHost(const Matrix &A, const typename Teuchos::ScalarTraits< Scalar >::magnitudeType rowSumTol, Kokkos::View< bool *, Kokkos::HostSpace > &dirichletRows)
KOKKOS_INLINE_FUNCTION void SetBoundaryNodeMap(const boundary_nodes_type bndry)
Set boolean array indicating which rows correspond to Dirichlet boundaries.
static Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Build(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node >> &map, size_t NumVectors, bool zeroOut=true)
void setValidator(RCP< const ParameterEntryValidator > const &validator)
GlobalOrdinal GO
T & get(const std::string &name, T def_value)
static void ApplyRowSumCriterion(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Magnitude rowSumTol, Teuchos::ArrayRCP< bool > &dirichletRows)
Apply Rowsum Criterion.
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
static void Write(const std::string &fileName, const Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > &M)
Print more statistics.
size_type size() const
DropTol & operator=(DropTol const &)=default
Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > BlockDiagonalize(Level &currentLevel, const RCP< Matrix > &A, bool generate_matrix) const
LocalOrdinal LO
One-liner description of what is happening.
ParameterList & set(std::string const &name, T &&value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const
Return number of graph vertices.
KOKKOS_INLINE_FUNCTION size_type GetNodeNumEdges() const
Return number of graph edges.
KOKKOS_INLINE_FUNCTION const boundary_nodes_type GetBoundaryNodeMap() const
Returns map with global ids of boundary nodes.
size_type size() const
Exception throws to report incompatible objects (like maps).
static const GlobalOrdinal DOFGid2NodeId(GlobalOrdinal gid, LocalOrdinal blockSize, const GlobalOrdinal offset, const GlobalOrdinal indexBase)
Translate global (row/column) id to global amalgamation block id.
Scalar GetThreshold() const
Return threshold value.
Additional warnings.
void resize(const size_type n, const T &val=T())
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:63
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
static Teuchos::ArrayRCP< const bool > DetectDirichletRows(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Magnitude &tol=Teuchos::ScalarTraits< Magnitude >::zero(), bool count_twos_as_dirichlet=false)
Detect Dirichlet rows.
#define SET_VALID_ENTRY(name)
static void AmalgamateMap(const Map &sourceMap, const Matrix &A, RCP< const Map > &amalgamatedMap, Array< LO > &translation)
Method to create merged map for systems of PDEs.
static RCP< Vector > GetMatrixOverlappedDiagonal(const Matrix &A)
Extract Overlapped Matrix Diagonal.
void resize(size_type new_size, const value_type &x=value_type())
static Teuchos::RCP< Xpetra::Vector< Magnitude, LocalOrdinal, GlobalOrdinal, Node > > GetMatrixMaxMinusOffDiagonal(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
Return vector containing: max_{i=k}(-a_ik), for each for i in the matrix.
void Build(Level &currentLevel) const
Build an object with this factory.
std::string viewLabel_t
static RCP< Vector > Build(const Teuchos::RCP< const Map > &map, bool zeroOut=true)
iterator end()
void DeclareInput(Level &currentLevel) const
Input.
KOKKOS_INLINE_FUNCTION neighbor_vertices_type getNeighborVertices(LO i) const
Return the list of vertices adjacent to the vertex &#39;v&#39;.
Print class parameters.
const RCP< const Map > GetImportMap() const
Return overlapping import map (nodes).
RCP< const Tpetra::CrsGraph< LocalOrdinal, GlobalOrdinal, Node > > toTpetra(const RCP< const CrsGraph< LocalOrdinal, GlobalOrdinal, Node > > &graph)
void push_back(const value_type &x)
void MergeRows(const Matrix &A, const LO row, Array< LO > &cols, const Array< LO > &translation) const
Method to merge rows of matrix for systems of PDEs.
RCP< const CrsGraph< int, GlobalOrdinal, Node > > toXpetra(const Epetra_CrsGraph &g)
size_type size() const
Scalar SC
const RCP< const Map > GetDomainMap() const
Lightweight MueLu representation of a compressed row storage graph.
DropTol(real_type val_, real_type diag_, LO col_, bool drop_)
Exception throws to report errors in the internal logical of the program.
#define TEUCHOS_ASSERT(assertion_test)
static Kokkos::View< bool *, typename Kokkos::HostSpace > DetectDirichletRows_kokkos_host(const Matrix &A, const Magnitude &tol=Teuchos::ScalarTraits< typename Teuchos::ScalarTraits< SC >::magnitudeType >::zero(), const bool count_twos_as_dirichlet=false)
static Teuchos::ScalarTraits< Scalar >::magnitudeType Distance2(const Teuchos::Array< Teuchos::ArrayRCP< const Scalar >> &v, LocalOrdinal i0, LocalOrdinal i1)
Squared distance between two rows in a multivector.
ParameterEntry & getEntry(const std::string &name)
bool is_null() const
iterator begin()
void MergeRowsWithDropping(const Matrix &A, const LO row, const ArrayRCP< const SC > &ghostedDiagVals, SC threshold, Array< LO > &cols, const Array< LO > &translation) const
bool is_null() const