MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_CoalesceDropFactory_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_COALESCEDROPFACTORY_DEF_HPP
47 #define MUELU_COALESCEDROPFACTORY_DEF_HPP
48 
50 #include <Xpetra_CrsGraph.hpp>
51 #include <Xpetra_ImportFactory.hpp>
52 #include <Xpetra_ExportFactory.hpp>
53 #include <Xpetra_MapFactory.hpp>
54 #include <Xpetra_Map.hpp>
55 #include <Xpetra_Matrix.hpp>
56 #include <Xpetra_MultiVectorFactory.hpp>
57 #include <Xpetra_MultiVector.hpp>
58 #include <Xpetra_StridedMap.hpp>
59 #include <Xpetra_VectorFactory.hpp>
60 #include <Xpetra_Vector.hpp>
61 
62 #include <Xpetra_IO.hpp>
63 
65 
66 #include "MueLu_AmalgamationFactory.hpp"
67 #include "MueLu_AmalgamationInfo.hpp"
68 #include "MueLu_Exceptions.hpp"
69 #include "MueLu_LWGraph.hpp"
70 
71 #include "MueLu_Level.hpp"
72 #include "MueLu_LWGraph.hpp"
73 #include "MueLu_MasterList.hpp"
74 #include "MueLu_Monitor.hpp"
75 #include "MueLu_PreDropFunctionConstVal.hpp"
76 #include "MueLu_Utilities.hpp"
77 
78 #ifdef HAVE_XPETRA_TPETRA
79 #include "Tpetra_CrsGraphTransposer.hpp"
80 #endif
81 
82 #include <algorithm>
83 #include <cstdlib>
84 #include <string>
85 
86 // If defined, read environment variables.
87 // Should be removed once we are confident that this works.
88 //#define DJS_READ_ENV_VARIABLES
89 
90 namespace MueLu {
91 
92 namespace Details {
93 template <class real_type, class LO>
94 struct DropTol {
95  DropTol() = default;
96  DropTol(DropTol const&) = default;
97  DropTol(DropTol&&) = default;
98 
99  DropTol& operator=(DropTol const&) = default;
100  DropTol& operator=(DropTol&&) = default;
101 
102  DropTol(real_type val_, real_type diag_, LO col_, bool drop_)
103  : val{val_}
104  , diag{diag_}
105  , col{col_}
106  , drop{drop_} {}
107 
111  bool drop{true};
112 
113  // CMS: Auxillary information for debugging info
114  // real_type aux_val {Teuchos::ScalarTraits<real_type>::nan()};
115 };
116 } // namespace Details
117 
118 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
120  RCP<ParameterList> validParamList = rcp(new ParameterList());
121 
122 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
123  SET_VALID_ENTRY("aggregation: drop tol");
124  SET_VALID_ENTRY("aggregation: use ml scaling of drop tol");
125  SET_VALID_ENTRY("aggregation: Dirichlet threshold");
126  SET_VALID_ENTRY("aggregation: greedy Dirichlet");
127  SET_VALID_ENTRY("aggregation: row sum drop tol");
128  SET_VALID_ENTRY("aggregation: drop scheme");
129  SET_VALID_ENTRY("aggregation: block diagonal: interleaved blocksize");
130  SET_VALID_ENTRY("aggregation: distance laplacian directional weights");
131  SET_VALID_ENTRY("aggregation: dropping may create Dirichlet");
132 
133  {
135  // "signed classical" is the Ruge-Stuben style (relative to max off-diagonal), "sign classical sa" is the signed version of the sa criterion (relative to the diagonal values)
136  validParamList->getEntry("aggregation: drop scheme").setValidator(rcp(new validatorType(Teuchos::tuple<std::string>("signed classical sa", "classical", "distance laplacian", "signed classical", "block diagonal", "block diagonal classical", "block diagonal distance laplacian", "block diagonal signed classical", "block diagonal colored signed classical"), "aggregation: drop scheme")));
137  }
138  SET_VALID_ENTRY("aggregation: distance laplacian algo");
139  SET_VALID_ENTRY("aggregation: classical algo");
140  SET_VALID_ENTRY("aggregation: coloring: localize color graph");
141 #undef SET_VALID_ENTRY
142  validParamList->set<bool>("lightweight wrap", true, "Experimental option for lightweight graph access");
143 
144  validParamList->set<RCP<const FactoryBase>>("A", Teuchos::null, "Generating factory of the matrix A");
145  validParamList->set<RCP<const FactoryBase>>("UnAmalgamationInfo", Teuchos::null, "Generating factory for UnAmalgamationInfo");
146  validParamList->set<RCP<const FactoryBase>>("Coordinates", Teuchos::null, "Generating factory for Coordinates");
147  validParamList->set<RCP<const FactoryBase>>("BlockNumber", Teuchos::null, "Generating factory for BlockNUmber");
148 
149  return validParamList;
150 }
151 
152 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
154  : predrop_(Teuchos::null) {}
155 
156 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
158  Input(currentLevel, "A");
159  Input(currentLevel, "UnAmalgamationInfo");
160 
161  const ParameterList& pL = GetParameterList();
162  if (pL.get<bool>("lightweight wrap") == true) {
163  std::string algo = pL.get<std::string>("aggregation: drop scheme");
164  if (algo == "distance laplacian" || algo == "block diagonal distance laplacian") {
165  Input(currentLevel, "Coordinates");
166  }
167  if (algo == "signed classical sa")
168  ;
169  else if (algo.find("block diagonal") != std::string::npos || algo.find("signed classical") != std::string::npos) {
170  Input(currentLevel, "BlockNumber");
171  }
172  }
173 }
174 
175 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
177  FactoryMonitor m(*this, "Build", currentLevel);
178 
179  typedef Teuchos::ScalarTraits<SC> STS;
180  typedef typename STS::magnitudeType real_type;
181  typedef Xpetra::MultiVector<real_type, LO, GO, NO> RealValuedMultiVector;
182  typedef Xpetra::MultiVectorFactory<real_type, LO, GO, NO> RealValuedMultiVectorFactory;
183 
184  if (predrop_ != Teuchos::null)
185  GetOStream(Parameters0) << predrop_->description();
186 
187  RCP<Matrix> realA = Get<RCP<Matrix>>(currentLevel, "A");
188  RCP<AmalgamationInfo> amalInfo = Get<RCP<AmalgamationInfo>>(currentLevel, "UnAmalgamationInfo");
189  const ParameterList& pL = GetParameterList();
190  bool doExperimentalWrap = pL.get<bool>("lightweight wrap");
191 
192  GetOStream(Parameters0) << "lightweight wrap = " << doExperimentalWrap << std::endl;
193  std::string algo = pL.get<std::string>("aggregation: drop scheme");
194  const bool aggregationMayCreateDirichlet = pL.get<bool>("aggregation: dropping may create Dirichlet");
195 
197  RCP<Matrix> A;
198 
199  bool use_block_algorithm = false;
200  LO interleaved_blocksize = as<LO>(pL.get<int>("aggregation: block diagonal: interleaved blocksize"));
201  bool useSignedClassicalRS = false;
202  bool useSignedClassicalSA = false;
203  bool generateColoringGraph = false;
204 
205  // NOTE: If we're doing blockDiagonal, we'll not want to do rowSum twice (we'll do it
206  // in the block diagonalization). So we'll clobber the rowSumTol with -1.0 in this case
207  typename STS::magnitudeType rowSumTol = as<typename STS::magnitudeType>(pL.get<double>("aggregation: row sum drop tol"));
208 
209  RCP<LocalOrdinalVector> ghostedBlockNumber;
210  ArrayRCP<const LO> g_block_id;
211 
212  if (algo == "distance laplacian") {
213  // Grab the coordinates for distance laplacian
214  Coords = Get<RCP<RealValuedMultiVector>>(currentLevel, "Coordinates");
215  A = realA;
216  } else if (algo == "signed classical sa") {
217  useSignedClassicalSA = true;
218  algo = "classical";
219  A = realA;
220  } else if (algo == "signed classical" || algo == "block diagonal colored signed classical" || algo == "block diagonal signed classical") {
221  useSignedClassicalRS = true;
222  // if(realA->GetFixedBlockSize() > 1) {
223  RCP<LocalOrdinalVector> BlockNumber = Get<RCP<LocalOrdinalVector>>(currentLevel, "BlockNumber");
224  // Ghost the column block numbers if we need to
225  RCP<const Import> importer = realA->getCrsGraph()->getImporter();
226  if (!importer.is_null()) {
227  SubFactoryMonitor m1(*this, "Block Number import", currentLevel);
228  ghostedBlockNumber = Xpetra::VectorFactory<LO, LO, GO, NO>::Build(importer->getTargetMap());
229  ghostedBlockNumber->doImport(*BlockNumber, *importer, Xpetra::INSERT);
230  } else {
231  ghostedBlockNumber = BlockNumber;
232  }
233  g_block_id = ghostedBlockNumber->getData(0);
234  // }
235  if (algo == "block diagonal colored signed classical")
236  generateColoringGraph = true;
237  algo = "classical";
238  A = realA;
239 
240  } else if (algo == "block diagonal") {
241  // Handle the "block diagonal" filtering and then leave
242  BlockDiagonalize(currentLevel, realA, false);
243  return;
244  } else if (algo == "block diagonal classical" || algo == "block diagonal distance laplacian") {
245  // Handle the "block diagonal" filtering, and then continue onward
246  use_block_algorithm = true;
247  RCP<Matrix> filteredMatrix = BlockDiagonalize(currentLevel, realA, true);
248  if (algo == "block diagonal distance laplacian") {
249  // We now need to expand the coordinates by the interleaved blocksize
250  RCP<RealValuedMultiVector> OldCoords = Get<RCP<RealValuedMultiVector>>(currentLevel, "Coordinates");
251  if (OldCoords->getLocalLength() != realA->getLocalNumRows()) {
252  LO dim = (LO)OldCoords->getNumVectors();
253  Coords = RealValuedMultiVectorFactory::Build(realA->getRowMap(), dim);
254  for (LO k = 0; k < dim; k++) {
255  ArrayRCP<const real_type> old_vec = OldCoords->getData(k);
256  ArrayRCP<real_type> new_vec = Coords->getDataNonConst(k);
257  for (LO i = 0; i < (LO)OldCoords->getLocalLength(); i++) {
258  LO new_base = i * dim;
259  for (LO j = 0; j < interleaved_blocksize; j++)
260  new_vec[new_base + j] = old_vec[i];
261  }
262  }
263  } else {
264  Coords = OldCoords;
265  }
266  algo = "distance laplacian";
267  } else if (algo == "block diagonal classical") {
268  algo = "classical";
269  }
270  // All cases
271  A = filteredMatrix;
272  rowSumTol = -1.0;
273  } else {
274  A = realA;
275  }
276 
277  // Distance Laplacian weights
278  Array<double> dlap_weights = pL.get<Array<double>>("aggregation: distance laplacian directional weights");
279  enum { NO_WEIGHTS = 0,
280  SINGLE_WEIGHTS,
281  BLOCK_WEIGHTS };
282  int use_dlap_weights = NO_WEIGHTS;
283  if (algo == "distance laplacian") {
284  LO dim = (LO)Coords->getNumVectors();
285  // If anything isn't 1.0 we need to turn on the weighting
286  bool non_unity = false;
287  for (LO i = 0; !non_unity && i < (LO)dlap_weights.size(); i++) {
288  if (dlap_weights[i] != 1.0) {
289  non_unity = true;
290  }
291  }
292  if (non_unity) {
293  LO blocksize = use_block_algorithm ? as<LO>(pL.get<int>("aggregation: block diagonal: interleaved blocksize")) : 1;
294  if ((LO)dlap_weights.size() == dim)
295  use_dlap_weights = SINGLE_WEIGHTS;
296  else if ((LO)dlap_weights.size() == blocksize * dim)
297  use_dlap_weights = BLOCK_WEIGHTS;
298  else {
300  "length of 'aggregation: distance laplacian directional weights' must equal the coordinate dimension OR the coordinate dimension times the blocksize");
301  }
302  if (GetVerbLevel() & Statistics1)
303  GetOStream(Statistics1) << "Using distance laplacian weights: " << dlap_weights << std::endl;
304  }
305  }
306 
307  // decide wether to use the fast-track code path for standard maps or the somewhat slower
308  // code path for non-standard maps
309  /*bool bNonStandardMaps = false;
310  if (A->IsView("stridedMaps") == true) {
311  Teuchos::RCP<const Map> myMap = A->getRowMap("stridedMaps");
312  Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
313  TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
314  if (strMap->getStridedBlockId() != -1 || strMap->getOffset() > 0)
315  bNonStandardMaps = true;
316  }*/
317 
318  if (doExperimentalWrap) {
319  TEUCHOS_TEST_FOR_EXCEPTION(predrop_ != null && algo != "classical", Exceptions::RuntimeError, "Dropping function must not be provided for \"" << algo << "\" algorithm");
320  TEUCHOS_TEST_FOR_EXCEPTION(algo != "classical" && algo != "distance laplacian" && algo != "signed classical", Exceptions::RuntimeError, "\"algorithm\" must be one of (classical|distance laplacian|signed classical)");
321 
322  SC threshold;
323  // If we're doing the ML-style halving of the drop tol at each level, we do that here.
324  if (pL.get<bool>("aggregation: use ml scaling of drop tol"))
325  threshold = pL.get<double>("aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID());
326  else
327  threshold = as<SC>(pL.get<double>("aggregation: drop tol"));
328 
329  std::string distanceLaplacianAlgoStr = pL.get<std::string>("aggregation: distance laplacian algo");
330  std::string classicalAlgoStr = pL.get<std::string>("aggregation: classical algo");
331  real_type realThreshold = STS::magnitude(threshold); // CMS: Rename this to "magnitude threshold" sometime
332 
334  // Remove this bit once we are confident that cut-based dropping works.
335 #ifdef HAVE_MUELU_DEBUG
336  int distanceLaplacianCutVerbose = 0;
337 #endif
338 #ifdef DJS_READ_ENV_VARIABLES
339  if (getenv("MUELU_DROP_TOLERANCE_MODE")) {
340  distanceLaplacianAlgoStr = std::string(getenv("MUELU_DROP_TOLERANCE_MODE"));
341  }
342 
343  if (getenv("MUELU_DROP_TOLERANCE_THRESHOLD")) {
344  auto tmp = atoi(getenv("MUELU_DROP_TOLERANCE_THRESHOLD"));
345  realThreshold = 1e-4 * tmp;
346  }
347 
348 #ifdef HAVE_MUELU_DEBUG
349  if (getenv("MUELU_DROP_TOLERANCE_VERBOSE")) {
350  distanceLaplacianCutVerbose = atoi(getenv("MUELU_DROP_TOLERANCE_VERBOSE"));
351  }
352 #endif
353 #endif
354 
356  enum decisionAlgoType { defaultAlgo,
357  unscaled_cut,
358  scaled_cut,
359  scaled_cut_symmetric };
360 
361  decisionAlgoType distanceLaplacianAlgo = defaultAlgo;
362  decisionAlgoType classicalAlgo = defaultAlgo;
363  if (algo == "distance laplacian") {
364  if (distanceLaplacianAlgoStr == "default")
365  distanceLaplacianAlgo = defaultAlgo;
366  else if (distanceLaplacianAlgoStr == "unscaled cut")
367  distanceLaplacianAlgo = unscaled_cut;
368  else if (distanceLaplacianAlgoStr == "scaled cut")
369  distanceLaplacianAlgo = scaled_cut;
370  else if (distanceLaplacianAlgoStr == "scaled cut symmetric")
371  distanceLaplacianAlgo = scaled_cut_symmetric;
372  else
373  TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: distance laplacian algo\" must be one of (default|unscaled cut|scaled cut), not \"" << distanceLaplacianAlgoStr << "\"");
374  GetOStream(Runtime0) << "algorithm = \"" << algo << "\" distance laplacian algorithm = \"" << distanceLaplacianAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
375  } else if (algo == "classical") {
376  if (classicalAlgoStr == "default")
377  classicalAlgo = defaultAlgo;
378  else if (classicalAlgoStr == "unscaled cut")
379  classicalAlgo = unscaled_cut;
380  else if (classicalAlgoStr == "scaled cut")
381  classicalAlgo = scaled_cut;
382  else
383  TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: classical algo\" must be one of (default|unscaled cut|scaled cut), not \"" << classicalAlgoStr << "\"");
384  GetOStream(Runtime0) << "algorithm = \"" << algo << "\" classical algorithm = \"" << classicalAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
385 
386  } else
387  GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
388  Set<bool>(currentLevel, "Filtering", (threshold != STS::zero()));
389 
390  const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
391 
392  // NOTE: We don't support signed classical RS or SA with cut drop at present
393  TEUCHOS_TEST_FOR_EXCEPTION(useSignedClassicalRS && classicalAlgo != defaultAlgo, Exceptions::RuntimeError, "\"aggregation: classical algo\" != default is not supported for scalled classical aggregation");
394  TEUCHOS_TEST_FOR_EXCEPTION(useSignedClassicalSA && classicalAlgo != defaultAlgo, Exceptions::RuntimeError, "\"aggregation: classical algo\" != default is not supported for scalled classical sa aggregation");
395 
396  GO numDropped = 0, numTotal = 0;
397  std::string graphType = "unamalgamated"; // for description purposes only
398 
399  /* NOTE: storageblocksize (from GetStorageBlockSize()) is the size of a block in the chosen storage scheme.
400  BlockSize is the number of storage blocks that must kept together during the amalgamation process.
401 
402  Both of these quantities may be different than numPDEs (from GetFixedBlockSize()), but the following must always hold:
403 
404  numPDEs = BlockSize * storageblocksize.
405 
406  If numPDEs==1
407  Matrix is point storage (classical CRS storage). storageblocksize=1 and BlockSize=1
408  No other values makes sense.
409 
410  If numPDEs>1
411  If matrix uses point storage, then storageblocksize=1 and BlockSize=numPDEs.
412  If matrix uses block storage, with block size of n, then storageblocksize=n, and BlockSize=numPDEs/n.
413  Thus far, only storageblocksize=numPDEs and BlockSize=1 has been tested.
414  */
415  TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() % A->GetStorageBlockSize() != 0, Exceptions::RuntimeError, "A->GetFixedBlockSize() needs to be a multiple of A->GetStorageBlockSize()");
416  const LO BlockSize = A->GetFixedBlockSize() / A->GetStorageBlockSize();
417 
418  /************************** RS or SA-style Classical Dropping (and variants) **************************/
419  if (algo == "classical") {
420  if (predrop_ == null) {
421  // ap: this is a hack: had to declare predrop_ as mutable
422  predrop_ = rcp(new PreDropFunctionConstVal(threshold));
423  }
424 
425  if (predrop_ != null) {
426  RCP<PreDropFunctionConstVal> predropConstVal = rcp_dynamic_cast<PreDropFunctionConstVal>(predrop_);
427  TEUCHOS_TEST_FOR_EXCEPTION(predropConstVal == Teuchos::null, Exceptions::BadCast,
428  "MueLu::CoalesceFactory::Build: cast to PreDropFunctionConstVal failed.");
429  // If a user provided a predrop function, it overwrites the XML threshold parameter
430  SC newt = predropConstVal->GetThreshold();
431  if (newt != threshold) {
432  GetOStream(Warnings0) << "switching threshold parameter from " << threshold << " (list) to " << newt << " (user function" << std::endl;
433  threshold = newt;
434  }
435  }
436  // At this points we either have
437  // (predrop_ != null)
438  // Therefore, it is sufficient to check only threshold
439  if (BlockSize == 1 && threshold == STS::zero() && !useSignedClassicalRS && !useSignedClassicalSA && A->hasCrsGraph()) {
440  // Case 1: scalar problem, no dropping => just use matrix graph
441  RCP<LWGraph> graph = rcp(new LWGraph(A->getCrsGraph(), "graph of A"));
442  // Detect and record rows that correspond to Dirichlet boundary conditions
443  auto boundaryNodes = MueLu::Utilities<SC, LO, GO, NO>::DetectDirichletRows_kokkos_host(*A, dirichletThreshold);
444  if (rowSumTol > 0.)
445  Utilities::ApplyRowSumCriterionHost(*A, rowSumTol, boundaryNodes);
446 
447  graph->SetBoundaryNodeMap(boundaryNodes);
448  numTotal = A->getLocalNumEntries();
449 
450  if (GetVerbLevel() & Statistics1) {
451  GO numLocalBoundaryNodes = 0;
452  GO numGlobalBoundaryNodes = 0;
453  for (size_t i = 0; i < boundaryNodes.size(); ++i)
454  if (boundaryNodes[i])
455  numLocalBoundaryNodes++;
456  RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
457  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
458  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
459  }
460 
461  Set(currentLevel, "DofsPerNode", 1);
462  Set(currentLevel, "Graph", graph);
463 
464  } else if ((BlockSize == 1 && threshold != STS::zero()) ||
465  (BlockSize == 1 && threshold == STS::zero() && !A->hasCrsGraph()) ||
466  (BlockSize == 1 && useSignedClassicalRS) ||
467  (BlockSize == 1 && useSignedClassicalSA)) {
468  // Case 2: scalar problem with dropping => record the column indices of undropped entries, but still use original
469  // graph's map information, e.g., whether index is local
470  // OR a matrix without a CrsGraph
471 
472  // allocate space for the local graph
473  typename LWGraph::row_type::non_const_type rows("rows", A->getLocalNumRows() + 1);
474  typename LWGraph::entries_type::non_const_type columns("columns", A->getLocalNumEntries());
475 
476  using MT = typename STS::magnitudeType;
477  RCP<Vector> ghostedDiag;
478  ArrayRCP<const SC> ghostedDiagVals;
479  ArrayRCP<const MT> negMaxOffDiagonal;
480  // RS style needs the max negative off-diagonal, SA style needs the diagonal
481  if (useSignedClassicalRS) {
482  if (ghostedBlockNumber.is_null()) {
484  if (GetVerbLevel() & Statistics1)
485  GetOStream(Statistics1) << "Calculated max point off-diagonal" << std::endl;
486  } else {
487  negMaxOffDiagonal = MueLu::Utilities<SC, LO, GO, NO>::GetMatrixMaxMinusOffDiagonal(*A, *ghostedBlockNumber);
488  if (GetVerbLevel() & Statistics1)
489  GetOStream(Statistics1) << "Calculating max block off-diagonal" << std::endl;
490  }
491  } else {
493  ghostedDiagVals = ghostedDiag->getData(0);
494  }
495  auto boundaryNodes = MueLu::Utilities<SC, LO, GO, NO>::DetectDirichletRows_kokkos_host(*A, dirichletThreshold);
496  if (rowSumTol > 0.) {
497  if (ghostedBlockNumber.is_null()) {
498  if (GetVerbLevel() & Statistics1)
499  GetOStream(Statistics1) << "Applying point row sum criterion." << std::endl;
500  Utilities::ApplyRowSumCriterionHost(*A, rowSumTol, boundaryNodes);
501  } else {
502  if (GetVerbLevel() & Statistics1)
503  GetOStream(Statistics1) << "Applying block row sum criterion." << std::endl;
504  Utilities::ApplyRowSumCriterionHost(*A, *ghostedBlockNumber, rowSumTol, boundaryNodes);
505  }
506  }
507 
508  LO realnnz = 0;
509  rows(0) = 0;
510  for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
511  size_t nnz = A->getNumEntriesInLocalRow(row);
512  bool rowIsDirichlet = boundaryNodes[row];
513  ArrayView<const LO> indices;
514  ArrayView<const SC> vals;
515  A->getLocalRowView(row, indices, vals);
516 
517  if (classicalAlgo == defaultAlgo) {
518  // FIXME the current predrop function uses the following
519  // FIXME if(std::abs(vals[k]) > std::abs(threshold_) || grow == gcid )
520  // FIXME but the threshold doesn't take into account the rows' diagonal entries
521  // FIXME For now, hardwiring the dropping in here
522 
523  LO rownnz = 0;
524  if (useSignedClassicalRS) {
525  // Signed classical RS style
526  for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
527  LO col = indices[colID];
528  MT max_neg_aik = realThreshold * STS::real(negMaxOffDiagonal[row]);
529  MT neg_aij = -STS::real(vals[colID]);
530  /* if(row==1326) printf("A(%d,%d) = %6.4e, block = (%d,%d) neg_aij = %6.4e max_neg_aik = %6.4e\n",row,col,vals[colID],
531  g_block_id.is_null() ? -1 : g_block_id[row],
532  g_block_id.is_null() ? -1 : g_block_id[col],
533  neg_aij, max_neg_aik);*/
534  if ((!rowIsDirichlet && (g_block_id.is_null() || g_block_id[row] == g_block_id[col]) && neg_aij > max_neg_aik) || row == col) {
535  columns[realnnz++] = col;
536  rownnz++;
537  } else
538  numDropped++;
539  }
540  rows(row + 1) = realnnz;
541  } else if (useSignedClassicalSA) {
542  // Signed classical SA style
543  for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
544  LO col = indices[colID];
545 
546  bool is_nonpositive = STS::real(vals[colID]) <= 0;
547  MT aiiajj = STS::magnitude(threshold * threshold * ghostedDiagVals[col] * ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj|
548  MT aij = is_nonpositive ? STS::magnitude(vals[colID] * vals[colID]) : (-STS::magnitude(vals[colID] * vals[colID])); // + |a_ij|^2, if a_ij < 0, - |a_ij|^2 if a_ij >=0
549  /*
550  if(row==1326) printf("A(%d,%d) = %6.4e, raw_aij = %6.4e aij = %6.4e aiiajj = %6.4e\n",row,col,vals[colID],
551  vals[colID],aij, aiiajj);
552  */
553 
554  if ((!rowIsDirichlet && aij > aiiajj) || row == col) {
555  columns(realnnz++) = col;
556  rownnz++;
557  } else
558  numDropped++;
559  }
560  rows[row + 1] = realnnz;
561  } else {
562  // Standard abs classical
563  for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
564  LO col = indices[colID];
565  MT aiiajj = STS::magnitude(threshold * threshold * ghostedDiagVals[col] * ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj|
566  MT aij = STS::magnitude(vals[colID] * vals[colID]); // |a_ij|^2
567 
568  if ((!rowIsDirichlet && aij > aiiajj) || row == col) {
569  columns(realnnz++) = col;
570  rownnz++;
571  } else
572  numDropped++;
573  }
574  rows(row + 1) = realnnz;
575  }
576  } else {
577  /* Cut Algorithm */
578  // CMS
579  using DropTol = Details::DropTol<real_type, LO>;
580  std::vector<DropTol> drop_vec;
581  drop_vec.reserve(nnz);
582  const real_type zero = Teuchos::ScalarTraits<real_type>::zero();
583  const real_type one = Teuchos::ScalarTraits<real_type>::one();
584  LO rownnz = 0;
585  // NOTE: This probably needs to be fixed for rowsum
586 
587  // find magnitudes
588  for (LO colID = 0; colID < (LO)nnz; colID++) {
589  LO col = indices[colID];
590  if (row == col) {
591  drop_vec.emplace_back(zero, one, colID, false);
592  continue;
593  }
594 
595  // Don't aggregate boundaries
596  if (boundaryNodes[colID]) continue;
597  typename STS::magnitudeType aiiajj = STS::magnitude(threshold * threshold * ghostedDiagVals[col] * ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj|
598  typename STS::magnitudeType aij = STS::magnitude(vals[colID] * vals[colID]); // |a_ij|^2
599  drop_vec.emplace_back(aij, aiiajj, colID, false);
600  }
601 
602  const size_t n = drop_vec.size();
603 
604  if (classicalAlgo == unscaled_cut) {
605  std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol const& a, DropTol const& b) {
606  return a.val > b.val;
607  });
608 
609  bool drop = false;
610  for (size_t i = 1; i < n; ++i) {
611  if (!drop) {
612  auto const& x = drop_vec[i - 1];
613  auto const& y = drop_vec[i];
614  auto a = x.val;
615  auto b = y.val;
616  if (a > realThreshold * b) {
617  drop = true;
618 #ifdef HAVE_MUELU_DEBUG
619  if (distanceLaplacianCutVerbose) {
620  std::cout << "DJS: KEEP, N, ROW: " << i + 1 << ", " << n << ", " << row << std::endl;
621  }
622 #endif
623  }
624  }
625  drop_vec[i].drop = drop;
626  }
627  } else if (classicalAlgo == scaled_cut) {
628  std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol const& a, DropTol const& b) {
629  return a.val / a.diag > b.val / b.diag;
630  });
631  bool drop = false;
632  // printf("[%d] Scaled Cut: ",(int)row);
633  // printf("%3d(%4s) ",indices[drop_vec[0].col],"keep");
634  for (size_t i = 1; i < n; ++i) {
635  if (!drop) {
636  auto const& x = drop_vec[i - 1];
637  auto const& y = drop_vec[i];
638  auto a = x.val / x.diag;
639  auto b = y.val / y.diag;
640  if (a > realThreshold * b) {
641  drop = true;
642 
643 #ifdef HAVE_MUELU_DEBUG
644  if (distanceLaplacianCutVerbose) {
645  std::cout << "DJS: KEEP, N, ROW: " << i + 1 << ", " << n << ", " << row << std::endl;
646  }
647 #endif
648  }
649  // printf("%3d(%4s) ",indices[drop_vec[i].col],drop?"drop":"keep");
650  }
651  drop_vec[i].drop = drop;
652  }
653  // printf("\n");
654  }
655  std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol const& a, DropTol const& b) {
656  return a.col < b.col;
657  });
658 
659  for (LO idxID = 0; idxID < (LO)drop_vec.size(); idxID++) {
660  LO col = indices[drop_vec[idxID].col];
661  // don't drop diagonal
662  if (row == col) {
663  columns[realnnz++] = col;
664  rownnz++;
665  continue;
666  }
667 
668  if (!drop_vec[idxID].drop) {
669  columns[realnnz++] = col;
670  rownnz++;
671  } else {
672  numDropped++;
673  }
674  }
675  // CMS
676  rows[row + 1] = realnnz;
677  }
678  } // end for row
679 
680  numTotal = A->getLocalNumEntries();
681 
682  if (aggregationMayCreateDirichlet) {
683  // If the only element remaining after filtering is diagonal, mark node as boundary
684  for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
685  if (rows[row + 1] - rows[row] <= 1)
686  boundaryNodes[row] = true;
687  }
688  }
689 
690  RCP<LWGraph> graph = rcp(new LWGraph(rows, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), A->getRowMap(), A->getColMap(), "thresholded graph of A"));
691  graph->SetBoundaryNodeMap(boundaryNodes);
692  if (GetVerbLevel() & Statistics1) {
693  GO numLocalBoundaryNodes = 0;
694  GO numGlobalBoundaryNodes = 0;
695  for (size_t i = 0; i < boundaryNodes.size(); ++i)
696  if (boundaryNodes(i))
697  numLocalBoundaryNodes++;
698  RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
699  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
700  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
701  }
702  Set(currentLevel, "Graph", graph);
703  Set(currentLevel, "DofsPerNode", 1);
704 
705  // If we're doing signed classical, we might want to block-diagonalize *after* the dropping
706  if (generateColoringGraph) {
707  RCP<LWGraph> colorGraph;
708  RCP<const Import> importer = A->getCrsGraph()->getImporter();
709  BlockDiagonalizeGraph(graph, ghostedBlockNumber, colorGraph, importer);
710  Set(currentLevel, "Coloring Graph", colorGraph);
711  // #define CMS_DUMP
712 #ifdef CMS_DUMP
713  {
714  Xpetra::IO<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("m_regular_graph." + std::to_string(currentLevel.GetLevelID()), *rcp_dynamic_cast<LWGraph>(graph)->GetCrsGraph());
715  Xpetra::IO<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("m_color_graph." + std::to_string(currentLevel.GetLevelID()), *rcp_dynamic_cast<LWGraph>(colorGraph)->GetCrsGraph());
716  // int rank = graph->GetDomainMap()->getComm()->getRank();
717  // {
718  // std::ofstream ofs(std::string("m_color_graph_") + std::to_string(currentLevel.GetLevelID())+std::string("_") + std::to_string(rank) + std::string(".dat"),std::ofstream::out);
719  // RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(ofs));
720  // colorGraph->print(*fancy,Debug);
721  // }
722  // {
723  // std::ofstream ofs(std::string("m_regular_graph_") + std::to_string(currentLevel.GetLevelID())+std::string("_") + std::to_string(rank) + std::string(".dat"),std::ofstream::out);
724  // RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(ofs));
725  // graph->print(*fancy,Debug);
726  // }
727  }
728 #endif
729  } // end generateColoringGraph
730  } else if (BlockSize > 1 && threshold == STS::zero()) {
731  // Case 3: Multiple DOF/node problem without dropping
732  const RCP<const Map> rowMap = A->getRowMap();
733  const RCP<const Map> colMap = A->getColMap();
734 
735  graphType = "amalgamated";
736 
737  // build node row map (uniqueMap) and node column map (nonUniqueMap)
738  // the arrays rowTranslation and colTranslation contain the local node id
739  // given a local dof id. The data is calculated by the AmalgamationFactory and
740  // stored in the variable container "UnAmalgamationInfo"
741  RCP<const Map> uniqueMap = amalInfo->getNodeRowMap();
742  RCP<const Map> nonUniqueMap = amalInfo->getNodeColMap();
743  Array<LO> rowTranslation = *(amalInfo->getRowTranslation());
744  Array<LO> colTranslation = *(amalInfo->getColTranslation());
745 
746  // get number of local nodes
747  LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
748 
749  // Allocate space for the local graph
750  typename LWGraph::row_type::non_const_type rows("rows", numRows + 1);
751  typename LWGraph::entries_type::non_const_type columns("columns", A->getLocalNumEntries());
752 
753  typename LWGraph::boundary_nodes_type amalgBoundaryNodes("amalgBoundaryNodes", numRows);
754  Kokkos::deep_copy(amalgBoundaryNodes, false);
755 
756  // Detect and record rows that correspond to Dirichlet boundary conditions
757  // TODO If we use ArrayRCP<LO>, then we can record boundary nodes as usual. Size
758  // TODO the array one bigger than the number of local rows, and the last entry can
759  // TODO hold the actual number of boundary nodes. Clever, huh?
760  ArrayRCP<bool> pointBoundaryNodes;
761  pointBoundaryNodes = Teuchos::arcp_const_cast<bool>(MueLu::Utilities<SC, LO, GO, NO>::DetectDirichletRows(*A, dirichletThreshold));
762  if (rowSumTol > 0.)
763  Utilities::ApplyRowSumCriterion(*A, rowSumTol, pointBoundaryNodes);
764 
765  // extract striding information
766  LO blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map)
767  LO blkId = -1; //< the block id within the strided map (or -1 if it is a full block map)
768  LO blkPartSize = A->GetFixedBlockSize(); //< stores the size of the block within the strided map
769  if (A->IsView("stridedMaps") == true) {
770  Teuchos::RCP<const Map> myMap = A->getRowMap("stridedMaps");
771  Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
772  TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
773  blkSize = Teuchos::as<const LO>(strMap->getFixedBlockSize());
774  blkId = strMap->getStridedBlockId();
775  if (blkId > -1)
776  blkPartSize = Teuchos::as<LO>(strMap->getStridingData()[blkId]);
777  }
778 
779  // loop over all local nodes
780  LO realnnz = 0;
781  rows(0) = 0;
782  Array<LO> indicesExtra;
783  for (LO row = 0; row < numRows; row++) {
784  ArrayView<const LO> indices;
785  indicesExtra.resize(0);
786 
787  // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet
788  // Note, that pointBoundaryNodes lives on the dofmap (and not the node map).
789  // Therefore, looping over all dofs is fine here. We use blkPartSize as we work
790  // with local ids.
791  // TODO: Here we have different options of how to define a node to be a boundary (or Dirichlet)
792  // node.
793  bool isBoundary = false;
794  if (pL.get<bool>("aggregation: greedy Dirichlet") == true) {
795  for (LO j = 0; j < blkPartSize; j++) {
796  if (pointBoundaryNodes[row * blkPartSize + j]) {
797  isBoundary = true;
798  break;
799  }
800  }
801  } else {
802  isBoundary = true;
803  for (LO j = 0; j < blkPartSize; j++) {
804  if (!pointBoundaryNodes[row * blkPartSize + j]) {
805  isBoundary = false;
806  break;
807  }
808  }
809  }
810 
811  // Merge rows of A
812  // The array indicesExtra contains local column node ids for the current local node "row"
813  if (!isBoundary)
814  MergeRows(*A, row, indicesExtra, colTranslation);
815  else
816  indicesExtra.push_back(row);
817  indices = indicesExtra;
818  numTotal += indices.size();
819 
820  // add the local column node ids to the full columns array which
821  // contains the local column node ids for all local node rows
822  LO nnz = indices.size(), rownnz = 0;
823  for (LO colID = 0; colID < nnz; colID++) {
824  LO col = indices[colID];
825  columns(realnnz++) = col;
826  rownnz++;
827  }
828 
829  if (rownnz == 1) {
830  // If the only element remaining after filtering is diagonal, mark node as boundary
831  // FIXME: this should really be replaced by the following
832  // if (indices.size() == 1 && indices[0] == row)
833  // boundaryNodes[row] = true;
834  // We do not do it this way now because there is no framework for distinguishing isolated
835  // and boundary nodes in the aggregation algorithms
836  amalgBoundaryNodes[row] = true;
837  }
838  rows(row + 1) = realnnz;
839  } // for (LO row = 0; row < numRows; row++)
840 
841  RCP<LWGraph> graph = rcp(new LWGraph(rows, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), uniqueMap, nonUniqueMap, "amalgamated graph of A"));
842  graph->SetBoundaryNodeMap(amalgBoundaryNodes);
843 
844  if (GetVerbLevel() & Statistics1) {
845  GO numLocalBoundaryNodes = 0;
846  GO numGlobalBoundaryNodes = 0;
847 
848  for (size_t i = 0; i < amalgBoundaryNodes.size(); ++i)
849  if (amalgBoundaryNodes(i))
850  numLocalBoundaryNodes++;
851 
852  RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
853  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
854  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes
855  << " agglomerated Dirichlet nodes" << std::endl;
856  }
857 
858  Set(currentLevel, "Graph", graph);
859  Set(currentLevel, "DofsPerNode", blkSize); // full block size
860 
861  } else if (BlockSize > 1 && threshold != STS::zero()) {
862  // Case 4: Multiple DOF/node problem with dropping
863  const RCP<const Map> rowMap = A->getRowMap();
864  const RCP<const Map> colMap = A->getColMap();
865  graphType = "amalgamated";
866 
867  // build node row map (uniqueMap) and node column map (nonUniqueMap)
868  // the arrays rowTranslation and colTranslation contain the local node id
869  // given a local dof id. The data is calculated by the AmalgamationFactory and
870  // stored in the variable container "UnAmalgamationInfo"
871  RCP<const Map> uniqueMap = amalInfo->getNodeRowMap();
872  RCP<const Map> nonUniqueMap = amalInfo->getNodeColMap();
873  Array<LO> rowTranslation = *(amalInfo->getRowTranslation());
874  Array<LO> colTranslation = *(amalInfo->getColTranslation());
875 
876  // get number of local nodes
877  LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
878 
879  // Allocate space for the local graph
880  typename LWGraph::row_type::non_const_type rows("rows", numRows + 1);
881  typename LWGraph::entries_type::non_const_type columns("columns", A->getLocalNumEntries());
882 
883  typename LWGraph::boundary_nodes_type amalgBoundaryNodes("amalgBoundaryNodes", numRows);
884  Kokkos::deep_copy(amalgBoundaryNodes, false);
885 
886  // Detect and record rows that correspond to Dirichlet boundary conditions
887  // TODO If we use ArrayRCP<LO>, then we can record boundary nodes as usual. Size
888  // TODO the array one bigger than the number of local rows, and the last entry can
889  // TODO hold the actual number of boundary nodes. Clever, huh?
890  auto pointBoundaryNodes = MueLu::Utilities<SC, LO, GO, NO>::DetectDirichletRows_kokkos_host(*A, dirichletThreshold);
891  if (rowSumTol > 0.)
892  Utilities::ApplyRowSumCriterionHost(*A, rowSumTol, pointBoundaryNodes);
893 
894  // extract striding information
895  LO blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map)
896  LO blkId = -1; //< the block id within the strided map (or -1 if it is a full block map)
897  LO blkPartSize = A->GetFixedBlockSize(); //< stores the size of the block within the strided map
898  if (A->IsView("stridedMaps") == true) {
899  Teuchos::RCP<const Map> myMap = A->getRowMap("stridedMaps");
900  Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
901  TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
902  blkSize = Teuchos::as<const LO>(strMap->getFixedBlockSize());
903  blkId = strMap->getStridedBlockId();
904  if (blkId > -1)
905  blkPartSize = Teuchos::as<LO>(strMap->getStridingData()[blkId]);
906  }
907 
908  // extract diagonal data for dropping strategy
910  const ArrayRCP<const SC> ghostedDiagVals = ghostedDiag->getData(0);
911 
912  // loop over all local nodes
913  LO realnnz = 0;
914  rows[0] = 0;
915  Array<LO> indicesExtra;
916  for (LO row = 0; row < numRows; row++) {
917  ArrayView<const LO> indices;
918  indicesExtra.resize(0);
919 
920  // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet
921  // Note, that pointBoundaryNodes lives on the dofmap (and not the node map).
922  // Therefore, looping over all dofs is fine here. We use blkPartSize as we work
923  // with local ids.
924  // TODO: Here we have different options of how to define a node to be a boundary (or Dirichlet)
925  // node.
926  bool isBoundary = false;
927  if (pL.get<bool>("aggregation: greedy Dirichlet") == true) {
928  for (LO j = 0; j < blkPartSize; j++) {
929  if (pointBoundaryNodes[row * blkPartSize + j]) {
930  isBoundary = true;
931  break;
932  }
933  }
934  } else {
935  isBoundary = true;
936  for (LO j = 0; j < blkPartSize; j++) {
937  if (!pointBoundaryNodes[row * blkPartSize + j]) {
938  isBoundary = false;
939  break;
940  }
941  }
942  }
943 
944  // Merge rows of A
945  // The array indicesExtra contains local column node ids for the current local node "row"
946  if (!isBoundary)
947  MergeRowsWithDropping(*A, row, ghostedDiagVals, threshold, indicesExtra, colTranslation);
948  else
949  indicesExtra.push_back(row);
950  indices = indicesExtra;
951  numTotal += indices.size();
952 
953  // add the local column node ids to the full columns array which
954  // contains the local column node ids for all local node rows
955  LO nnz = indices.size(), rownnz = 0;
956  for (LO colID = 0; colID < nnz; colID++) {
957  LO col = indices[colID];
958  columns[realnnz++] = col;
959  rownnz++;
960  }
961 
962  if (rownnz == 1) {
963  // If the only element remaining after filtering is diagonal, mark node as boundary
964  // FIXME: this should really be replaced by the following
965  // if (indices.size() == 1 && indices[0] == row)
966  // boundaryNodes[row] = true;
967  // We do not do it this way now because there is no framework for distinguishing isolated
968  // and boundary nodes in the aggregation algorithms
969  amalgBoundaryNodes[row] = true;
970  }
971  rows[row + 1] = realnnz;
972  } // for (LO row = 0; row < numRows; row++)
973  // columns.resize(realnnz);
974 
975  RCP<LWGraph> graph = rcp(new LWGraph(rows, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), uniqueMap, nonUniqueMap, "amalgamated graph of A"));
976  graph->SetBoundaryNodeMap(amalgBoundaryNodes);
977 
978  if (GetVerbLevel() & Statistics1) {
979  GO numLocalBoundaryNodes = 0;
980  GO numGlobalBoundaryNodes = 0;
981 
982  for (size_t i = 0; i < amalgBoundaryNodes.size(); ++i)
983  if (amalgBoundaryNodes(i))
984  numLocalBoundaryNodes++;
985 
986  RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
987  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
988  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes
989  << " agglomerated Dirichlet nodes" << std::endl;
990  }
991 
992  Set(currentLevel, "Graph", graph);
993  Set(currentLevel, "DofsPerNode", blkSize); // full block size
994  }
995 
996  } else if (algo == "distance laplacian") {
997  LO blkSize = A->GetFixedBlockSize();
998  GO indexBase = A->getRowMap()->getIndexBase();
999  // [*0*] : FIXME
1000  // ap: somehow, if I move this line to [*1*], Belos throws an error
1001  // I'm not sure what's going on. Do we always have to Get data, if we did
1002  // DeclareInput for it?
1003  // RCP<RealValuedMultiVector> Coords = Get< RCP<RealValuedMultiVector > >(currentLevel, "Coordinates");
1004 
1005  // Detect and record rows that correspond to Dirichlet boundary conditions
1006  // TODO If we use ArrayRCP<LO>, then we can record boundary nodes as usual. Size
1007  // TODO the array one bigger than the number of local rows, and the last entry can
1008  // TODO hold the actual number of boundary nodes. Clever, huh?
1009  auto pointBoundaryNodes = MueLu::Utilities<SC, LO, GO, NO>::DetectDirichletRows_kokkos_host(*A, dirichletThreshold);
1010  if (rowSumTol > 0.)
1011  Utilities::ApplyRowSumCriterionHost(*A, rowSumTol, pointBoundaryNodes);
1012 
1013  if ((blkSize == 1) && (threshold == STS::zero())) {
1014  // Trivial case: scalar problem, no dropping. Can return original graph
1015  RCP<LWGraph> graph = rcp(new LWGraph(A->getCrsGraph(), "graph of A"));
1016  graph->SetBoundaryNodeMap(pointBoundaryNodes);
1017  graphType = "unamalgamated";
1018  numTotal = A->getLocalNumEntries();
1019 
1020  if (GetVerbLevel() & Statistics1) {
1021  GO numLocalBoundaryNodes = 0;
1022  GO numGlobalBoundaryNodes = 0;
1023  for (size_t i = 0; i < pointBoundaryNodes.size(); ++i)
1024  if (pointBoundaryNodes(i))
1025  numLocalBoundaryNodes++;
1026  RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
1027  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1028  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
1029  }
1030 
1031  Set(currentLevel, "DofsPerNode", blkSize);
1032  Set(currentLevel, "Graph", graph);
1033 
1034  } else {
1035  // ap: We make quite a few assumptions here; general case may be a lot different,
1036  // but much much harder to implement. We assume that:
1037  // 1) all maps are standard maps, not strided maps
1038  // 2) global indices of dofs in A are related to dofs in coordinates in a simple arithmetic
1039  // way: rows i*blkSize, i*blkSize+1, ..., i*blkSize + (blkSize-1) correspond to node i
1040  //
1041  // NOTE: Potentially, some of the code below could be simplified with UnAmalgamationInfo,
1042  // but as I totally don't understand that code, here is my solution
1043 
1044  // [*1*]: see [*0*]
1045 
1046  // Check that the number of local coordinates is consistent with the #rows in A
1047  TEUCHOS_TEST_FOR_EXCEPTION(A->getRowMap()->getLocalNumElements() / blkSize != Coords->getLocalLength(), Exceptions::Incompatible,
1048  "Coordinate vector length (" << Coords->getLocalLength() << ") is incompatible with number of rows in A (" << A->getRowMap()->getLocalNumElements() << ") by modulo block size (" << blkSize << ").");
1049 
1050  const RCP<const Map> colMap = A->getColMap();
1051  RCP<const Map> uniqueMap, nonUniqueMap;
1052  Array<LO> colTranslation;
1053  if (blkSize == 1) {
1054  uniqueMap = A->getRowMap();
1055  nonUniqueMap = A->getColMap();
1056  graphType = "unamalgamated";
1057 
1058  } else {
1059  uniqueMap = Coords->getMap();
1060  TEUCHOS_TEST_FOR_EXCEPTION(uniqueMap->getIndexBase() != indexBase, Exceptions::Incompatible,
1061  "Different index bases for matrix and coordinates");
1062 
1063  AmalgamationFactory::AmalgamateMap(*(A->getColMap()), *A, nonUniqueMap, colTranslation);
1064 
1065  graphType = "amalgamated";
1066  }
1067  LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
1068 
1069  RCP<RealValuedMultiVector> ghostedCoords;
1070  RCP<Vector> ghostedLaplDiag;
1071  Teuchos::ArrayRCP<SC> ghostedLaplDiagData;
1072  if (threshold != STS::zero()) {
1073  // Get ghost coordinates
1074  RCP<const Import> importer;
1075  {
1076  SubFactoryMonitor m1(*this, "Import construction", currentLevel);
1077  if (blkSize == 1 && realA->getCrsGraph()->getImporter() != Teuchos::null) {
1078  GetOStream(Warnings1) << "Using existing importer from matrix graph" << std::endl;
1079  importer = realA->getCrsGraph()->getImporter();
1080  } else {
1081  GetOStream(Warnings0) << "Constructing new importer instance" << std::endl;
1082  importer = ImportFactory::Build(uniqueMap, nonUniqueMap);
1083  }
1084  } // subtimer
1085  ghostedCoords = Xpetra::MultiVectorFactory<real_type, LO, GO, NO>::Build(nonUniqueMap, Coords->getNumVectors());
1086  {
1087  SubFactoryMonitor m1(*this, "Coordinate import", currentLevel);
1088  ghostedCoords->doImport(*Coords, *importer, Xpetra::INSERT);
1089  } // subtimer
1090 
1091  // Construct Distance Laplacian diagonal
1092  RCP<Vector> localLaplDiag = VectorFactory::Build(uniqueMap);
1093  Array<LO> indicesExtra;
1095  if (threshold != STS::zero()) {
1096  const size_t numVectors = ghostedCoords->getNumVectors();
1097  coordData.reserve(numVectors);
1098  for (size_t j = 0; j < numVectors; j++) {
1099  Teuchos::ArrayRCP<const real_type> tmpData = ghostedCoords->getData(j);
1100  coordData.push_back(tmpData);
1101  }
1102  }
1103  {
1104  SubFactoryMonitor m1(*this, "Laplacian local diagonal", currentLevel);
1105  ArrayRCP<SC> localLaplDiagData = localLaplDiag->getDataNonConst(0);
1106  for (LO row = 0; row < numRows; row++) {
1107  ArrayView<const LO> indices;
1108 
1109  if (blkSize == 1) {
1110  ArrayView<const SC> vals;
1111  A->getLocalRowView(row, indices, vals);
1112 
1113  } else {
1114  // Merge rows of A
1115  indicesExtra.resize(0);
1116  MergeRows(*A, row, indicesExtra, colTranslation);
1117  indices = indicesExtra;
1118  }
1119 
1120  LO nnz = indices.size();
1121  bool haveAddedToDiag = false;
1122  for (LO colID = 0; colID < nnz; colID++) {
1123  const LO col = indices[colID];
1124 
1125  if (row != col) {
1126  if (use_dlap_weights == SINGLE_WEIGHTS) {
1127  /*printf("[%d,%d] Unweighted Distance = %6.4e Weighted Distance = %6.4e\n",row,col,
1128  MueLu::Utilities<real_type,LO,GO,NO>::Distance2(coordData, row, col),
1129  MueLu::Utilities<real_type,LO,GO,NO>::Distance2(dlap_weights(),coordData, row, col));*/
1130  localLaplDiagData[row] += STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(dlap_weights(), coordData, row, col);
1131  } else if (use_dlap_weights == BLOCK_WEIGHTS) {
1132  int block_id = row % interleaved_blocksize;
1133  int block_start = block_id * interleaved_blocksize;
1134  localLaplDiagData[row] += STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(dlap_weights(block_start, interleaved_blocksize), coordData, row, col);
1135  } else {
1136  // printf("[%d,%d] Unweighted Distance = %6.4e\n",row,col,MueLu::Utilities<real_type,LO,GO,NO>::Distance2(coordData, row, col));
1137  localLaplDiagData[row] += STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(coordData, row, col);
1138  }
1139  haveAddedToDiag = true;
1140  }
1141  }
1142  // Deal with the situation where boundary conditions have only been enforced on rows, but not on columns.
1143  // We enforce dropping of these entries by assigning a very large number to the diagonal entries corresponding to BCs.
1144  if (!haveAddedToDiag)
1145  localLaplDiagData[row] = STS::rmax();
1146  }
1147  } // subtimer
1148  {
1149  SubFactoryMonitor m1(*this, "Laplacian distributed diagonal", currentLevel);
1150  ghostedLaplDiag = VectorFactory::Build(nonUniqueMap);
1151  ghostedLaplDiag->doImport(*localLaplDiag, *importer, Xpetra::INSERT);
1152  ghostedLaplDiagData = ghostedLaplDiag->getDataNonConst(0);
1153  } // subtimer
1154 
1155  } else {
1156  GetOStream(Runtime0) << "Skipping distance laplacian construction due to 0 threshold" << std::endl;
1157  }
1158 
1159  // NOTE: ghostedLaplDiagData might be zero if we don't actually calculate the laplacian
1160 
1161  // allocate space for the local graph
1162  typename LWGraph::row_type::non_const_type rows("rows", numRows + 1);
1163  typename LWGraph::entries_type::non_const_type columns("columns", A->getLocalNumEntries());
1164 
1165 #ifdef HAVE_MUELU_DEBUG
1166  // DEBUGGING
1167  for (LO i = 0; i < (LO)columns.size(); i++) columns[i] = -666;
1168 #endif
1169 
1170  // Extra array for if we're allowing symmetrization with cutting
1171  ArrayRCP<LO> rows_stop;
1172  bool use_stop_array = threshold != STS::zero() && distanceLaplacianAlgo == scaled_cut_symmetric;
1173  if (use_stop_array)
1174  // rows_stop = typename LWGraph::row_type::non_const_type("rows_stop", numRows);
1175  rows_stop.resize(numRows);
1176 
1177  typename LWGraph::boundary_nodes_type amalgBoundaryNodes("amalgBoundaryNodes", numRows);
1178  Kokkos::deep_copy(amalgBoundaryNodes, false);
1179 
1180  LO realnnz = 0;
1181  rows(0) = 0;
1182 
1183  Array<LO> indicesExtra;
1184  {
1185  SubFactoryMonitor m1(*this, "Laplacian dropping", currentLevel);
1187  if (threshold != STS::zero()) {
1188  const size_t numVectors = ghostedCoords->getNumVectors();
1189  coordData.reserve(numVectors);
1190  for (size_t j = 0; j < numVectors; j++) {
1191  Teuchos::ArrayRCP<const real_type> tmpData = ghostedCoords->getData(j);
1192  coordData.push_back(tmpData);
1193  }
1194  }
1195 
1196  ArrayView<const SC> vals; // CMS hackery
1197  for (LO row = 0; row < numRows; row++) {
1198  ArrayView<const LO> indices;
1199  indicesExtra.resize(0);
1200  bool isBoundary = false;
1201 
1202  if (blkSize == 1) {
1203  // ArrayView<const SC> vals;//CMS uncomment
1204  A->getLocalRowView(row, indices, vals);
1205  isBoundary = pointBoundaryNodes[row];
1206  } else {
1207  // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet
1208  for (LO j = 0; j < blkSize; j++) {
1209  if (!pointBoundaryNodes[row * blkSize + j]) {
1210  isBoundary = false;
1211  break;
1212  }
1213  }
1214 
1215  // Merge rows of A
1216  if (!isBoundary)
1217  MergeRows(*A, row, indicesExtra, colTranslation);
1218  else
1219  indicesExtra.push_back(row);
1220  indices = indicesExtra;
1221  }
1222  numTotal += indices.size();
1223 
1224  LO nnz = indices.size(), rownnz = 0;
1225 
1226  if (use_stop_array) {
1227  rows(row + 1) = rows(row) + nnz;
1228  realnnz = rows(row);
1229  }
1230 
1231  if (threshold != STS::zero()) {
1232  // default
1233  if (distanceLaplacianAlgo == defaultAlgo) {
1234  /* Standard Distance Laplacian */
1235  for (LO colID = 0; colID < nnz; colID++) {
1236  LO col = indices[colID];
1237 
1238  if (row == col) {
1239  columns(realnnz++) = col;
1240  rownnz++;
1241  continue;
1242  }
1243 
1244  // We do not want the distance Laplacian aggregating boundary nodes
1245  if (isBoundary) continue;
1246 
1247  SC laplVal;
1248  if (use_dlap_weights == SINGLE_WEIGHTS) {
1249  laplVal = STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(dlap_weights(), coordData, row, col);
1250  } else if (use_dlap_weights == BLOCK_WEIGHTS) {
1251  int block_id = row % interleaved_blocksize;
1252  int block_start = block_id * interleaved_blocksize;
1253  laplVal = STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(dlap_weights(block_start, interleaved_blocksize), coordData, row, col);
1254  } else {
1255  laplVal = STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(coordData, row, col);
1256  }
1257  real_type aiiajj = STS::magnitude(realThreshold * realThreshold * ghostedLaplDiagData[row] * ghostedLaplDiagData[col]);
1258  real_type aij = STS::magnitude(laplVal * laplVal);
1259 
1260  if (aij > aiiajj) {
1261  columns(realnnz++) = col;
1262  rownnz++;
1263  } else {
1264  numDropped++;
1265  }
1266  }
1267  } else {
1268  /* Cut Algorithm */
1269  using DropTol = Details::DropTol<real_type, LO>;
1270  std::vector<DropTol> drop_vec;
1271  drop_vec.reserve(nnz);
1272  const real_type zero = Teuchos::ScalarTraits<real_type>::zero();
1273  const real_type one = Teuchos::ScalarTraits<real_type>::one();
1274 
1275  // find magnitudes
1276  for (LO colID = 0; colID < nnz; colID++) {
1277  LO col = indices[colID];
1278 
1279  if (row == col) {
1280  drop_vec.emplace_back(zero, one, colID, false);
1281  continue;
1282  }
1283  // We do not want the distance Laplacian aggregating boundary nodes
1284  if (isBoundary) continue;
1285 
1286  SC laplVal;
1287  if (use_dlap_weights == SINGLE_WEIGHTS) {
1288  laplVal = STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(dlap_weights(), coordData, row, col);
1289  } else if (use_dlap_weights == BLOCK_WEIGHTS) {
1290  int block_id = row % interleaved_blocksize;
1291  int block_start = block_id * interleaved_blocksize;
1292  laplVal = STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(dlap_weights(block_start, interleaved_blocksize), coordData, row, col);
1293  } else {
1294  laplVal = STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(coordData, row, col);
1295  }
1296 
1297  real_type aiiajj = STS::magnitude(ghostedLaplDiagData[row] * ghostedLaplDiagData[col]);
1298  real_type aij = STS::magnitude(laplVal * laplVal);
1299 
1300  drop_vec.emplace_back(aij, aiiajj, colID, false);
1301  }
1302 
1303  const size_t n = drop_vec.size();
1304 
1305  if (distanceLaplacianAlgo == unscaled_cut) {
1306  std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol const& a, DropTol const& b) {
1307  return a.val > b.val;
1308  });
1309 
1310  bool drop = false;
1311  for (size_t i = 1; i < n; ++i) {
1312  if (!drop) {
1313  auto const& x = drop_vec[i - 1];
1314  auto const& y = drop_vec[i];
1315  auto a = x.val;
1316  auto b = y.val;
1317  if (a > realThreshold * b) {
1318  drop = true;
1319 #ifdef HAVE_MUELU_DEBUG
1320  if (distanceLaplacianCutVerbose) {
1321  std::cout << "DJS: KEEP, N, ROW: " << i + 1 << ", " << n << ", " << row << std::endl;
1322  }
1323 #endif
1324  }
1325  }
1326  drop_vec[i].drop = drop;
1327  }
1328  } else if (distanceLaplacianAlgo == scaled_cut || distanceLaplacianAlgo == scaled_cut_symmetric) {
1329  std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol const& a, DropTol const& b) {
1330  return a.val / a.diag > b.val / b.diag;
1331  });
1332 
1333  bool drop = false;
1334  for (size_t i = 1; i < n; ++i) {
1335  if (!drop) {
1336  auto const& x = drop_vec[i - 1];
1337  auto const& y = drop_vec[i];
1338  auto a = x.val / x.diag;
1339  auto b = y.val / y.diag;
1340  if (a > realThreshold * b) {
1341  drop = true;
1342 #ifdef HAVE_MUELU_DEBUG
1343  if (distanceLaplacianCutVerbose) {
1344  std::cout << "DJS: KEEP, N, ROW: " << i + 1 << ", " << n << ", " << row << std::endl;
1345  }
1346 #endif
1347  }
1348  }
1349  drop_vec[i].drop = drop;
1350  }
1351  }
1352 
1353  std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol const& a, DropTol const& b) {
1354  return a.col < b.col;
1355  });
1356 
1357  for (LO idxID = 0; idxID < (LO)drop_vec.size(); idxID++) {
1358  LO col = indices[drop_vec[idxID].col];
1359 
1360  // don't drop diagonal
1361  if (row == col) {
1362  columns(realnnz++) = col;
1363  rownnz++;
1364  // printf("(%d,%d) KEEP %13s matrix = %6.4e\n",row,row,"DIAGONAL",drop_vec[idxID].aux_val);
1365  continue;
1366  }
1367 
1368  if (!drop_vec[idxID].drop) {
1369  columns(realnnz++) = col;
1370  // printf("(%d,%d) KEEP dlap = %6.4e matrix = %6.4e\n",row,col,drop_vec[idxID].val/drop_vec[idxID].diag,drop_vec[idxID].aux_val);
1371  rownnz++;
1372  } else {
1373  // printf("(%d,%d) DROP dlap = %6.4e matrix = %6.4e\n",row,col,drop_vec[idxID].val/drop_vec[idxID].diag,drop_vec[idxID].aux_val);
1374  numDropped++;
1375  }
1376  }
1377  }
1378  } else {
1379  // Skip laplace calculation and threshold comparison for zero threshold
1380  for (LO colID = 0; colID < nnz; colID++) {
1381  LO col = indices[colID];
1382  columns(realnnz++) = col;
1383  rownnz++;
1384  }
1385  }
1386 
1387  if (rownnz == 1) {
1388  // If the only element remaining after filtering is diagonal, mark node as boundary
1389  // FIXME: this should really be replaced by the following
1390  // if (indices.size() == 1 && indices[0] == row)
1391  // boundaryNodes[row] = true;
1392  // We do not do it this way now because there is no framework for distinguishing isolated
1393  // and boundary nodes in the aggregation algorithms
1394  amalgBoundaryNodes[row] = true;
1395  }
1396 
1397  if (use_stop_array)
1398  rows_stop[row] = rownnz + rows[row];
1399  else
1400  rows[row + 1] = realnnz;
1401  } // for (LO row = 0; row < numRows; row++)
1402 
1403  } // subtimer
1404 
1405  if (use_stop_array) {
1406  // Do symmetrization of the cut matrix
1407  // NOTE: We assume nested row/column maps here
1408  for (LO row = 0; row < numRows; row++) {
1409  for (LO colidx = rows[row]; colidx < rows_stop[row]; colidx++) {
1410  LO col = columns[colidx];
1411  if (col >= numRows) continue;
1412 
1413  bool found = false;
1414  for (LO t_col = rows(col); !found && t_col < rows_stop[col]; t_col++) {
1415  if (columns[t_col] == row)
1416  found = true;
1417  }
1418  // We didn't find the transpose buddy, so let's symmetrize, unless we'd be symmetrizing
1419  // into a Dirichlet unknown. In that case don't.
1420  if (!found && !pointBoundaryNodes[col] && Teuchos::as<typename LWGraph::row_type::value_type>(rows_stop[col]) < rows[col + 1]) {
1421  LO new_idx = rows_stop[col];
1422  // printf("(%d,%d) SYMADD entry\n",col,row);
1423  columns[new_idx] = row;
1424  rows_stop[col]++;
1425  numDropped--;
1426  }
1427  }
1428  }
1429 
1430  // Condense everything down
1431  LO current_start = 0;
1432  for (LO row = 0; row < numRows; row++) {
1433  LO old_start = current_start;
1434  for (LO col = rows(row); col < rows_stop[row]; col++) {
1435  if (current_start != col) {
1436  columns(current_start) = columns(col);
1437  }
1438  current_start++;
1439  }
1440  rows[row] = old_start;
1441  }
1442  rows(numRows) = realnnz = current_start;
1443  }
1444 
1445  RCP<LWGraph> graph;
1446  {
1447  SubFactoryMonitor m1(*this, "Build amalgamated graph", currentLevel);
1448  graph = rcp(new LWGraph(rows, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), uniqueMap, nonUniqueMap, "amalgamated graph of A"));
1449  graph->SetBoundaryNodeMap(amalgBoundaryNodes);
1450  } // subtimer
1451 
1452  if (GetVerbLevel() & Statistics1) {
1453  GO numLocalBoundaryNodes = 0;
1454  GO numGlobalBoundaryNodes = 0;
1455 
1456  for (size_t i = 0; i < amalgBoundaryNodes.size(); ++i)
1457  if (amalgBoundaryNodes(i))
1458  numLocalBoundaryNodes++;
1459 
1460  RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
1461  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1462  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " agglomerated Dirichlet nodes"
1463  << " using threshold " << dirichletThreshold << std::endl;
1464  }
1465 
1466  Set(currentLevel, "Graph", graph);
1467  Set(currentLevel, "DofsPerNode", blkSize);
1468  }
1469  }
1470 
1471  if ((GetVerbLevel() & Statistics1) && !(A->GetFixedBlockSize() > 1 && threshold != STS::zero())) {
1472  RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
1473  GO numGlobalTotal, numGlobalDropped;
1474  MueLu_sumAll(comm, numTotal, numGlobalTotal);
1475  MueLu_sumAll(comm, numDropped, numGlobalDropped);
1476  GetOStream(Statistics1) << "Number of dropped entries in " << graphType << " matrix graph: " << numGlobalDropped << "/" << numGlobalTotal;
1477  if (numGlobalTotal != 0)
1478  GetOStream(Statistics1) << " (" << 100 * Teuchos::as<double>(numGlobalDropped) / Teuchos::as<double>(numGlobalTotal) << "%)";
1479  GetOStream(Statistics1) << std::endl;
1480  }
1481 
1482  } else {
1483  // what Tobias has implemented
1484 
1485  SC threshold = as<SC>(pL.get<double>("aggregation: drop tol"));
1486  // GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
1487  GetOStream(Runtime0) << "algorithm = \""
1488  << "failsafe"
1489  << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
1490  Set<bool>(currentLevel, "Filtering", (threshold != STS::zero()));
1491 
1492  RCP<const Map> rowMap = A->getRowMap();
1493  RCP<const Map> colMap = A->getColMap();
1494 
1495  LO blockdim = 1; // block dim for fixed size blocks
1496  GO indexBase = rowMap->getIndexBase(); // index base of maps
1497  GO offset = 0;
1498 
1499  // 1) check for blocking/striding information
1500  if (A->IsView("stridedMaps") &&
1501  Teuchos::rcp_dynamic_cast<const StridedMap>(A->getRowMap("stridedMaps")) != Teuchos::null) {
1502  Xpetra::viewLabel_t oldView = A->SwitchToView("stridedMaps"); // note: "stridedMaps are always non-overlapping (correspond to range and domain maps!)
1503  RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(A->getRowMap());
1504  TEUCHOS_TEST_FOR_EXCEPTION(strMap == Teuchos::null, Exceptions::BadCast, "MueLu::CoalesceFactory::Build: cast to strided row map failed.");
1505  blockdim = strMap->getFixedBlockSize();
1506  offset = strMap->getOffset();
1507  oldView = A->SwitchToView(oldView);
1508  GetOStream(Statistics1) << "CoalesceDropFactory::Build():"
1509  << " found blockdim=" << blockdim << " from strided maps. offset=" << offset << std::endl;
1510  } else
1511  GetOStream(Statistics1) << "CoalesceDropFactory::Build(): no striding information available. Use blockdim=1 with offset=0" << std::endl;
1512 
1513  // 2) get row map for amalgamated matrix (graph of A)
1514  // with same distribution over all procs as row map of A
1515  RCP<const Map> nodeMap = amalInfo->getNodeRowMap();
1516  GetOStream(Statistics1) << "CoalesceDropFactory: nodeMap " << nodeMap->getLocalNumElements() << "/" << nodeMap->getGlobalNumElements() << " elements" << std::endl;
1517 
1518  // 3) create graph of amalgamated matrix
1519  RCP<CrsGraph> crsGraph = CrsGraphFactory::Build(nodeMap, A->getLocalMaxNumRowEntries() * blockdim);
1520 
1521  LO numRows = A->getRowMap()->getLocalNumElements();
1522  LO numNodes = nodeMap->getLocalNumElements();
1523  typename LWGraph::boundary_nodes_type amalgBoundaryNodes("amalgBoundaryNodes", numNodes);
1524  Kokkos::deep_copy(amalgBoundaryNodes, false);
1525  const ArrayRCP<int> numberDirichletRowsPerNode(numNodes, 0); // helper array counting the number of Dirichlet nodes associated with node
1526  bool bIsDiagonalEntry = false; // boolean flag stating that grid==gcid
1527 
1528  // 4) do amalgamation. generate graph of amalgamated matrix
1529  // Note, this code is much more inefficient than the leightwight implementation
1530  // Most of the work has already been done in the AmalgamationFactory
1531  for (LO row = 0; row < numRows; row++) {
1532  // get global DOF id
1533  GO grid = rowMap->getGlobalElement(row);
1534 
1535  // reinitialize boolean helper variable
1536  bIsDiagonalEntry = false;
1537 
1538  // translate grid to nodeid
1539  GO nodeId = AmalgamationFactory::DOFGid2NodeId(grid, blockdim, offset, indexBase);
1540 
1541  size_t nnz = A->getNumEntriesInLocalRow(row);
1544  A->getLocalRowView(row, indices, vals);
1545 
1546  RCP<std::vector<GO>> cnodeIds = Teuchos::rcp(new std::vector<GO>); // global column block ids
1547  LO realnnz = 0;
1548  for (LO col = 0; col < Teuchos::as<LO>(nnz); col++) {
1549  GO gcid = colMap->getGlobalElement(indices[col]); // global column id
1550 
1551  if (vals[col] != STS::zero()) {
1552  GO cnodeId = AmalgamationFactory::DOFGid2NodeId(gcid, blockdim, offset, indexBase);
1553  cnodeIds->push_back(cnodeId);
1554  realnnz++; // increment number of nnz in matrix row
1555  if (grid == gcid) bIsDiagonalEntry = true;
1556  }
1557  }
1558 
1559  if (realnnz == 1 && bIsDiagonalEntry == true) {
1560  LO lNodeId = nodeMap->getLocalElement(nodeId);
1561  numberDirichletRowsPerNode[lNodeId] += 1; // increment Dirichlet row counter associated with lNodeId
1562  if (numberDirichletRowsPerNode[lNodeId] == blockdim) // mark full Dirichlet nodes
1563  amalgBoundaryNodes[lNodeId] = true;
1564  }
1565 
1566  Teuchos::ArrayRCP<GO> arr_cnodeIds = Teuchos::arcp(cnodeIds);
1567 
1568  if (arr_cnodeIds.size() > 0)
1569  crsGraph->insertGlobalIndices(nodeId, arr_cnodeIds());
1570  }
1571  // fill matrix graph
1572  crsGraph->fillComplete(nodeMap, nodeMap);
1573 
1574  // 5) create MueLu Graph object
1575  RCP<LWGraph> graph = rcp(new LWGraph(crsGraph, "amalgamated graph of A"));
1576 
1577  // Detect and record rows that correspond to Dirichlet boundary conditions
1578  graph->SetBoundaryNodeMap(amalgBoundaryNodes);
1579 
1580  if (GetVerbLevel() & Statistics1) {
1581  GO numLocalBoundaryNodes = 0;
1582  GO numGlobalBoundaryNodes = 0;
1583  for (size_t i = 0; i < amalgBoundaryNodes.size(); ++i)
1584  if (amalgBoundaryNodes(i))
1585  numLocalBoundaryNodes++;
1586  RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
1587  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1588  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
1589  }
1590 
1591  // 6) store results in Level
1592  // graph->SetBoundaryNodeMap(gBoundaryNodeMap);
1593  Set(currentLevel, "DofsPerNode", blockdim);
1594  Set(currentLevel, "Graph", graph);
1595 
1596  } // if (doExperimentalWrap) ... else ...
1597 
1598 } // Build
1599 
1600 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1601 void CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::MergeRows(const Matrix& A, const LO row, Array<LO>& cols, const Array<LO>& translation) const {
1602  typedef typename ArrayView<const LO>::size_type size_type;
1603 
1604  // extract striding information
1605  LO blkSize = A.GetFixedBlockSize(); //< stores the size of the block within the strided map
1606  if (A.IsView("stridedMaps") == true) {
1607  Teuchos::RCP<const Map> myMap = A.getRowMap("stridedMaps");
1608  Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
1609  TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
1610  if (strMap->getStridedBlockId() > -1)
1611  blkSize = Teuchos::as<LO>(strMap->getStridingData()[strMap->getStridedBlockId()]);
1612  }
1613 
1614  // count nonzero entries in all dof rows associated with node row
1615  size_t nnz = 0, pos = 0;
1616  for (LO j = 0; j < blkSize; j++)
1617  nnz += A.getNumEntriesInLocalRow(row * blkSize + j);
1618 
1619  if (nnz == 0) {
1620  cols.resize(0);
1621  return;
1622  }
1623 
1624  cols.resize(nnz);
1625 
1626  // loop over all local dof rows associated with local node "row"
1627  ArrayView<const LO> inds;
1628  ArrayView<const SC> vals;
1629  for (LO j = 0; j < blkSize; j++) {
1630  A.getLocalRowView(row * blkSize + j, inds, vals);
1631  size_type numIndices = inds.size();
1632 
1633  if (numIndices == 0) // skip empty dof rows
1634  continue;
1635 
1636  // cols: stores all local node ids for current local node id "row"
1637  cols[pos++] = translation[inds[0]];
1638  for (size_type k = 1; k < numIndices; k++) {
1639  LO nodeID = translation[inds[k]];
1640  // Here we try to speed up the process by reducing the size of an array
1641  // to sort. This works if the column nonzeros belonging to the same
1642  // node are stored consequently.
1643  if (nodeID != cols[pos - 1])
1644  cols[pos++] = nodeID;
1645  }
1646  }
1647  cols.resize(pos);
1648  nnz = pos;
1649 
1650  // Sort and remove duplicates
1651  std::sort(cols.begin(), cols.end());
1652  pos = 0;
1653  for (size_t j = 1; j < nnz; j++)
1654  if (cols[j] != cols[pos])
1655  cols[++pos] = cols[j];
1656  cols.resize(pos + 1);
1657 }
1658 
1659 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1660 void CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::MergeRowsWithDropping(const Matrix& A, const LO row, const ArrayRCP<const SC>& ghostedDiagVals, SC threshold, Array<LO>& cols, const Array<LO>& translation) const {
1661  typedef typename ArrayView<const LO>::size_type size_type;
1662  typedef Teuchos::ScalarTraits<SC> STS;
1663 
1664  // extract striding information
1665  LO blkSize = A.GetFixedBlockSize(); //< stores the size of the block within the strided map
1666  if (A.IsView("stridedMaps") == true) {
1667  Teuchos::RCP<const Map> myMap = A.getRowMap("stridedMaps");
1668  Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
1669  TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
1670  if (strMap->getStridedBlockId() > -1)
1671  blkSize = Teuchos::as<LO>(strMap->getStridingData()[strMap->getStridedBlockId()]);
1672  }
1673 
1674  // count nonzero entries in all dof rows associated with node row
1675  size_t nnz = 0, pos = 0;
1676  for (LO j = 0; j < blkSize; j++)
1677  nnz += A.getNumEntriesInLocalRow(row * blkSize + j);
1678 
1679  if (nnz == 0) {
1680  cols.resize(0);
1681  return;
1682  }
1683 
1684  cols.resize(nnz);
1685 
1686  // loop over all local dof rows associated with local node "row"
1687  ArrayView<const LO> inds;
1688  ArrayView<const SC> vals;
1689  for (LO j = 0; j < blkSize; j++) {
1690  A.getLocalRowView(row * blkSize + j, inds, vals);
1691  size_type numIndices = inds.size();
1692 
1693  if (numIndices == 0) // skip empty dof rows
1694  continue;
1695 
1696  // cols: stores all local node ids for current local node id "row"
1697  LO prevNodeID = -1;
1698  for (size_type k = 0; k < numIndices; k++) {
1699  LO dofID = inds[k];
1700  LO nodeID = translation[inds[k]];
1701 
1702  // we avoid a square root by using squared values
1703  typename STS::magnitudeType aiiajj = STS::magnitude(threshold * threshold * ghostedDiagVals[dofID] * ghostedDiagVals[row * blkSize + j]); // eps^2 * |a_ii| * |a_jj|
1704  typename STS::magnitudeType aij = STS::magnitude(vals[k] * vals[k]);
1705 
1706  // check dropping criterion
1707  if (aij > aiiajj || (row * blkSize + j == dofID)) {
1708  // accept entry in graph
1709 
1710  // Here we try to speed up the process by reducing the size of an array
1711  // to sort. This works if the column nonzeros belonging to the same
1712  // node are stored consequently.
1713  if (nodeID != prevNodeID) {
1714  cols[pos++] = nodeID;
1715  prevNodeID = nodeID;
1716  }
1717  }
1718  }
1719  }
1720  cols.resize(pos);
1721  nnz = pos;
1722 
1723  // Sort and remove duplicates
1724  std::sort(cols.begin(), cols.end());
1725  pos = 0;
1726  for (size_t j = 1; j < nnz; j++)
1727  if (cols[j] != cols[pos])
1728  cols[++pos] = cols[j];
1729  cols.resize(pos + 1);
1730 
1731  return;
1732 }
1733 
1734 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1736  typedef Teuchos::ScalarTraits<SC> STS;
1737 
1738  const ParameterList& pL = GetParameterList();
1739  const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
1740  const typename STS::magnitudeType rowSumTol = as<typename STS::magnitudeType>(pL.get<double>("aggregation: row sum drop tol"));
1741 
1742  RCP<LocalOrdinalVector> BlockNumber = Get<RCP<LocalOrdinalVector>>(currentLevel, "BlockNumber");
1743  RCP<LocalOrdinalVector> ghostedBlockNumber;
1744  GetOStream(Statistics1) << "Using BlockDiagonal Graph before dropping (with provided blocking)" << std::endl;
1745 
1746  // Ghost the column block numbers if we need to
1747  RCP<const Import> importer = A->getCrsGraph()->getImporter();
1748  if (!importer.is_null()) {
1749  SubFactoryMonitor m1(*this, "Block Number import", currentLevel);
1750  ghostedBlockNumber = Xpetra::VectorFactory<LO, LO, GO, NO>::Build(importer->getTargetMap());
1751  ghostedBlockNumber->doImport(*BlockNumber, *importer, Xpetra::INSERT);
1752  } else {
1753  ghostedBlockNumber = BlockNumber;
1754  }
1755 
1756  // Accessors for block numbers
1757  Teuchos::ArrayRCP<const LO> row_block_number = BlockNumber->getData(0);
1758  Teuchos::ArrayRCP<const LO> col_block_number = ghostedBlockNumber->getData(0);
1759 
1760  // allocate space for the local graph
1761  typename CrsMatrix::local_matrix_type::row_map_type::HostMirror::non_const_type rows_mat;
1762  typename LWGraph::row_type::non_const_type rows_graph;
1763  typename LWGraph::entries_type::non_const_type columns;
1764  typename CrsMatrix::local_matrix_type::values_type::HostMirror::non_const_type values;
1765  RCP<CrsMatrixWrap> crs_matrix_wrap;
1766 
1767  if (generate_matrix) {
1768  crs_matrix_wrap = rcp(new CrsMatrixWrap(A->getRowMap(), A->getColMap(), 0));
1769  rows_mat = typename CrsMatrix::local_matrix_type::row_map_type::HostMirror::non_const_type("rows_mat", A->getLocalNumRows() + 1);
1770  } else {
1771  rows_graph = typename LWGraph::row_type::non_const_type("rows_graph", A->getLocalNumRows() + 1);
1772  }
1773  columns = typename LWGraph::entries_type::non_const_type("columns", A->getLocalNumEntries());
1774  values = typename CrsMatrix::local_matrix_type::values_type::HostMirror::non_const_type("values", A->getLocalNumEntries());
1775 
1776  LO realnnz = 0;
1777  GO numDropped = 0, numTotal = 0;
1778  for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
1779  LO row_block = row_block_number[row];
1780  size_t nnz = A->getNumEntriesInLocalRow(row);
1781  ArrayView<const LO> indices;
1782  ArrayView<const SC> vals;
1783  A->getLocalRowView(row, indices, vals);
1784 
1785  LO rownnz = 0;
1786  for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
1787  LO col = indices[colID];
1788  LO col_block = col_block_number[col];
1789 
1790  if (row_block == col_block) {
1791  if (generate_matrix) values[realnnz] = vals[colID];
1792  columns[realnnz++] = col;
1793  rownnz++;
1794  } else
1795  numDropped++;
1796  }
1797  if (generate_matrix)
1798  rows_mat[row + 1] = realnnz;
1799  else
1800  rows_graph[row + 1] = realnnz;
1801  }
1802 
1803  auto boundaryNodes = MueLu::Utilities<SC, LO, GO, NO>::DetectDirichletRows_kokkos_host(*A, dirichletThreshold);
1804  if (rowSumTol > 0.)
1805  Utilities::ApplyRowSumCriterionHost(*A, rowSumTol, boundaryNodes);
1806 
1807  numTotal = A->getLocalNumEntries();
1808 
1809  if (GetVerbLevel() & Statistics1) {
1810  GO numLocalBoundaryNodes = 0;
1811  GO numGlobalBoundaryNodes = 0;
1812  for (size_t i = 0; i < boundaryNodes.size(); ++i)
1813  if (boundaryNodes(i))
1814  numLocalBoundaryNodes++;
1815  RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
1816  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1817  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
1818 
1819  GO numGlobalTotal, numGlobalDropped;
1820  MueLu_sumAll(comm, numTotal, numGlobalTotal);
1821  MueLu_sumAll(comm, numDropped, numGlobalDropped);
1822  GetOStream(Statistics1) << "Number of dropped entries in block-diagonalized matrix graph: " << numGlobalDropped << "/" << numGlobalTotal;
1823  if (numGlobalTotal != 0)
1824  GetOStream(Statistics1) << " (" << 100 * Teuchos::as<double>(numGlobalDropped) / Teuchos::as<double>(numGlobalTotal) << "%)";
1825  GetOStream(Statistics1) << std::endl;
1826  }
1827 
1828  Set(currentLevel, "Filtering", true);
1829 
1830  if (generate_matrix) {
1831  // NOTE: Trying to use A's Import/Export objects will cause the code to segfault back in Build() with errors on the Import
1832  // if you're using Epetra. I'm not really sure why. By using the Col==Domain and Row==Range maps, we get null Import/Export objects
1833  // here, which is legit, because we never use them anyway.
1834  if constexpr (std::is_same<typename LWGraph::row_type,
1835  typename CrsMatrix::local_matrix_type::row_map_type>::value) {
1836  crs_matrix_wrap->getCrsMatrix()->setAllValues(rows_mat, columns, values);
1837  } else {
1838  auto rows_mat2 = typename CrsMatrix::local_matrix_type::row_map_type::non_const_type("rows_mat2", rows_mat.extent(0));
1839  Kokkos::deep_copy(rows_mat2, rows_mat);
1840  auto columns2 = typename CrsMatrix::local_graph_type::entries_type::non_const_type("columns2", columns.extent(0));
1841  Kokkos::deep_copy(columns2, columns);
1842  auto values2 = typename CrsMatrix::local_matrix_type::values_type::non_const_type("values2", values.extent(0));
1843  Kokkos::deep_copy(values2, values);
1844  crs_matrix_wrap->getCrsMatrix()->setAllValues(rows_mat2, columns2, values2);
1845  }
1846  crs_matrix_wrap->getCrsMatrix()->expertStaticFillComplete(A->getColMap(), A->getRowMap());
1847  } else {
1848  RCP<LWGraph> graph = rcp(new LWGraph(rows_graph, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), A->getRowMap(), A->getColMap(), "block-diagonalized graph of A"));
1849  graph->SetBoundaryNodeMap(boundaryNodes);
1850  Set(currentLevel, "Graph", graph);
1851  }
1852 
1853  Set(currentLevel, "DofsPerNode", 1);
1854  return crs_matrix_wrap;
1855 }
1856 
1857 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1859  TEUCHOS_TEST_FOR_EXCEPTION(ghostedBlockNumber.is_null(), Exceptions::RuntimeError, "BlockDiagonalizeGraph(): ghostedBlockNumber is null.");
1860  const ParameterList& pL = GetParameterList();
1861 
1862  const bool localizeColoringGraph = pL.get<bool>("aggregation: coloring: localize color graph");
1863 
1864  GetOStream(Statistics1) << "Using BlockDiagonal Graph after Dropping (with provided blocking)";
1865  if (localizeColoringGraph)
1866  GetOStream(Statistics1) << ", with localization" << std::endl;
1867  else
1868  GetOStream(Statistics1) << ", without localization" << std::endl;
1869 
1870  // Accessors for block numbers
1871  Teuchos::ArrayRCP<const LO> row_block_number = ghostedBlockNumber->getData(0);
1872  Teuchos::ArrayRCP<const LO> col_block_number = ghostedBlockNumber->getData(0);
1873 
1874  // allocate space for the local graph
1875  ArrayRCP<size_t> rows_mat;
1876  typename LWGraph::row_type::non_const_type rows_graph("rows_graph", inputGraph->GetNodeNumVertices() + 1);
1877  typename LWGraph::entries_type::non_const_type columns("columns", inputGraph->GetNodeNumEdges());
1878 
1879  LO realnnz = 0;
1880  GO numDropped = 0, numTotal = 0;
1881  const LO numRows = Teuchos::as<LO>(inputGraph->GetDomainMap()->getLocalNumElements());
1882  if (localizeColoringGraph) {
1883  for (LO row = 0; row < numRows; ++row) {
1884  LO row_block = row_block_number[row];
1885  auto indices = inputGraph->getNeighborVertices(row);
1886 
1887  LO rownnz = 0;
1888  for (LO colID = 0; colID < Teuchos::as<LO>(indices.length); colID++) {
1889  LO col = indices(colID);
1890  LO col_block = col_block_number[col];
1891 
1892  if ((row_block == col_block) && (col < numRows)) {
1893  columns(realnnz++) = col;
1894  rownnz++;
1895  } else
1896  numDropped++;
1897  }
1898  rows_graph(row + 1) = realnnz;
1899  }
1900  } else {
1901  // ghosting of boundary node map
1902  auto boundaryNodes = inputGraph->GetBoundaryNodeMap();
1904  for (size_t i = 0; i < inputGraph->GetNodeNumVertices(); i++)
1905  boundaryNodesVector->getDataNonConst(0)[i] = boundaryNodes[i];
1906  // Xpetra::IO<LocalOrdinal,LocalOrdinal,GlobalOrdinal,Node>::Write("boundary",*boundaryNodesVector);
1908  boundaryColumnVector->doImport(*boundaryNodesVector, *importer, Xpetra::INSERT);
1909  auto boundaryColumn = boundaryColumnVector->getData(0);
1910 
1911  for (LO row = 0; row < numRows; ++row) {
1912  LO row_block = row_block_number[row];
1913  auto indices = inputGraph->getNeighborVertices(row);
1914 
1915  LO rownnz = 0;
1916  for (LO colID = 0; colID < Teuchos::as<LO>(indices.length); colID++) {
1917  LO col = indices(colID);
1918  LO col_block = col_block_number[col];
1919 
1920  if ((row_block == col_block) && ((row == col) || (boundaryColumn[col] == 0))) {
1921  columns(realnnz++) = col;
1922  rownnz++;
1923  } else
1924  numDropped++;
1925  }
1926  rows_graph(row + 1) = realnnz;
1927  }
1928  }
1929 
1930  numTotal = inputGraph->GetNodeNumEdges();
1931 
1932  if (GetVerbLevel() & Statistics1) {
1933  RCP<const Teuchos::Comm<int>> comm = inputGraph->GetDomainMap()->getComm();
1934  GO numGlobalTotal, numGlobalDropped;
1935  MueLu_sumAll(comm, numTotal, numGlobalTotal);
1936  MueLu_sumAll(comm, numDropped, numGlobalDropped);
1937  GetOStream(Statistics1) << "Number of dropped entries in block-diagonalized matrix graph: " << numGlobalDropped << "/" << numGlobalTotal;
1938  if (numGlobalTotal != 0)
1939  GetOStream(Statistics1) << " (" << 100 * Teuchos::as<double>(numGlobalDropped) / Teuchos::as<double>(numGlobalTotal) << "%)";
1940  GetOStream(Statistics1) << std::endl;
1941  }
1942 
1943  if (localizeColoringGraph) {
1944  outputGraph = rcp(new LWGraph(rows_graph, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), inputGraph->GetDomainMap(), inputGraph->GetImportMap(), "block-diagonalized graph of A"));
1945  outputGraph->SetBoundaryNodeMap(inputGraph->GetBoundaryNodeMap());
1946  } else {
1947  TEUCHOS_ASSERT(inputGraph->GetDomainMap()->lib() == Xpetra::UseTpetra);
1948 #ifdef HAVE_XPETRA_TPETRA
1949  auto outputGraph2 = rcp(new LWGraph(rows_graph, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), inputGraph->GetDomainMap(), inputGraph->GetImportMap(), "block-diagonalized graph of A"));
1950 
1951  auto tpGraph = Xpetra::toTpetra(rcp_const_cast<const CrsGraph>(outputGraph2->GetCrsGraph()));
1953  auto tpGraphSym = sym->symmetrize();
1954  auto lclGraphSym = tpGraphSym->getLocalGraphHost();
1955  auto colIndsSym = lclGraphSym.entries;
1956 
1957  auto rowsSym = tpGraphSym->getLocalRowPtrsHost();
1958  typename LWGraph::row_type::non_const_type rows_graphSym("rows_graphSym", rowsSym.size());
1959  for (size_t row = 0; row < rowsSym.size(); row++)
1960  rows_graphSym(row) = rowsSym(row);
1961  outputGraph = rcp(new LWGraph(rows_graphSym, colIndsSym, inputGraph->GetDomainMap(), Xpetra::toXpetra(tpGraphSym->getColMap()), "block-diagonalized graph of A"));
1962  outputGraph->SetBoundaryNodeMap(inputGraph->GetBoundaryNodeMap());
1963 #endif
1964  }
1965 }
1966 
1967 } // namespace MueLu
1968 
1969 #endif // MUELU_COALESCEDROPFACTORY_DEF_HPP
Important warning messages (one line)
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
Exception indicating invalid cast attempted.
void reserve(size_type n)
void BlockDiagonalizeGraph(const RCP< LWGraph > &inputGraph, const RCP< LocalOrdinalVector > &ghostedBlockNumber, RCP< LWGraph > &outputGraph, RCP< const Import > &importer) const
#define MueLu_sumAll(rcpComm, in, out)
static void ApplyRowSumCriterionHost(const Matrix &A, const typename Teuchos::ScalarTraits< Scalar >::magnitudeType rowSumTol, Kokkos::View< bool *, Kokkos::HostSpace > &dirichletRows)
KOKKOS_INLINE_FUNCTION void SetBoundaryNodeMap(const boundary_nodes_type bndry)
Set boolean array indicating which rows correspond to Dirichlet boundaries.
static Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Build(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node >> &map, size_t NumVectors, bool zeroOut=true)
void setValidator(RCP< const ParameterEntryValidator > const &validator)
GlobalOrdinal GO
T & get(const std::string &name, T def_value)
ParameterList & set(std::string const &name, T const &value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
static void ApplyRowSumCriterion(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Magnitude rowSumTol, Teuchos::ArrayRCP< bool > &dirichletRows)
Apply Rowsum Criterion.
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
static void Write(const std::string &fileName, const Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > &M)
Print more statistics.
size_type size() const
DropTol & operator=(DropTol const &)=default
Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > BlockDiagonalize(Level &currentLevel, const RCP< Matrix > &A, bool generate_matrix) const
LocalOrdinal LO
One-liner description of what is happening.
KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const
Return number of graph vertices.
KOKKOS_INLINE_FUNCTION size_type GetNodeNumEdges() const
Return number of graph edges.
KOKKOS_INLINE_FUNCTION const boundary_nodes_type GetBoundaryNodeMap() const
Returns map with global ids of boundary nodes.
size_type size() const
Exception throws to report incompatible objects (like maps).
static Teuchos::ArrayRCP< Magnitude > GetMatrixMaxMinusOffDiagonal(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
Return vector containing: max_{i=k}(-a_ik), for each for i in the matrix.
static const GlobalOrdinal DOFGid2NodeId(GlobalOrdinal gid, LocalOrdinal blockSize, const GlobalOrdinal offset, const GlobalOrdinal indexBase)
Translate global (row/column) id to global amalgamation block id.
Scalar GetThreshold() const
Return threshold value.
Additional warnings.
void resize(const size_type n, const T &val=T())
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
static Teuchos::ArrayRCP< const bool > DetectDirichletRows(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Magnitude &tol=Teuchos::ScalarTraits< Magnitude >::zero(), bool count_twos_as_dirichlet=false)
Detect Dirichlet rows.
#define SET_VALID_ENTRY(name)
static void AmalgamateMap(const Map &sourceMap, const Matrix &A, RCP< const Map > &amalgamatedMap, Array< LO > &translation)
Method to create merged map for systems of PDEs.
static RCP< Vector > GetMatrixOverlappedDiagonal(const Matrix &A)
Extract Overlapped Matrix Diagonal.
void resize(size_type new_size, const value_type &x=value_type())
void Build(Level &currentLevel) const
Build an object with this factory.
std::string viewLabel_t
static RCP< Vector > Build(const Teuchos::RCP< const Map > &map, bool zeroOut=true)
iterator end()
void DeclareInput(Level &currentLevel) const
Input.
KOKKOS_INLINE_FUNCTION neighbor_vertices_type getNeighborVertices(LO i) const
Return the list of vertices adjacent to the vertex &#39;v&#39;.
Print class parameters.
const RCP< const Map > GetImportMap() const
Return overlapping import map (nodes).
RCP< const Tpetra::CrsGraph< LocalOrdinal, GlobalOrdinal, Node > > toTpetra(const RCP< const CrsGraph< LocalOrdinal, GlobalOrdinal, Node > > &graph)
void push_back(const value_type &x)
void MergeRows(const Matrix &A, const LO row, Array< LO > &cols, const Array< LO > &translation) const
Method to merge rows of matrix for systems of PDEs.
RCP< const CrsGraph< int, GlobalOrdinal, Node > > toXpetra(const Epetra_CrsGraph &g)
size_type size() const
Scalar SC
const RCP< const Map > GetDomainMap() const
Lightweight MueLu representation of a compressed row storage graph.
DropTol(real_type val_, real_type diag_, LO col_, bool drop_)
Exception throws to report errors in the internal logical of the program.
#define TEUCHOS_ASSERT(assertion_test)
static Kokkos::View< bool *, typename Kokkos::HostSpace > DetectDirichletRows_kokkos_host(const Matrix &A, const Magnitude &tol=Teuchos::ScalarTraits< typename Teuchos::ScalarTraits< SC >::magnitudeType >::zero(), const bool count_twos_as_dirichlet=false)
static Teuchos::ScalarTraits< Scalar >::magnitudeType Distance2(const Teuchos::Array< Teuchos::ArrayRCP< const Scalar >> &v, LocalOrdinal i0, LocalOrdinal i1)
Squared distance between two rows in a multivector.
ParameterEntry & getEntry(const std::string &name)
bool is_null() const
iterator begin()
void MergeRowsWithDropping(const Matrix &A, const LO row, const ArrayRCP< const SC > &ghostedDiagVals, SC threshold, Array< LO > &cols, const Array< LO > &translation) const
bool is_null() const