MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_FilteredAFactory_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_FILTEREDAFACTORY_DEF_HPP
47 #define MUELU_FILTEREDAFACTORY_DEF_HPP
48 
49 #include <Xpetra_Matrix.hpp>
50 #include <Xpetra_MatrixFactory.hpp>
51 #include <Xpetra_IO.hpp>
52 
54 
55 #include "MueLu_Level.hpp"
56 #include "MueLu_MasterList.hpp"
57 #include "MueLu_Monitor.hpp"
58 #include "MueLu_Aggregates.hpp"
59 #include "MueLu_AmalgamationInfo.hpp"
60 #include "MueLu_Utilities.hpp"
61 
62 // Variable to enable lots of debug output
63 #define MUELU_FILTEREDAFACTORY_LOTS_OF_PRINTING 0
64 
65 namespace MueLu {
66 
67 template <class T>
68 void sort_and_unique(T& array) {
69  std::sort(array.begin(), array.end());
70  std::unique(array.begin(), array.end());
71 }
72 
73 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
75  RCP<ParameterList> validParamList = rcp(new ParameterList());
76 
77 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
78  SET_VALID_ENTRY("filtered matrix: use lumping");
79  SET_VALID_ENTRY("filtered matrix: reuse graph");
80  SET_VALID_ENTRY("filtered matrix: reuse eigenvalue");
81  SET_VALID_ENTRY("filtered matrix: use root stencil");
82  SET_VALID_ENTRY("filtered matrix: use spread lumping");
83  SET_VALID_ENTRY("filtered matrix: spread lumping diag dom growth factor");
84  SET_VALID_ENTRY("filtered matrix: spread lumping diag dom cap");
85  SET_VALID_ENTRY("filtered matrix: Dirichlet threshold");
86 #undef SET_VALID_ENTRY
87 
88  validParamList->set<RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A used for filtering");
89  validParamList->set<RCP<const FactoryBase> >("Graph", Teuchos::null, "Generating factory for coalesced filtered graph");
90  validParamList->set<RCP<const FactoryBase> >("Filtering", Teuchos::null, "Generating factory for filtering boolean");
91 
92  // Only need these for the "use root stencil" option
93  validParamList->set<RCP<const FactoryBase> >("Aggregates", Teuchos::null, "Generating factory of the aggregates");
94  validParamList->set<RCP<const FactoryBase> >("UnAmalgamationInfo", Teuchos::null, "Generating factory of UnAmalgamationInfo");
95  return validParamList;
96 }
97 
98 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
100  Input(currentLevel, "A");
101  Input(currentLevel, "Filtering");
102  Input(currentLevel, "Graph");
103  const ParameterList& pL = GetParameterList();
104  if (pL.isParameter("filtered matrix: use root stencil") && pL.get<bool>("filtered matrix: use root stencil") == true) {
105  Input(currentLevel, "Aggregates");
106  Input(currentLevel, "UnAmalgamationInfo");
107  }
108 }
109 
110 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
112  FactoryMonitor m(*this, "Matrix filtering", currentLevel);
113 
114  RCP<Matrix> A = Get<RCP<Matrix> >(currentLevel, "A");
115  if (Get<bool>(currentLevel, "Filtering") == false) {
116  GetOStream(Runtime0) << "Filtered matrix is not being constructed as no filtering is being done" << std::endl;
117  Set(currentLevel, "A", A);
118  return;
119  }
120 
121  const ParameterList& pL = GetParameterList();
122  bool lumping = pL.get<bool>("filtered matrix: use lumping");
123  if (lumping)
124  GetOStream(Runtime0) << "Lumping dropped entries" << std::endl;
125 
126  bool use_spread_lumping = pL.get<bool>("filtered matrix: use spread lumping");
127  if (use_spread_lumping && (!lumping))
128  throw std::runtime_error("Must also request 'filtered matrix: use lumping' in order to use spread lumping");
129 
130  if (use_spread_lumping) {
131  GetOStream(Runtime0) << "using spread lumping " << std::endl;
132  }
133 
134  double DdomAllowGrowthRate = 1.1;
135  double DdomCap = 2.0;
136  if (use_spread_lumping) {
137  DdomAllowGrowthRate = pL.get<double>("filtered matrix: spread lumping diag dom growth factor");
138  DdomCap = pL.get<double>("filtered matrix: spread lumping diag dom cap");
139  }
140  bool use_root_stencil = lumping && pL.get<bool>("filtered matrix: use root stencil");
141  if (use_root_stencil)
142  GetOStream(Runtime0) << "Using root stencil for dropping" << std::endl;
143  double dirichlet_threshold = pL.get<double>("filtered matrix: Dirichlet threshold");
144  if (dirichlet_threshold >= 0.0)
145  GetOStream(Runtime0) << "Filtering Dirichlet threshold of " << dirichlet_threshold << std::endl;
146 
147  if (use_root_stencil || pL.get<bool>("filtered matrix: reuse graph"))
148  GetOStream(Runtime0) << "Reusing graph" << std::endl;
149  else
150  GetOStream(Runtime0) << "Generating new graph" << std::endl;
151 
152  RCP<LWGraph> G = Get<RCP<LWGraph> >(currentLevel, "Graph");
154  FILE* f = fopen("graph.dat", "w");
155  size_t numGRows = G->GetNodeNumVertices();
156  for (size_t i = 0; i < numGRows; i++) {
157  // Set up filtering array
158  auto indsG = G->getNeighborVertices(i);
159  for (size_t j = 0; j < (size_t)indsG.length; j++) {
160  fprintf(f, "%d %d 1.0\n", (int)i, (int)indsG(j));
161  }
162  }
163  fclose(f);
164  }
165 
166  RCP<ParameterList> fillCompleteParams(new ParameterList);
167  fillCompleteParams->set("No Nonlocal Changes", true);
168 
169  RCP<Matrix> filteredA;
170  if (use_root_stencil) {
171  filteredA = MatrixFactory::Build(A->getCrsGraph());
172  filteredA->fillComplete(fillCompleteParams);
173  filteredA->resumeFill();
174  BuildNewUsingRootStencil(*A, *G, dirichlet_threshold, currentLevel, *filteredA, use_spread_lumping, DdomAllowGrowthRate, DdomCap);
175  filteredA->fillComplete(fillCompleteParams);
176 
177  } else if (pL.get<bool>("filtered matrix: reuse graph")) {
178  filteredA = MatrixFactory::Build(A->getCrsGraph());
179  filteredA->resumeFill();
180  BuildReuse(*A, *G, (lumping != use_spread_lumping), dirichlet_threshold, *filteredA);
181  // only lump inside BuildReuse if lumping is true and use_spread_lumping is false
182  // note: they use_spread_lumping cannot be true if lumping is false
183 
184  if (use_spread_lumping) ExperimentalLumping(*A, *filteredA, DdomAllowGrowthRate, DdomCap);
185  filteredA->fillComplete(fillCompleteParams);
186 
187  } else {
188  filteredA = MatrixFactory::Build(A->getRowMap(), A->getColMap(), A->getLocalMaxNumRowEntries());
189  BuildNew(*A, *G, (lumping != use_spread_lumping), dirichlet_threshold, *filteredA);
190  // only lump inside BuildNew if lumping is true and use_spread_lumping is false
191  // note: they use_spread_lumping cannot be true if lumping is false
192  if (use_spread_lumping) ExperimentalLumping(*A, *filteredA, DdomAllowGrowthRate, DdomCap);
193  filteredA->fillComplete(A->getDomainMap(), A->getRangeMap(), fillCompleteParams);
194  }
195 
197  Xpetra::IO<SC, LO, GO, NO>::Write("filteredA.dat", *filteredA);
198 
199  // original filtered A and actual A
201  RCP<Matrix> origFilteredA = MatrixFactory::Build(A->getRowMap(), A->getColMap(), A->getLocalMaxNumRowEntries());
202  BuildNew(*A, *G, lumping, dirichlet_threshold, *origFilteredA);
203  if (use_spread_lumping) ExperimentalLumping(*A, *origFilteredA, DdomAllowGrowthRate, DdomCap);
204  origFilteredA->fillComplete(A->getDomainMap(), A->getRangeMap(), fillCompleteParams);
205  Xpetra::IO<SC, LO, GO, NO>::Write("origFilteredA.dat", *origFilteredA);
206  }
207 
208  filteredA->SetFixedBlockSize(A->GetFixedBlockSize());
209 
210  if (pL.get<bool>("filtered matrix: reuse eigenvalue")) {
211  // Reuse max eigenvalue from A
212  // It is unclear what eigenvalue is the best for the smoothing, but we already may have
213  // the D^{-1}A estimate in A, may as well use it.
214  // NOTE: ML does that too
215  filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate());
216  }
217 
218  Set(currentLevel, "A", filteredA);
219 }
220 
221 // Epetra's API allows direct access to row array.
222 // Tpetra's API does not, providing only ArrayView<const .>
223 // But in most situations we are currently interested in, it is safe to assume
224 // that the view is to the actual data. So this macro directs the code to do
225 // const_cast, and modify the entries directly. This allows us to avoid
226 // replaceLocalValues() call which is quite expensive due to all the searches.
227 //#define ASSUME_DIRECT_ACCESS_TO_ROW // See github issue 10883#issuecomment-1256676340
228 
229 // Both Epetra and Tpetra matrix-matrix multiply use the following trick:
230 // if an entry of the left matrix is zero, it does not compute or store the
231 // zero value.
232 //
233 // This trick allows us to bypass constructing a new matrix. Instead, we
234 // make a deep copy of the original one, and fill it in with zeros, which
235 // are ignored during the prolongator smoothing.
236 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
238  BuildReuse(const Matrix& A, const LWGraph& G, const bool lumping, double dirichletThresh, Matrix& filteredA) const {
239  using TST = typename Teuchos::ScalarTraits<SC>;
240  SC zero = TST::zero();
241 
242  size_t blkSize = A.GetFixedBlockSize();
243 
244  ArrayView<const LO> inds;
245  ArrayView<const SC> valsA;
246 #ifdef ASSUME_DIRECT_ACCESS_TO_ROW
247  ArrayView<SC> vals;
248 #else
249  Array<SC> vals;
250 #endif
251 
252  Array<char> filter(std::max(blkSize * G.GetImportMap()->getLocalNumElements(),
253  A.getColMap()->getLocalNumElements()),
254  0);
255 
256  size_t numGRows = G.GetNodeNumVertices();
257  for (size_t i = 0; i < numGRows; i++) {
258  // Set up filtering array
259  auto indsG = G.getNeighborVertices(i);
260  for (size_t j = 0; j < as<size_t>(indsG.length); j++)
261  for (size_t k = 0; k < blkSize; k++)
262  filter[indsG(j) * blkSize + k] = 1;
263 
264  for (size_t k = 0; k < blkSize; k++) {
265  LO row = i * blkSize + k;
266 
267  A.getLocalRowView(row, inds, valsA);
268 
269  size_t nnz = inds.size();
270  if (nnz == 0)
271  continue;
272 
273 #ifdef ASSUME_DIRECT_ACCESS_TO_ROW
274  // Transform ArrayView<const SC> into ArrayView<SC>
275  ArrayView<const SC> vals1;
276  filteredA.getLocalRowView(row, inds, vals1);
277  vals = ArrayView<SC>(const_cast<SC*>(vals1.getRawPtr()), nnz);
278 
279  memcpy(vals.getRawPtr(), valsA.getRawPtr(), nnz * sizeof(SC));
280 #else
281  vals = Array<SC>(valsA);
282 #endif
283 
285  // SC ONE = Teuchos::ScalarTraits<SC>::one();
286  SC A_rowsum = ZERO, F_rowsum = ZERO;
287  for (LO l = 0; l < (LO)inds.size(); l++)
288  A_rowsum += valsA[l];
289 
290  if (lumping == false) {
291  for (size_t j = 0; j < nnz; j++)
292  if (!filter[inds[j]])
293  vals[j] = zero;
294 
295  } else {
296  LO diagIndex = -1;
297  SC diagExtra = zero;
298 
299  for (size_t j = 0; j < nnz; j++) {
300  if (filter[inds[j]]) {
301  if (inds[j] == row) {
302  // Remember diagonal position
303  diagIndex = j;
304  }
305  continue;
306  }
307 
308  diagExtra += vals[j];
309 
310  vals[j] = zero;
311  }
312 
313  // Lump dropped entries
314  // NOTE
315  // * Does it make sense to lump for elasticity?
316  // * Is it different for diffusion and elasticity?
317  // SC diagA = ZERO;
318  if (diagIndex != -1) {
319  // diagA = vals[diagIndex];
320  vals[diagIndex] += diagExtra;
321  if (dirichletThresh >= 0.0 && TST::real(vals[diagIndex]) <= dirichletThresh) {
322  // printf("WARNING: row %d diag(Afiltered) = %8.2e diag(A)=%8.2e\n",row,vals[diagIndex],diagA);
323  for (LO l = 0; l < (LO)nnz; l++)
324  F_rowsum += vals[l];
325  // printf(" : A rowsum = %8.2e F rowsum = %8.2e\n",A_rowsum,F_rowsum);
326  vals[diagIndex] = TST::one();
327  }
328  }
329  }
330 
331 #ifndef ASSUME_DIRECT_ACCESS_TO_ROW
332  // Because we used a column map in the construction of the matrix
333  // we can just use insertLocalValues here instead of insertGlobalValues
334  filteredA.replaceLocalValues(row, inds, vals);
335 #endif
336  }
337 
338  // Reset filtering array
339  for (size_t j = 0; j < as<size_t>(indsG.length); j++)
340  for (size_t k = 0; k < blkSize; k++)
341  filter[indsG(j) * blkSize + k] = 0;
342  }
343 }
344 
345 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
347  BuildNew(const Matrix& A, const LWGraph& G, const bool lumping, double dirichletThresh, Matrix& filteredA) const {
348  using TST = typename Teuchos::ScalarTraits<SC>;
350 
351  size_t blkSize = A.GetFixedBlockSize();
352 
353  ArrayView<const LO> indsA;
354  ArrayView<const SC> valsA;
355  Array<LO> inds;
356  Array<SC> vals;
357 
358  Array<char> filter(blkSize * G.GetImportMap()->getLocalNumElements(), 0);
359 
360  size_t numGRows = G.GetNodeNumVertices();
361  for (size_t i = 0; i < numGRows; i++) {
362  // Set up filtering array
363  auto indsG = G.getNeighborVertices(i);
364  for (size_t j = 0; j < as<size_t>(indsG.length); j++)
365  for (size_t k = 0; k < blkSize; k++)
366  filter[indsG(j) * blkSize + k] = 1;
367 
368  for (size_t k = 0; k < blkSize; k++) {
369  LO row = i * blkSize + k;
370 
371  A.getLocalRowView(row, indsA, valsA);
372 
373  size_t nnz = indsA.size();
374  if (nnz == 0)
375  continue;
376 
377  inds.resize(indsA.size());
378  vals.resize(valsA.size());
379 
380  size_t numInds = 0;
381  if (lumping == false) {
382  for (size_t j = 0; j < nnz; j++)
383  if (filter[indsA[j]]) {
384  inds[numInds] = indsA[j];
385  vals[numInds] = valsA[j];
386  numInds++;
387  }
388 
389  } else {
390  LO diagIndex = -1;
391  SC diagExtra = zero;
392 
393  for (size_t j = 0; j < nnz; j++) {
394  if (filter[indsA[j]]) {
395  inds[numInds] = indsA[j];
396  vals[numInds] = valsA[j];
397 
398  // Remember diagonal position
399  if (inds[numInds] == row)
400  diagIndex = numInds;
401 
402  numInds++;
403 
404  } else {
405  diagExtra += valsA[j];
406  }
407  }
408 
409  // Lump dropped entries
410  // NOTE
411  // * Does it make sense to lump for elasticity?
412  // * Is it different for diffusion and elasticity?
413  if (diagIndex != -1) {
414  vals[diagIndex] += diagExtra;
415  if (dirichletThresh >= 0.0 && TST::real(vals[diagIndex]) <= dirichletThresh) {
416  // SC A_rowsum = ZERO, F_rowsum = ZERO;
417  // printf("WARNING: row %d diag(Afiltered) = %8.2e diag(A)=%8.2e\n",row,vals[diagIndex],diagA);
418  // for(LO l = 0; l < (LO)nnz; l++)
419  // F_rowsum += vals[l];
420  // printf(" : A rowsum = %8.2e F rowsum = %8.2e\n",A_rowsum,F_rowsum);
421  vals[diagIndex] = TST::one();
422  }
423  }
424  }
425  inds.resize(numInds);
426  vals.resize(numInds);
427 
428  // Because we used a column map in the construction of the matrix
429  // we can just use insertLocalValues here instead of insertGlobalValues
430  filteredA.insertLocalValues(row, inds, vals);
431  }
432 
433  // Reset filtering array
434  for (size_t j = 0; j < as<size_t>(indsG.length); j++)
435  for (size_t k = 0; k < blkSize; k++)
436  filter[indsG(j) * blkSize + k] = 0;
437  }
438 }
439 
440 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
442  BuildNewUsingRootStencil(const Matrix& A, const LWGraph& G, double dirichletThresh, Level& currentLevel, Matrix& filteredA, bool use_spread_lumping, double DdomAllowGrowthRate, double DdomCap) const {
443  using TST = typename Teuchos::ScalarTraits<SC>;
444  using Teuchos::arcp_const_cast;
448 
449  // In the badAggNeighbors loop, if the entry has any number besides NAN, I add it to the diagExtra and then zero the guy.
450  RCP<Aggregates> aggregates = Get<RCP<Aggregates> >(currentLevel, "Aggregates");
451  RCP<AmalgamationInfo> amalgInfo = Get<RCP<AmalgamationInfo> >(currentLevel, "UnAmalgamationInfo");
452  LO numAggs = aggregates->GetNumAggregates();
453 
454  size_t numNodes = G.GetNodeNumVertices();
455  size_t blkSize = A.GetFixedBlockSize();
456  size_t numRows = A.getMap()->getLocalNumElements();
457  ArrayView<const LO> indsA;
458  ArrayView<const SC> valsA;
459  ArrayRCP<const size_t> rowptr;
460  ArrayRCP<const LO> inds;
461  ArrayRCP<const SC> vals_const;
462  ArrayRCP<SC> vals;
463 
464  // We're going to grab the vals array from filteredA and then blitz it with NAN as a placeholder for "entries that have
465  // not yey been touched." If I see an entry in the primary loop that has a zero, then I assume it has been nuked by
466  // it's symmetric pair, so I add it to the diagonal. If it has a NAN, process as normal.
467  RCP<CrsMatrix> filteredAcrs = dynamic_cast<const CrsMatrixWrap*>(&filteredA)->getCrsMatrix();
468  filteredAcrs->getAllValues(rowptr, inds, vals_const);
469  vals = arcp_const_cast<SC>(vals_const);
470  Array<bool> vals_dropped_indicator(vals.size(), false);
471 
472  // Check map nesting
473  RCP<const Map> rowMap = A.getRowMap();
474  RCP<const Map> colMap = A.getColMap();
475  bool goodMap = MueLu::Utilities<SC, LO, GO, NO>::MapsAreNested(*rowMap, *colMap);
476  TEUCHOS_TEST_FOR_EXCEPTION(!goodMap, Exceptions::RuntimeError, "FilteredAFactory: Maps are not nested");
477 
478  // Since we're going to symmetrize this
479  Array<LO> diagIndex(numRows, INVALID);
480  Array<SC> diagExtra(numRows, ZERO);
481 
482  // Lists of nodes in each aggregate
483  struct {
484  // GH: For now, copy everything to host until we properly set this factory to run device code
485  // Instead, we'll copy data into HostMirrors and run the algorithms on host, saving optimization for later.
486  typename Aggregates::LO_view ptr, nodes, unaggregated;
487  typename Aggregates::LO_view::HostMirror ptr_h, nodes_h, unaggregated_h;
488  } nodesInAgg;
489  aggregates->ComputeNodesInAggregate(nodesInAgg.ptr, nodesInAgg.nodes, nodesInAgg.unaggregated);
490  nodesInAgg.ptr_h = Kokkos::create_mirror_view(nodesInAgg.ptr);
491  nodesInAgg.nodes_h = Kokkos::create_mirror_view(nodesInAgg.nodes);
492  nodesInAgg.unaggregated_h = Kokkos::create_mirror_view(nodesInAgg.unaggregated);
493  Kokkos::deep_copy(nodesInAgg.ptr_h, nodesInAgg.ptr);
494  Kokkos::deep_copy(nodesInAgg.nodes_h, nodesInAgg.nodes);
495  Kokkos::deep_copy(nodesInAgg.unaggregated_h, nodesInAgg.unaggregated);
496  Teuchos::ArrayRCP<const LO> vertex2AggId = aggregates->GetVertex2AggId()->getData(0); // GH: this is needed on device, grab the pointer after we call ComputeNodesInAggregate
497 
498  LO graphNumCols = G.GetImportMap()->getLocalNumElements();
499  Array<bool> filter(graphNumCols, false);
500 
501  // Loop over the unaggregated nodes. Blitz those rows. We don't want to smooth singletons.
502  for (LO i = 0; i < (LO)nodesInAgg.unaggregated_h.extent(0); i++) {
503  for (LO m = 0; m < (LO)blkSize; m++) {
504  LO row = amalgInfo->ComputeLocalDOF(nodesInAgg.unaggregated_h(i), m);
505  if (row >= (LO)numRows) continue;
506  size_t index_start = rowptr[row];
507  A.getLocalRowView(row, indsA, valsA);
508  for (LO k = 0; k < (LO)indsA.size(); k++) {
509  if (row == indsA[k]) {
510  vals[index_start + k] = ONE;
511  diagIndex[row] = k;
512  } else
513  vals[index_start + k] = ZERO;
514  }
515  }
516  } // end nodesInAgg.unaggregated.extent(0);
517 
518  std::vector<LO> badCount(numAggs, 0);
519 
520  // Find the biggest aggregate size in *nodes*
521  LO maxAggSize = 0;
522  for (LO i = 0; i < numAggs; i++)
523  maxAggSize = std::max(maxAggSize, nodesInAgg.ptr_h(i + 1) - nodesInAgg.ptr_h(i));
524 
525  // Loop over all the aggregates
526  std::vector<LO> goodAggNeighbors(G.getLocalMaxNumRowEntries());
527  std::vector<LO> badAggNeighbors(std::min(G.getLocalMaxNumRowEntries() * maxAggSize, numNodes));
528 
529  size_t numNewDrops = 0;
530  size_t numOldDrops = 0;
531  size_t numFixedDiags = 0;
532  size_t numSymDrops = 0;
533 
534  for (LO i = 0; i < numAggs; i++) {
535  LO numNodesInAggregate = nodesInAgg.ptr_h(i + 1) - nodesInAgg.ptr_h(i);
536  if (numNodesInAggregate == 0) continue;
537 
538  // Find the root *node*
539  LO root_node = INVALID;
540  for (LO k = nodesInAgg.ptr_h(i); k < nodesInAgg.ptr_h(i + 1); k++) {
541  if (aggregates->IsRoot(nodesInAgg.nodes_h(k))) {
542  root_node = nodesInAgg.nodes_h(k);
543  break;
544  }
545  }
546 
547  TEUCHOS_TEST_FOR_EXCEPTION(root_node == INVALID,
548  Exceptions::RuntimeError, "MueLu::FilteredAFactory::BuildNewUsingRootStencil: Cannot find root node");
549 
550  // Find the list of "good" node neighbors (aka nodes which border the root node in the Graph G)
551  auto goodNodeNeighbors = G.getNeighborVertices(root_node);
552 
553  // Now find the list of "good" aggregate neighbors (aka the aggregates neighbor the root node in the Graph G)
554  goodAggNeighbors.resize(0);
555  for (LO k = 0; k < (LO)goodNodeNeighbors.length; k++) {
556  goodAggNeighbors.push_back(vertex2AggId[goodNodeNeighbors(k)]);
557  }
558  sort_and_unique(goodAggNeighbors);
559 
560  // Now we get the list of "bad" aggregate neighbors (aka aggregates which border the
561  // root node in the original matrix A, which are not goodNodeNeighbors). Since we
562  // don't have an amalgamated version of the original matrix, we use the matrix directly
563  badAggNeighbors.resize(0);
564  for (LO j = 0; j < (LO)blkSize; j++) {
565  LO row = amalgInfo->ComputeLocalDOF(root_node, j);
566  if (row >= (LO)numRows) continue;
567  A.getLocalRowView(row, indsA, valsA);
568  for (LO k = 0; k < (LO)indsA.size(); k++) {
569  if ((indsA[k] < (LO)numRows) && (TST::magnitude(valsA[k]) != TST::magnitude(ZERO))) {
570  LO node = amalgInfo->ComputeLocalNode(indsA[k]);
571  LO agg = vertex2AggId[node];
572  if (!std::binary_search(goodAggNeighbors.begin(), goodAggNeighbors.end(), agg))
573  badAggNeighbors.push_back(agg);
574  }
575  }
576  }
577  sort_and_unique(badAggNeighbors);
578 
579  // Go through the filtered graph and count the number of connections to the badAggNeighbors
580  // if there are 2 or more of these connections, remove them from the bad list.
581 
582  for (LO k = nodesInAgg.ptr_h(i); k < nodesInAgg.ptr_h(i + 1); k++) {
583  auto nodeNeighbors = G.getNeighborVertices(k);
584  for (LO kk = 0; kk < nodeNeighbors.length; kk++) {
585  if ((vertex2AggId[nodeNeighbors(kk)] >= 0) && (vertex2AggId[nodeNeighbors(kk)] < numAggs))
586  (badCount[vertex2AggId[nodeNeighbors(kk)]])++;
587  }
588  }
589  std::vector<LO> reallyBadAggNeighbors(std::min(G.getLocalMaxNumRowEntries() * maxAggSize, numNodes));
590  reallyBadAggNeighbors.resize(0);
591  for (LO k = 0; k < (LO)badAggNeighbors.size(); k++) {
592  if (badCount[badAggNeighbors[k]] <= 1) reallyBadAggNeighbors.push_back(badAggNeighbors[k]);
593  }
594  for (LO k = nodesInAgg.ptr_h(i); k < nodesInAgg.ptr_h(i + 1); k++) {
595  auto nodeNeighbors = G.getNeighborVertices(k);
596  for (LO kk = 0; kk < nodeNeighbors.length; kk++) {
597  if ((vertex2AggId[nodeNeighbors(kk)] >= 0) && (vertex2AggId[nodeNeighbors(kk)] < numAggs))
598  badCount[vertex2AggId[nodeNeighbors(kk)]] = 0;
599  }
600  }
601 
602  // For each of the reallyBadAggNeighbors, we go and blitz their connections to dofs in this aggregate.
603  // We remove the INVALID marker when we do this so we don't wind up doubling this up later
604  for (LO b = 0; b < (LO)reallyBadAggNeighbors.size(); b++) {
605  LO bad_agg = reallyBadAggNeighbors[b];
606  for (LO k = nodesInAgg.ptr_h(bad_agg); k < nodesInAgg.ptr_h(bad_agg + 1); k++) {
607  LO bad_node = nodesInAgg.nodes_h(k);
608  for (LO j = 0; j < (LO)blkSize; j++) {
609  LO bad_row = amalgInfo->ComputeLocalDOF(bad_node, j);
610  if (bad_row >= (LO)numRows) continue;
611  size_t index_start = rowptr[bad_row];
612  A.getLocalRowView(bad_row, indsA, valsA);
613  for (LO l = 0; l < (LO)indsA.size(); l++) {
614  if (indsA[l] < (LO)numRows && vertex2AggId[amalgInfo->ComputeLocalNode(indsA[l])] == i && vals_dropped_indicator[index_start + l] == false) {
615  vals_dropped_indicator[index_start + l] = true;
616  vals[index_start + l] = ZERO;
617  diagExtra[bad_row] += valsA[l];
618  numSymDrops++;
619  }
620  }
621  }
622  }
623  }
624 
625  // Now lets fill the rows in this aggregate and figure out the diagonal lumping
626  // We loop over each node in the aggregate and then over the neighbors of that node
627 
628  for (LO k = nodesInAgg.ptr_h(i); k < nodesInAgg.ptr_h(i + 1); k++) {
629  LO row_node = nodesInAgg.nodes_h(k);
630 
631  // Set up filtering array
632  auto indsG = G.getNeighborVertices(row_node);
633  for (size_t j = 0; j < as<size_t>(indsG.length); j++)
634  filter[indsG(j)] = true;
635 
636  for (LO m = 0; m < (LO)blkSize; m++) {
637  LO row = amalgInfo->ComputeLocalDOF(row_node, m);
638  if (row >= (LO)numRows) continue;
639  size_t index_start = rowptr[row];
640  A.getLocalRowView(row, indsA, valsA);
641 
642  for (LO l = 0; l < (LO)indsA.size(); l++) {
643  int col_node = amalgInfo->ComputeLocalNode(indsA[l]);
644  bool is_good = filter[col_node];
645  if (indsA[l] == row) {
646  diagIndex[row] = l;
647  vals[index_start + l] = valsA[l];
648  continue;
649  }
650 
651  // If we've already dropped this guy (from symmetry above), then continue onward
652  if (vals_dropped_indicator[index_start + l] == true) {
653  if (is_good)
654  numOldDrops++;
655  else
656  numNewDrops++;
657  continue;
658  }
659 
660  // FIXME: I'm assuming vertex2AggId is only length of the rowmap, so
661  // we won'd do secondary dropping on off-processor neighbors
662  if (is_good && indsA[l] < (LO)numRows) {
663  int agg = vertex2AggId[col_node];
664  if (std::binary_search(reallyBadAggNeighbors.begin(), reallyBadAggNeighbors.end(), agg))
665  is_good = false;
666  }
667 
668  if (is_good) {
669  vals[index_start + l] = valsA[l];
670  } else {
671  if (!filter[col_node])
672  numOldDrops++;
673  else
674  numNewDrops++;
675  diagExtra[row] += valsA[l];
676  vals[index_start + l] = ZERO;
677  vals_dropped_indicator[index_start + l] = true;
678  }
679  } // end for l "indsA.size()" loop
680 
681  } // end m "blkSize" loop
682 
683  // Clear filtering array
684  for (size_t j = 0; j < as<size_t>(indsG.length); j++)
685  filter[indsG(j)] = false;
686 
687  } // end k loop over number of nodes in this agg
688  } // end i loop over numAggs
689 
690  if (!use_spread_lumping) {
691  // Now do the diagonal modifications in one, final pass
692  for (LO row = 0; row < (LO)numRows; row++) {
693  if (diagIndex[row] != INVALID) {
694  size_t index_start = rowptr[row];
695  size_t diagIndexInMatrix = index_start + diagIndex[row];
696  // printf("diag_vals pre update = %8.2e\n", vals[diagIndex] );
697  vals[diagIndexInMatrix] += diagExtra[row];
698  SC A_rowsum = ZERO, A_absrowsum = ZERO, F_rowsum = ZERO;
699 
700  if ((dirichletThresh >= 0.0 && TST::real(vals[diagIndexInMatrix]) <= dirichletThresh) || TST::real(vals[diagIndexInMatrix]) == ZERO) {
702  A.getLocalRowView(row, indsA, valsA);
703  // SC diagA = valsA[diagIndex[row]];
704  // printf("WARNING: row %d (diagIndex=%d) diag(Afiltered) = %8.2e diag(A)=%8.2e numInds = %d\n",row,diagIndex[row],vals[diagIndexInMatrix],diagA,(LO)indsA.size());
705 
706  for (LO l = 0; l < (LO)indsA.size(); l++) {
707  A_rowsum += valsA[l];
708  A_absrowsum += std::abs(valsA[l]);
709  }
710  for (LO l = 0; l < (LO)indsA.size(); l++)
711  F_rowsum += vals[index_start + l];
712  // printf(" : A rowsum = %8.2e |A| rowsum = %8.2e rowsum = %8.2e\n",A_rowsum,A_absrowsum,F_rowsum);
714  // printf(" Avals =");
715  // for(LO l = 0; l < (LO)indsA.size(); l++)
716  // printf("%d(%8.2e)[%d] ",(LO)indsA[l],valsA[l],(LO)l);
717  // printf("\n");
718  // printf(" Fvals =");
719  // for(LO l = 0; l < (LO)indsA.size(); l++)
720  // if(vals[index_start+l] != ZERO)
721  // printf("%d(%8.2e)[%d] ",(LO)indsA[l],vals[index_start+l],(LO)l);
722  }
723  }
724  // Don't know what to do, so blitz the row and dump a one on the diagonal
725  for (size_t l = rowptr[row]; l < rowptr[row + 1]; l++) {
726  vals[l] = ZERO;
727  }
728  vals[diagIndexInMatrix] = TST::one();
729  numFixedDiags++;
730  }
731  } else {
732  GetOStream(Runtime0) << "WARNING: Row " << row << " has no diagonal " << std::endl;
733  }
734  } /*end row "numRows" loop"*/
735  }
736 
737  // Copy all the goop out
738  for (LO row = 0; row < (LO)numRows; row++) {
739  filteredA.replaceLocalValues(row, inds(rowptr[row], rowptr[row + 1] - rowptr[row]), vals(rowptr[row], rowptr[row + 1] - rowptr[row]));
740  }
741  if (use_spread_lumping) ExperimentalLumping(A, filteredA, DdomAllowGrowthRate, DdomCap);
742 
743  size_t g_newDrops = 0, g_oldDrops = 0, g_fixedDiags = 0;
744 
745  MueLu_sumAll(A.getRowMap()->getComm(), numNewDrops, g_newDrops);
746  MueLu_sumAll(A.getRowMap()->getComm(), numOldDrops, g_oldDrops);
747  MueLu_sumAll(A.getRowMap()->getComm(), numFixedDiags, g_fixedDiags);
748  GetOStream(Runtime0) << "Filtering out " << g_newDrops << " edges, in addition to the " << g_oldDrops << " edges dropped earlier" << std::endl;
749  GetOStream(Runtime0) << "Fixing " << g_fixedDiags << " zero diagonal values" << std::endl;
750 }
751 
752 // fancy lumping trying to not just move everything to the diagonal but to also consider moving
753 // some lumping to the kept off-diagonals. We basically aim to not increase the diagonal
754 // dominance in a row. In particular, the goal is that row i satisfies
755 //
756 // lumpedDiagDomMeasure_i <= rho2
757 // or
758 // lumpedDiagDomMeasure <= rho*unlumpedDiagDomMeasure
759 //
760 // NOTE: THIS CODE assumes direct access to a row. See comments above concerning
761 // ASSUME_DIRECT_ACCESS_TO_ROW
762 //
763 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
765  ExperimentalLumping(const Matrix& A, Matrix& filteredA, double irho, double irho2) const {
766  using TST = typename Teuchos::ScalarTraits<SC>;
767  SC zero = TST::zero();
768  SC one = TST::one();
769 
770  ArrayView<const LO> inds;
771  ArrayView<const SC> vals;
772  ArrayView<const LO> finds;
773  ArrayView<SC> fvals;
774 
775  SC PosOffSum, NegOffSum, PosOffDropSum, NegOffDropSum;
776  SC diag, gamma, alpha;
777  LO NumPosKept, NumNegKept;
778 
779  SC noLumpDdom;
780  SC numer, denom;
781  SC PosFilteredSum, NegFilteredSum;
782  SC Target;
783 
784  SC rho = as<Scalar>(irho);
785  SC rho2 = as<Scalar>(irho2);
786 
787  for (LO row = 0; row < (LO)A.getRowMap()->getLocalNumElements(); row++) {
788  noLumpDdom = as<Scalar>(10000.0); // only used if diagonal is zero
789  // the whole idea sort of breaks down
790  // when the diagonal is zero. In particular,
791  // the old diag dominance ratio is infinity
792  // ... so what do we want for the new ddom
793  // ratio. Do we want to allow the diagonal
794  // to go negative, just to have a better ddom
795  // ratio? This current choice essentially
796  // changes 'Target' to a large number
797  // meaning that we will allow the new
798  // ddom number to be fairly large (because
799  // the old one was infinity)
800 
801  ArrayView<const SC> tvals;
802  A.getLocalRowView(row, inds, vals);
803  size_t nnz = inds.size();
804  if (nnz == 0) continue;
805  filteredA.getLocalRowView(row, finds, tvals); // assume 2 getLocalRowView()s
806  // have things in same order
807  fvals = ArrayView<SC>(const_cast<SC*>(tvals.getRawPtr()), nnz);
808 
809  LO diagIndex = -1, fdiagIndex = -1;
810 
811  PosOffSum = zero;
812  NegOffSum = zero;
813  PosOffDropSum = zero;
814  NegOffDropSum = zero;
815  diag = zero;
816  NumPosKept = 0;
817  NumNegKept = 0;
818 
819  // first record diagonal, offdiagonal sums and off diag dropped sums
820  for (size_t j = 0; j < nnz; j++) {
821  if (inds[j] == row) {
822  diagIndex = j;
823  diag = vals[j];
824  } else { // offdiagonal
825  if (TST::real(vals[j]) > TST::real(zero))
826  PosOffSum += vals[j];
827  else
828  NegOffSum += vals[j];
829  }
830  }
831  PosOffDropSum = PosOffSum;
832  NegOffDropSum = NegOffSum;
833  NumPosKept = 0;
834  NumNegKept = 0;
835  LO j = 0;
836  for (size_t jj = 0; jj < (size_t)finds.size(); jj++) {
837  while (inds[j] != finds[jj]) j++; // assumes that finds is in the same order as
838  // inds ... but perhaps has some entries missing
839  if (finds[jj] == row)
840  fdiagIndex = jj;
841  else {
842  if (TST::real(vals[j]) > TST::real(zero)) {
843  PosOffDropSum -= fvals[jj];
844  if (TST::real(fvals[jj]) != TST::real(zero)) NumPosKept++;
845  } else {
846  NegOffDropSum -= fvals[jj];
847  if (TST::real(fvals[jj]) != TST::real(zero)) NumNegKept++;
848  }
849  }
850  }
851 
852  // measure of diagonal dominance if no lumping is done.
853  if (TST::magnitude(diag) != TST::magnitude(zero))
854  noLumpDdom = (PosOffSum - NegOffSum) / diag;
855 
856  // Target is an acceptable diagonal dominance ratio
857  // which should really be larger than 1
858 
859  Target = rho * noLumpDdom;
860  if (TST::magnitude(Target) <= TST::magnitude(rho)) Target = rho2;
861 
862  PosFilteredSum = PosOffSum - PosOffDropSum;
863  NegFilteredSum = NegOffSum - NegOffDropSum;
864  // Note: PosNotFilterdSum is not equal to the sum of the
865  // positive entries after lumping. It just reflects the
866  // pos offdiag sum of the filtered matrix before lumping
867  // and does not account for negative dropped terms lumped
868  // to the positive kept terms.
869 
870  // dropped positive offdiags always go to the diagonal as these
871  // always improve diagonal dominance.
872 
873  diag += PosOffDropSum;
874 
875  // now lets work on lumping dropped negative offdiags
876  gamma = -NegOffDropSum - PosFilteredSum;
877 
878  if (TST::real(gamma) < TST::real(zero)) {
879  // the total amount of negative dropping is less than PosFilteredSum,
880  // so we can distribute this dropping to pos offdiags. After lumping
881  // the sum of the pos offdiags is just -gamma so we just assign pos
882  // offdiags proportional to vals[j]/PosFilteredSum
883  // Note: in this case the diagonal is not changed as all lumping
884  // occurs to the pos offdiags
885 
886  if (fdiagIndex != -1) fvals[fdiagIndex] = diag;
887  j = 0;
888  for (LO jj = 0; jj < (LO)finds.size(); jj++) {
889  while (inds[j] != finds[jj]) j++; // assumes that finds is in the same order as
890  // inds ... but perhaps has some entries missing
891  if ((j != diagIndex) && (TST::real(vals[j]) > TST::real(zero)) && (TST::magnitude(fvals[jj]) != TST::magnitude(zero)))
892  fvals[jj] = -gamma * (vals[j] / PosFilteredSum);
893  }
894  } else {
895  // So there are more negative values that need lumping than kept
896  // positive offdiags. Meaning there is enough negative lumping to
897  // completely clear out all pos offdiags. If we lump all negs
898  // to pos off diags, we'd actually change them to negative. We
899  // only do this if we are desperate. Otherwise, we'll clear out
900  // all the positive kept offdiags and try to lump the rest
901  // somewhere else. We defer the clearing of pos off diags
902  // to see first if we are going to be desperate.
903 
904  bool flipPosOffDiagsToNeg = false;
905 
906  // Even if we lumped by zeroing positive offdiags, we are still
907  // going to have more lumping to distribute to either
908  // 1) the diagonal
909  // 2) the kept negative offdiags
910  // 3) the kept positive offdiags (desperate)
911 
912  // Let's first considering lumping the remaining neg offdiag stuff
913  // to the diagonal ... if this does not increase the diagonal
914  // dominance ratio too much (given by rho).
915 
916  if ((TST::real(diag) > TST::real(gamma)) &&
917  (TST::real((-NegFilteredSum) / (diag - gamma)) <= TST::real(Target))) {
918  // 1st if term above insures that resulting diagonal (=diag-gamma)
919  // is positive. . The left side of 2nd term is the diagonal dominance
920  // if we lump the remaining stuff (gamma) to the diagonal. Recall,
921  // that now there are no positive off-diags so the sum(abs(offdiags))
922  // is just the negative of NegFilteredSum
923 
924  if (fdiagIndex != -1) fvals[fdiagIndex] = diag - gamma;
925  } else if (NumNegKept > 0) {
926  // need to do some lumping to neg offdiags to avoid a large
927  // increase in diagonal dominance. We first compute alpha
928  // which measures how much gamma should go to the
929  // negative offdiags. The rest will go to the diagonal
930 
931  numer = -NegFilteredSum - Target * (diag - gamma);
932  denom = gamma * (Target - TST::one());
933 
934  // make sure that alpha is between 0 and 1 ... and that it doesn't
935  // result in a sign flip
936  // Note: when alpha is set to 1, then the diagonal is not modified
937  // and the negative offdiags just get shifted from those
938  // removed and those kept, meaning that the digaonal dominance
939  // should be the same as before
940  //
941  // can alpha be negative? It looks like denom should always
942  // be positive. The 'if' statement above
943  // Normally, diag-gamma should also be positive (but if it
944  // is negative then numer is guaranteed to be positve).
945  // look at the 'if' above,
946  // if (( TST::real(diag) > TST::real(gamma)) &&
947  // ( TST::real((-NegFilteredSum)/(diag - gamma)) <= TST::real(Target))) {
948  //
949  // Should guarantee that numer is positive. This is obvious when
950  // the second condition is false. When it is the first condition that
951  // is false, it follows that the two indiviudal terms in the numer
952  // formula must be positive.
953 
954  if (TST::magnitude(denom) < TST::magnitude(numer))
955  alpha = TST::one();
956  else
957  alpha = numer / denom;
958  if (TST::real(alpha) < TST::real(zero)) alpha = zero;
959  if (TST::real(diag) < TST::real((one - alpha) * gamma)) alpha = TST::one();
960 
961  // first change the diagonal
962 
963  if (fdiagIndex != -1) fvals[fdiagIndex] = diag - (one - alpha) * gamma;
964 
965  // after lumping the sum of neg offdiags will be NegFilteredSum
966  // + alpha*gamma. That is the remaining negative entries altered
967  // by the percent (=alpha) of stuff (=gamma) that needs to be
968  // lumped after taking into account lumping to pos offdiags
969 
970  // Do this by assigning a fraction of NegFilteredSum+alpha*gamma
971  // proportional to vals[j]/NegFilteredSum
972 
973  SC temp = (NegFilteredSum + alpha * gamma) / NegFilteredSum;
974  j = 0;
975  for (LO jj = 0; jj < (LO)finds.size(); jj++) {
976  while (inds[j] != finds[jj]) j++; // assumes that finds is in the same order as
977  // inds ... but perhaps has some entries missing
978  if ((jj != fdiagIndex) && (TST::magnitude(fvals[jj]) != TST::magnitude(zero)) &&
979  (TST::real(vals[j]) < TST::real(zero)))
980  fvals[jj] = temp * vals[j];
981  }
982  } else { // desperate case
983  // So we don't have any kept negative offdiags ...
984 
985  if (NumPosKept > 0) {
986  // luckily we can push this stuff to the pos offdiags
987  // which now makes them negative
988  flipPosOffDiagsToNeg = true;
989 
990  j = 0;
991  for (LO jj = 0; jj < (LO)finds.size(); jj++) {
992  while (inds[j] != finds[jj]) j++; // assumes that finds is in the same order as
993  // inds ... but perhaps has some entries missing
994  if ((j != diagIndex) && (TST::magnitude(fvals[jj]) != TST::magnitude(zero)) &&
995  (TST::real(vals[j]) > TST::real(zero)))
996  fvals[jj] = -gamma / ((SC)NumPosKept);
997  }
998  }
999  // else abandon rowsum preservation and do nothing
1000  }
1001  if (!flipPosOffDiagsToNeg) { // not desperate so we now zero out
1002  // all pos terms including some
1003  // not originally filtered
1004  // but zeroed due to lumping
1005  j = 0;
1006  for (LO jj = 0; jj < (LO)finds.size(); jj++) {
1007  while (inds[j] != finds[jj]) j++; // assumes that finds is in the same order as
1008  // inds ... but perhaps has some entries missing
1009  if ((jj != fdiagIndex) && (TST::real(vals[j]) > TST::real(zero))) fvals[jj] = zero;
1010  }
1011  }
1012  } // positive gamma else
1013 
1014  } // loop over all rows
1015 }
1016 
1017 } // namespace MueLu
1018 
1019 #endif // MUELU_FILTEREDAFACTORY_DEF_HPP
void sort_and_unique(T &array)
void BuildReuse(const Matrix &A, const LWGraph &G, const bool lumping, double dirichletThresh, Matrix &filteredA) const
#define MueLu_sumAll(rcpComm, in, out)
static bool MapsAreNested(const Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > &rowMap, const Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > &colMap)
KOKKOS_INLINE_FUNCTION LO GetNumAggregates() const
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
T & get(const std::string &name, T def_value)
ParameterList & set(std::string const &name, T const &value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
static void Write(const std::string &fileName, const Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > &M)
size_type size() const
LocalOrdinal LO
One-liner description of what is happening.
KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const
Return number of graph vertices.
void Build(Level &currentLevel) const
Build method.
size_type size() const
KOKKOS_INLINE_FUNCTION size_type getLocalMaxNumRowEntries() const
Returns the maximum number of entries across all rows/columns on this node.
bool isParameter(const std::string &name) const
#define MUELU_FILTEREDAFACTORY_LOTS_OF_PRINTING
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
void ComputeNodesInAggregate(LO_view &aggPtr, LO_view &aggNodes, LO_view &unaggregated) const
Generates a compressed list of nodes in each aggregate, where the entries in aggNodes[aggPtr[i]] up t...
const RCP< LOMultiVector > & GetVertex2AggId() const
Returns constant vector that maps local node IDs to local aggregates IDs.
void resize(size_type new_size, const value_type &x=value_type())
T * getRawPtr() const
void ExperimentalLumping(const Matrix &A, Matrix &filteredA, double rho, double rho2) const
KOKKOS_INLINE_FUNCTION neighbor_vertices_type getNeighborVertices(LO i) const
Return the list of vertices adjacent to the vertex &#39;v&#39;.
const RCP< const Map > GetImportMap() const
Return overlapping import map (nodes).
#define SET_VALID_ENTRY(name)
Kokkos::View< local_ordinal_type *, device_type > LO_view
Scalar SC
Lightweight MueLu representation of a compressed row storage graph.
bool IsRoot(LO i) const
Returns true if node with given local node id is marked to be a root node.
void BuildNew(const Matrix &A, const LWGraph &G, const bool lumping, double dirichletThresh, Matrix &filteredA) const
void DeclareInput(Level &currentLevel) const
Input.
Exception throws to report errors in the internal logical of the program.
void BuildNewUsingRootStencil(const Matrix &A, const LWGraph &G, double dirichletThresh, Level &currentLevel, Matrix &filteredA, bool use_spread_lumping, double DdomAllowGrowthRate, double DdomCap) const