MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_SmooVecCoalesceDropFactory_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #include "MueLu_SmootherBase.hpp"
47 #ifndef MUELU_SMOOVECCOALESCEDROPFACTORY_DEF_HPP
48 #define MUELU_SMOOVECCOALESCEDROPFACTORY_DEF_HPP
49 
50 #include <Xpetra_CrsGraph.hpp>
51 #include <Xpetra_ImportFactory.hpp>
52 #include <Xpetra_MapFactory.hpp>
53 #include <Xpetra_Map.hpp>
54 #include <Xpetra_Matrix.hpp>
55 #include <Xpetra_MultiVectorFactory.hpp>
56 #include <Xpetra_MultiVector.hpp>
57 #include <Xpetra_StridedMap.hpp>
58 #include <Xpetra_VectorFactory.hpp>
59 #include <Xpetra_Vector.hpp>
60 
62 
63 #include "MueLu_Exceptions.hpp"
64 #include "MueLu_LWGraph.hpp"
65 
66 #include "MueLu_Level.hpp"
67 #include "MueLu_LWGraph.hpp"
68 #include "MueLu_MasterList.hpp"
69 #include "MueLu_Monitor.hpp"
70 #include "MueLu_PreDropFunctionBaseClass.hpp"
71 
72 #include <Xpetra_IO.hpp>
73 
74 #include <algorithm>
75 #include <cstdlib>
76 #include <string>
77 
78 // If defined, read environment variables.
79 // Should be removed once we are confident that this works.
80 // #define DJS_READ_ENV_VARIABLES
81 
82 #include <stdio.h>
83 #include <stdlib.h>
84 #include <math.h>
85 
86 #define poly0thOrderCoef 0
87 #define poly1stOrderCoef 1
88 #define poly2ndOrderCoef 2
89 #define poly3rdOrderCoef 3
90 #define poly4thOrderCoef 4
91 
92 namespace MueLu {
93 
94 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
96  RCP<ParameterList> validParamList = rcp(new ParameterList());
97 
98 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
99  SET_VALID_ENTRY("aggregation: drop scheme");
100  {
101  validParamList->getEntry("aggregation: drop scheme").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("classical", "distance laplacian", "unsupported vector smoothing"))));
102  }
103  SET_VALID_ENTRY("aggregation: number of random vectors");
104  SET_VALID_ENTRY("aggregation: number of times to pre or post smooth");
105  SET_VALID_ENTRY("aggregation: penalty parameters");
106 #undef SET_VALID_ENTRY
107 
108  validParamList->set<RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A");
109  validParamList->set<RCP<const FactoryBase> >("PreSmoother", Teuchos::null, "Generating factory of the PreSmoother");
110  validParamList->set<RCP<const FactoryBase> >("PostSmoother", Teuchos::null, "Generating factory of the PostSmoother");
111 
112  return validParamList;
113 }
114 
115 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
117  : predrop_(Teuchos::null) {}
118 
119 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
121  Input(currentLevel, "A");
122  if (currentLevel.IsAvailable("PreSmoother")) { // rst: totally unsure that this is legal
123  Input(currentLevel, "PreSmoother"); // my guess is that this is not yet available
124  } // so this always comes out false.
125  else if (currentLevel.IsAvailable("PostSmoother")) { // perhaps we can look on the param list?
126  Input(currentLevel, "PostSmoother");
127  }
128 }
129 
130 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
132  FactoryMonitor m(*this, "Build", currentLevel);
133 
134  typedef Teuchos::ScalarTraits<SC> STS;
135 
136  if (predrop_ != Teuchos::null)
137  GetOStream(Parameters0) << predrop_->description();
138 
139  RCP<Matrix> A = Get<RCP<Matrix> >(currentLevel, "A");
140 
141  const ParameterList& pL = GetParameterList();
142 
143  LO nPDEs = A->GetFixedBlockSize();
144 
145  RCP<MultiVector> testVecs;
146  RCP<MultiVector> nearNull;
147 
148 #ifdef takeOut
149  testVecs = Xpetra::IO<SC, LO, GO, Node>::ReadMultiVector("TpetraTVecs.mm", A->getRowMap());
150 #endif
151  size_t numRandom = as<size_t>(pL.get<int>("aggregation: number of random vectors"));
152  testVecs = MultiVectorFactory::Build(A->getRowMap(), numRandom, true);
153  // use random test vectors but should be positive in order to not get
154  // crummy results ... so take abs() of randomize().
155  testVecs->randomize();
156  for (size_t kk = 0; kk < testVecs->getNumVectors(); kk++) {
157  Teuchos::ArrayRCP<Scalar> curVec = testVecs->getDataNonConst(kk);
158  for (size_t ii = kk; ii < as<size_t>(A->getRowMap()->getLocalNumElements()); ii++) curVec[ii] = Teuchos::ScalarTraits<SC>::magnitude(curVec[ii]);
159  }
160  nearNull = MultiVectorFactory::Build(A->getRowMap(), nPDEs, true);
161 
162  // initialize null space to constants
163  for (size_t kk = 0; kk < nearNull->getNumVectors(); kk++) {
164  Teuchos::ArrayRCP<Scalar> curVec = nearNull->getDataNonConst(kk);
165  for (size_t ii = kk; ii < as<size_t>(A->getRowMap()->getLocalNumElements()); ii += nearNull->getNumVectors()) curVec[ii] = Teuchos::ScalarTraits<Scalar>::one();
166  }
167 
168  RCP<MultiVector> zeroVec_TVecs;
169  RCP<MultiVector> zeroVec_Null;
170 
171  zeroVec_TVecs = MultiVectorFactory::Build(A->getRowMap(), testVecs->getNumVectors(), true);
172  zeroVec_Null = MultiVectorFactory::Build(A->getRowMap(), nPDEs, true);
173  zeroVec_TVecs->putScalar(Teuchos::ScalarTraits<Scalar>::zero());
174  zeroVec_Null->putScalar(Teuchos::ScalarTraits<Scalar>::zero());
175 
176  size_t nInvokeSmoother = as<size_t>(pL.get<int>("aggregation: number of times to pre or post smooth"));
177  if (currentLevel.IsAvailable("PreSmoother")) {
178  RCP<SmootherBase> preSmoo = currentLevel.Get<RCP<SmootherBase> >("PreSmoother");
179  for (size_t ii = 0; ii < nInvokeSmoother; ii++) preSmoo->Apply(*testVecs, *zeroVec_TVecs, false);
180  for (size_t ii = 0; ii < nInvokeSmoother; ii++) preSmoo->Apply(*nearNull, *zeroVec_Null, false);
181  } else if (currentLevel.IsAvailable("PostSmoother")) {
182  RCP<SmootherBase> postSmoo = currentLevel.Get<RCP<SmootherBase> >("PostSmoother");
183  for (size_t ii = 0; ii < nInvokeSmoother; ii++) postSmoo->Apply(*testVecs, *zeroVec_TVecs, false);
184  for (size_t ii = 0; ii < nInvokeSmoother; ii++) postSmoo->Apply(*nearNull, *zeroVec_Null, false);
185  } else
186  TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "Must set a smoother");
187 
188  Teuchos::ArrayRCP<Scalar> penaltyPolyCoef(5);
189  Teuchos::ArrayView<const double> inputPolyCoef;
190 
191  penaltyPolyCoef[poly0thOrderCoef] = 12.;
192  penaltyPolyCoef[poly1stOrderCoef] = -.2;
193  penaltyPolyCoef[poly2ndOrderCoef] = 0.0;
194  penaltyPolyCoef[poly3rdOrderCoef] = 0.0;
195  penaltyPolyCoef[poly4thOrderCoef] = 0.0;
196 
197  if (pL.isParameter("aggregation: penalty parameters") && pL.get<Teuchos::Array<double> >("aggregation: penalty parameters").size() > 0) {
198  if (pL.get<Teuchos::Array<double> >("aggregation: penalty parameters").size() > penaltyPolyCoef.size())
199  TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "Number of penalty parameters must be " << penaltyPolyCoef.size() << " or less");
200  inputPolyCoef = pL.get<Teuchos::Array<double> >("aggregation: penalty parameters")();
201 
202  for (size_t i = 0; i < as<size_t>(inputPolyCoef.size()); i++) penaltyPolyCoef[i] = as<Scalar>(inputPolyCoef[i]);
203  for (size_t i = as<size_t>(inputPolyCoef.size()); i < as<size_t>(penaltyPolyCoef.size()); i++) penaltyPolyCoef[i] = Teuchos::ScalarTraits<Scalar>::zero();
204  }
205 
206  RCP<LWGraph> filteredGraph;
207  badGuysCoalesceDrop(*A, penaltyPolyCoef, nPDEs, *testVecs, *nearNull, filteredGraph);
208 
209 #ifdef takeOut
210  /* write out graph for serial debugging purposes only. */
211 
212  FILE* fp = fopen("codeOutput", "w");
213  fprintf(fp, "%d %d %d\n", (int)filteredGraph->GetNodeNumVertices(), (int)filteredGraph->GetNodeNumVertices(),
214  (int)filteredGraph->GetNodeNumEdges());
215  for (size_t i = 0; i < filteredGraph->GetNodeNumVertices(); i++) {
216  auto inds = filteredGraph->getNeighborVertices(as<LO>(i));
217  for (size_t j = 0; j < as<size_t>(inds.size()); j++) {
218  fprintf(fp, "%d %d 1.00e+00\n", (int)i + 1, (int)inds[j] + 1);
219  }
220  }
221  fclose(fp);
222 #endif
223 
224  SC threshold = .01;
225  Set<bool>(currentLevel, "Filtering", (threshold != STS::zero()));
226  Set(currentLevel, "Graph", filteredGraph);
227  Set(currentLevel, "DofsPerNode", 1);
228 
229 } // Build
230 
231 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
232 void SmooVecCoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::badGuysCoalesceDrop(const Matrix& Amat, Teuchos::ArrayRCP<Scalar>& penaltyPolyCoef, LO nPDEs, const MultiVector& testVecs, const MultiVector& nearNull, RCP<LWGraph>& filteredGraph) const {
233  /*
234  * Compute coalesce/drop graph (in filteredGraph) for A. The basic idea is to
235  * balance trade-offs associated with
236  *
237  * (I - P inv(R P) R ) testVecs
238  *
239  * being worse for larger aggregates (less dropping) while MG cycle costs are
240  * cheaper with larger aggregates. MG costs are "penalties" in the
241  * optimization while (I - P inv(R P) R ) is the "fit" (how well a
242  * a fine grid function can be approximated on the coarse grid).
243  *
244  * For MG costs, we don't actually approximate the cost. Instead, we
245  * have just hardwired penalties below. Specifically,
246  *
247  * penalties[j] is the cost if aggregates are of size j+1, where right
248  * now a linear function of the form const*(60-j) is used.
249  *
250  * (I - P inv(P^T P) P^T ) testVecs is estimated by just looking locally at
251  * the vector portion corresponding to a possible aggregate defined by
252  * all non-dropped connections in the ith row. A tentative prolognator is
253  * used for P. This prolongator corresponds to a null space vector given
254  * by 'nearNull', which is provided to dropper(). In initial testing, nearNull is
255  * first set as a vector of all 1's and then smoothed with a relaxation
256  * method applied to a nice matrix (with the same sparsity pattern as A).
257  * Originally, nearNull was used to handle Dir bcs where relaxation of the
258  * vector of 1's has a more pronounced effect.
259  *
260  * For PDE systems, fit only considers the same dof at each node. That is,
261  * it effectively assumes that we have a tentative prolongator with no
262  * coupling between different dof types. When checking the fit for the kth
263  * dof at a paritcular node, it only considers the kth dof of this node
264  * and neighboring nodes.
265  *
266  * Note: testVecs is supplied by the user, but normally is the result of
267  * applying a relaxation scheme to Au = 0 where u is initial random.
268  */
269 
270  GO numMyNnz = Teuchos::as<GO>(Amat.getLocalNumEntries());
271  size_t nLoc = Amat.getRowMap()->getLocalNumElements();
272 
273  size_t nBlks = nLoc / nPDEs;
274  if (nBlks * nPDEs != nLoc)
275  TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "Number of local dofs not divisible by BlkSize");
276 
277  typename LWGraph::row_type::non_const_type newRowPtr("newRowPtr", nBlks + 1); /* coalesce & drop matrix */
278  Teuchos::ArrayRCP<LO> newCols(numMyNnz); /* arrays */
279 
280  Teuchos::ArrayRCP<LO> bcols(nBlks); /* returned by dropfun(j,...) */
281  Teuchos::ArrayRCP<bool> keepOrNot(nBlks); /* gives cols for jth row and */
282  /* whether or not entry is */
283  /* kept or dropped. */
284 
285  LO maxNzPerRow = 200;
286  Teuchos::ArrayRCP<Scalar> penalties(maxNzPerRow); /* Penalty function */
287  /* described above. */
288 
289  Teuchos::ArrayRCP<bool> keepStatus(nBlks, true); /* accumulated keepOrNot info */
290  Teuchos::ArrayRCP<LO> bColList(nBlks); /* accumulated bcols info */
291  /* for an entire block as */
292  /* opposed to a single row */
293  /* Additionally, keepOrNot[j] */
294  /* refers to status of jth */
295  /* entry in a row while */
296  /* keepStatus[j] refers to */
297  /* whether the jth block is */
298  /* kept within the block row. */
299 
300  Teuchos::ArrayRCP<bool> alreadyOnBColList(nBlks, false); /* used to avoid recording the*/
301  /* same block column when */
302  /* processing different pt */
303  /* rows within a block. */
304 
305  typename LWGraph::boundary_nodes_type boundaryNodes("boundaryNodes", nBlks);
306  Kokkos::deep_copy(boundaryNodes, false);
307 
308  for (LO i = 0; i < maxNzPerRow; i++)
309  penalties[i] = penaltyPolyCoef[poly0thOrderCoef] +
310  penaltyPolyCoef[poly1stOrderCoef] * (as<Scalar>(i)) +
311  penaltyPolyCoef[poly2ndOrderCoef] * (as<Scalar>(i * i)) +
312  (penaltyPolyCoef[poly3rdOrderCoef] * (as<Scalar>(i * i)) * (as<Scalar>(i))) + // perhaps avoids overflow?
313  (penaltyPolyCoef[poly4thOrderCoef] * (as<Scalar>(i * i)) * (as<Scalar>(i * i)));
314 
315  LO nzTotal = 0, numBCols = 0, row = -1, Nbcols, bcol;
316  newRowPtr(0) = 0;
317 
318  /* proceed block by block */
319  for (LO i = 0; i < as<LO>(nBlks); i++) {
320  newRowPtr[i + 1] = newRowPtr[i];
321  for (LO j = 0; j < nPDEs; j++) {
322  row = row + 1;
323 
326 
327  Amat.getLocalRowView(row, indices, vals);
328 
329  if (indices.size() > maxNzPerRow) {
330  LO oldSize = maxNzPerRow;
331  maxNzPerRow = indices.size() + 100;
332  penalties.resize(as<size_t>(maxNzPerRow), 0.0);
333  for (LO k = oldSize; k < maxNzPerRow; k++)
334  penalties[k] = penaltyPolyCoef[poly0thOrderCoef] +
335  penaltyPolyCoef[poly1stOrderCoef] * (as<Scalar>(i)) +
336  penaltyPolyCoef[poly2ndOrderCoef] * (as<Scalar>(i * i)) +
337  (penaltyPolyCoef[poly3rdOrderCoef] * (as<Scalar>(i * i)) * (as<Scalar>(i))) +
338  (penaltyPolyCoef[poly4thOrderCoef] * (as<Scalar>(i * i)) * (as<Scalar>(i * i)));
339  }
340  badGuysDropfunc(row, indices, vals, testVecs, nPDEs, penalties, nearNull, bcols, keepOrNot, Nbcols, nLoc);
341  for (LO k = 0; k < Nbcols; k++) {
342  bcol = bcols[k];
343 
344  /* add to bColList if not already on it */
345 
346  if (alreadyOnBColList[bcol] == false) { /* for PDE systems only record */
347  bColList[numBCols++] = bcol; /* neighboring block one time */
348  alreadyOnBColList[bcol] = true;
349  }
350  /* drop if any pt row within block indicates entry should be dropped */
351 
352  if (keepOrNot[k] == false) keepStatus[bcol] = false;
353 
354  } /* for (k=0; k < Nbcols; k++) */
355  } /* for (j = 0; i < nPDEs; j++) */
356 
357  /* finished with block row. Now record block entries that we keep */
358  /* and reset keepStatus, bColList, and alreadyOnBColList. */
359 
360  if (numBCols < 2) boundaryNodes[i] = true;
361  for (LO j = 0; j < numBCols; j++) {
362  bcol = bColList[j];
363  if (keepStatus[bcol] == true) {
364  newCols[nzTotal] = bColList[j];
365  newRowPtr(i + 1)++;
366  nzTotal = nzTotal + 1;
367  }
368  keepStatus[bcol] = true;
369  alreadyOnBColList[bcol] = false;
370  bColList[j] = 0;
371  }
372  numBCols = 0;
373  } /* for (i = 0; i < nBlks; i++) */
374 
375  /* create array of the correct size and copy over newCols to it */
376 
377  typename LWGraph::entries_type::non_const_type finalCols("finalCols", nzTotal);
378  for (LO i = 0; i < nzTotal; i++) finalCols(i) = newCols[i];
379 
380  // Not using column map because we do not allow for any off-proc stuff.
381  // Not sure if this is okay. FIXME
382 
383  RCP<const Map> rowMap = Amat.getRowMap(); // , colMap = Amat.getColMap();
384 
385  LO nAmalgNodesOnProc = rowMap->getLocalNumElements() / nPDEs;
386  Teuchos::Array<GO> nodalGIDs(nAmalgNodesOnProc);
388  for (size_t i = 0; i < as<size_t>(nAmalgNodesOnProc); i++) {
389  GO gid = rowMap->getGlobalElement(i * nPDEs);
391  nodalGIDs[i] = as<GO>(floor(temp));
392  }
393  GO nAmalgNodesGlobal = rowMap->getGlobalNumElements();
394  GO nBlkGlobal = nAmalgNodesGlobal / nPDEs;
395  if (nBlkGlobal * nPDEs != nAmalgNodesGlobal)
396  TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "Number of global dofs not divisible by BlkSize");
397 
398  Teuchos::RCP<Map> AmalgRowMap = MapFactory::Build(rowMap->lib(), nBlkGlobal,
399  nodalGIDs(), 0, rowMap->getComm());
400 
401  filteredGraph = rcp(new LWGraph(newRowPtr, finalCols, AmalgRowMap, AmalgRowMap, "thresholded graph of A"));
402  filteredGraph->SetBoundaryNodeMap(boundaryNodes);
403 }
404 
405 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
406 void SmooVecCoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::badGuysDropfunc(LO row, const Teuchos::ArrayView<const LocalOrdinal>& cols, const Teuchos::ArrayView<const Scalar>& vals, const MultiVector& testVecs, LO nPDEs, Teuchos::ArrayRCP<Scalar>& penalties, const MultiVector& nearNull, Teuchos::ArrayRCP<LO>& Bcols, Teuchos::ArrayRCP<bool>& keepOrNot, LO& Nbcols, LO nLoc) const {
407  using TST = Teuchos::ScalarTraits<Scalar>;
408 
409  LO nLeng = cols.size();
410  typename TST::coordinateType temp;
411  temp = ((typename TST::coordinateType)(row)) / ((typename TST::coordinateType)(nPDEs));
412  LO blkRow = as<LO>(floor(temp));
413  Teuchos::ArrayRCP<Scalar> badGuy(nLeng, 0.0);
414  Teuchos::ArrayRCP<Scalar> subNull(nLeng, 0.0); /* subset of nearNull */
415  /* associated with current */
416  /* dof within node. */
417 
418  /* Only consider testVecs associated with same dof & on processor. Further */
419  /* collapse testVecs to a single badGuy vector by basically taking the worst */
420  /* (least smooth) values for each of the off diags. In particular, we look at*/
421  /* the ratio of each off-diag test value / diag test value and compare this */
422  /* with the nearNull vector ratio. The further the testVec ratio is from the */
423  /* nearNull ratio, the harder is will be to accurately interpolate is these */
424  /* two guys are aggregated. So, the biggest ratio mismatch is used to choose */
425  /* the testVec entry associated with each off-diagonal entry. */
426 
427  for (LO i = 0; i < nLeng; i++) keepOrNot[i] = false;
428 
429  LO diagInd = -1;
430  Nbcols = 0;
431  LO rowDof = row - blkRow * nPDEs;
432  Teuchos::ArrayRCP<const Scalar> oneNull = nearNull.getData(as<size_t>(rowDof));
433 
434  for (LO i = 0; i < nLeng; i++) {
435  if ((cols[i] < nLoc) && (TST::magnitude(vals[i]) != 0.0)) { /* on processor */
436  temp = ((typename TST::coordinateType)(cols[i])) / ((typename TST::coordinateType)(nPDEs));
437  LO colDof = cols[i] - (as<LO>(floor(temp))) * nPDEs;
438  if (colDof == rowDof) { /* same dof within node as row */
439  Bcols[Nbcols] = (cols[i] - colDof) / nPDEs;
440  subNull[Nbcols] = oneNull[cols[i]];
441 
442  if (cols[i] != row) { /* not diagonal */
443  Scalar worstRatio = -TST::one();
444  Scalar targetRatio = subNull[Nbcols] / oneNull[row];
445  Scalar actualRatio;
446  for (size_t kk = 0; kk < testVecs.getNumVectors(); kk++) {
447  Teuchos::ArrayRCP<const Scalar> curVec = testVecs.getData(kk);
448  actualRatio = curVec[cols[i]] / curVec[row];
449  if (TST::magnitude(actualRatio - targetRatio) > TST::magnitude(worstRatio)) {
450  badGuy[Nbcols] = actualRatio;
451  worstRatio = Teuchos::ScalarTraits<SC>::magnitude(actualRatio - targetRatio);
452  }
453  }
454  } else {
455  badGuy[Nbcols] = 1.;
456  keepOrNot[Nbcols] = true;
457  diagInd = Nbcols;
458  }
459  (Nbcols)++;
460  }
461  }
462  }
463 
464  /* Make sure that diagonal entry is in block col list */
465 
466  if (diagInd == -1) {
467  Bcols[Nbcols] = (row - rowDof) / nPDEs;
468  subNull[Nbcols] = 1.;
469  badGuy[Nbcols] = 1.;
470  keepOrNot[Nbcols] = true;
471  diagInd = Nbcols;
472  (Nbcols)++;
473  }
474 
475  Scalar currentRP = oneNull[row] * oneNull[row];
476  Scalar currentRTimesBadGuy = oneNull[row] * badGuy[diagInd];
477  Scalar currentScore = penalties[0]; /* (I - P inv(R*P)*R )=0 for size */
478  /* size 1 agg, so fit is perfect */
479 
480  /* starting from a set that only includes the diagonal entry consider adding */
481  /* one off-diagonal at a time until the fitValue exceeds the penalty term. */
482  /* Here, the fit value is (I - P inv(R P) R ) and we always consider the */
483  /* lowest fitValue that is not currently in the set. R and P correspond to */
484  /* a simple tentaive grid transfer associated with an aggregate that */
485  /* includes the diagonal, all already determined neighbors, and the potential*/
486  /* new neighbor */
487 
488  LO nKeep = 1, flag = 1, minId;
489  Scalar minFit, minFitRP = 0., minFitRTimesBadGuy = 0.;
490  Scalar newRP, newRTimesBadGuy;
491 
492  while (flag == 1) {
493  /* compute a fit for each possible off-diagonal neighbor */
494  /* that has not already been added as a neighbor */
495 
496  minFit = 1000000.;
497  minId = -1;
498 
499  for (LO i = 0; i < Nbcols; i++) {
500  if (keepOrNot[i] == false) {
501  keepOrNot[i] = true; /* temporarily view i as non-dropped neighbor */
502  newRP = currentRP + subNull[i] * subNull[i];
503  newRTimesBadGuy = currentRTimesBadGuy + subNull[i] * badGuy[i];
504  Scalar ratio = newRTimesBadGuy / newRP;
505 
506  Scalar newFit = 0.0;
507  for (LO k = 0; k < Nbcols; k++) {
508  if (keepOrNot[k] == true) {
509  Scalar diff = badGuy[k] - ratio * subNull[k];
510  newFit = newFit + diff * diff;
511  }
512  }
514  minId = i;
515  minFit = newFit;
516  minFitRP = newRP;
517  minFitRTimesBadGuy = newRTimesBadGuy;
518  }
519  keepOrNot[i] = false;
520  }
521  }
522  if (minId == -1)
523  flag = 0;
524  else {
525  minFit = sqrt(minFit);
526  Scalar newScore = penalties[nKeep] + minFit;
528  nKeep = nKeep + 1;
529  keepOrNot[minId] = true;
530  currentScore = newScore;
531  currentRP = minFitRP;
532  currentRTimesBadGuy = minFitRTimesBadGuy;
533  } else
534  flag = 0;
535  }
536  }
537 }
538 
539 } // namespace MueLu
540 
541 #endif // MUELU_SMOOVECCOALESCEDROPFACTORY_DEF_HPP
void setValidator(RCP< const ParameterEntryValidator > const &validator)
GlobalOrdinal GO
ParameterList & set(std::string const &name, T const &value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
size_type size() const
LocalOrdinal LO
KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const
Return number of graph vertices.
KOKKOS_INLINE_FUNCTION size_type GetNodeNumEdges() const
Return number of graph edges.
size_type size() const
void resize(const size_type n, const T &val=T())
void Build(Level &currentLevel) const
Build an object with this factory.
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
MueLu::DefaultScalar Scalar
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
#define SET_VALID_ENTRY(name)
static RCP< MultiVector > ReadMultiVector(const std::string &fileName, const RCP< const Map > &map)
void DeclareInput(Level &currentLevel) const
Input.
KOKKOS_INLINE_FUNCTION neighbor_vertices_type getNeighborVertices(LO i) const
Return the list of vertices adjacent to the vertex &#39;v&#39;.
Print class parameters.
static magnitudeType magnitude(T a)
size_type size() const
Scalar SC
void badGuysDropfunc(LO row, const Teuchos::ArrayView< const LocalOrdinal > &indices, const Teuchos::ArrayView< const Scalar > &vals, const MultiVector &smoothedTVecs, LO nPDEs, Teuchos::ArrayRCP< Scalar > &penalties, const MultiVector &smoothedNull, Teuchos::ArrayRCP< LO > &Bcols, Teuchos::ArrayRCP< bool > &keepOrNot, LO &Nbcols, LO nLoc) const
Lightweight MueLu representation of a compressed row storage graph.
Exception throws to report errors in the internal logical of the program.
void badGuysCoalesceDrop(const Matrix &Amat, Teuchos::ArrayRCP< Scalar > &dropParams, LO nPDEs, const MultiVector &smoothedTVecs, const MultiVector &smoothedNull, RCP< LWGraph > &filteredGraph) const
Methods to support compatible-relaxation style dropping.
ParameterEntry & getEntry(const std::string &name)
virtual void Apply(MultiVector &x, const MultiVector &rhs, bool InitialGuessIsZero=false) const =0
Apply smoother.
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need&#39;s value has been saved.