MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_SaPFactory_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // MueLu: A package for multigrid based preconditioning
4 //
5 // Copyright 2012 NTESS and the MueLu contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef MUELU_SAPFACTORY_DEF_HPP
11 #define MUELU_SAPFACTORY_DEF_HPP
12 
13 #if KOKKOS_VERSION >= 40799
14 #include "KokkosKernels_ArithTraits.hpp"
15 #else
16 #include "Kokkos_ArithTraits.hpp"
17 #endif
19 
20 #include <Xpetra_Matrix.hpp>
21 #include <Xpetra_IteratorOps.hpp>
22 
24 #include "MueLu_Level.hpp"
25 #include "MueLu_MasterList.hpp"
26 #include "MueLu_Monitor.hpp"
27 #include "MueLu_PerfUtils.hpp"
28 #include "MueLu_TentativePFactory.hpp"
29 #include "MueLu_Utilities.hpp"
30 
31 #include <sstream>
32 
33 namespace MueLu {
34 
35 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
37 
38 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
40 
41 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
43  RCP<ParameterList> validParamList = rcp(new ParameterList());
44 
45 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
46  SET_VALID_ENTRY("sa: damping factor");
47  SET_VALID_ENTRY("sa: calculate eigenvalue estimate");
48  SET_VALID_ENTRY("sa: eigenvalue estimate num iterations");
49  SET_VALID_ENTRY("sa: use rowsumabs diagonal scaling");
50  SET_VALID_ENTRY("sa: enforce constraints");
51  SET_VALID_ENTRY("tentative: calculate qr");
52  SET_VALID_ENTRY("sa: max eigenvalue");
53  SET_VALID_ENTRY("sa: rowsumabs diagonal replacement tolerance");
54  SET_VALID_ENTRY("sa: rowsumabs diagonal replacement value");
55  SET_VALID_ENTRY("sa: rowsumabs replace single entry row with zero");
56  SET_VALID_ENTRY("sa: rowsumabs use automatic diagonal tolerance");
57  SET_VALID_ENTRY("use kokkos refactor");
58 #undef SET_VALID_ENTRY
59 
60  validParamList->set<RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A used during the prolongator smoothing process");
61  validParamList->set<RCP<const FactoryBase> >("P", Teuchos::null, "Tentative prolongator factory");
62 
63  // Make sure we don't recursively validate options for the matrixmatrix kernels
64  ParameterList norecurse;
65  norecurse.disableRecursiveValidation();
66  validParamList->set<ParameterList>("matrixmatrix: kernel params", norecurse, "MatrixMatrix kernel parameters");
67 
68  return validParamList;
69 }
70 
71 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
73  Input(fineLevel, "A");
74 
75  // Get default tentative prolongator factory
76  // Getting it that way ensure that the same factory instance will be used for both SaPFactory and NullspaceFactory.
77  RCP<const FactoryBase> initialPFact = GetFactory("P");
78  if (initialPFact == Teuchos::null) {
79  initialPFact = coarseLevel.GetFactoryManager()->GetFactory("Ptent");
80  }
81  coarseLevel.DeclareInput("P", initialPFact.get(), this);
82 }
83 
84 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
86  return BuildP(fineLevel, coarseLevel);
87 }
88 
89 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
91  FactoryMonitor m(*this, "Prolongator smoothing", coarseLevel);
92 
93  std::string levelIDs = toString(coarseLevel.GetLevelID());
94 
95  const std::string prefix = "MueLu::SaPFactory(" + levelIDs + "): ";
96 
97  typedef typename Teuchos::ScalarTraits<SC>::coordinateType Coordinate;
98  typedef typename Teuchos::ScalarTraits<SC>::magnitudeType MT;
99 
100  // Get default tentative prolongator factory
101  // Getting it that way ensure that the same factory instance will be used for both SaPFactory and NullspaceFactory.
102  // -- Warning: Do not use directly initialPFact_. Use initialPFact instead everywhere!
103  RCP<const FactoryBase> initialPFact = GetFactory("P");
104  if (initialPFact == Teuchos::null) {
105  initialPFact = coarseLevel.GetFactoryManager()->GetFactory("Ptent");
106  }
107  const ParameterList& pL = GetParameterList();
108 
109  // Level Get
110  RCP<Matrix> A = Get<RCP<Matrix> >(fineLevel, "A");
111  RCP<Matrix> Ptent = coarseLevel.Get<RCP<Matrix> >("P", initialPFact.get());
112  RCP<Matrix> finalP;
113  // If Tentative facctory bailed out (e.g., number of global aggregates is 0), then SaPFactory bails
114  // This level will ultimately be removed in MueLu_Hierarchy_defs.h via a resize()
115  if (Ptent == Teuchos::null) {
116  finalP = Teuchos::null;
117  Set(coarseLevel, "P", finalP);
118  return;
119  }
120 
121  if (restrictionMode_) {
122  SubFactoryMonitor m2(*this, "Transpose A", coarseLevel);
123  A = Utilities::Transpose(*A, true); // build transpose of A explicitly
124  }
125 
126  // Build final prolongator
127 
128  // Reuse pattern if available
129  RCP<ParameterList> APparams;
130  if (pL.isSublist("matrixmatrix: kernel params"))
131  APparams = rcp(new ParameterList(pL.sublist("matrixmatrix: kernel params")));
132  else
133  APparams = rcp(new ParameterList);
134  if (coarseLevel.IsAvailable("AP reuse data", this)) {
135  GetOStream(static_cast<MsgType>(Runtime0 | Test)) << "Reusing previous AP data" << std::endl;
136 
137  APparams = coarseLevel.Get<RCP<ParameterList> >("AP reuse data", this);
138 
139  if (APparams->isParameter("graph"))
140  finalP = APparams->get<RCP<Matrix> >("graph");
141  }
142  // By default, we don't need global constants for SaP
143  APparams->set("compute global constants: temporaries", APparams->get("compute global constants: temporaries", false));
144  APparams->set("compute global constants", APparams->get("compute global constants", false));
145 
146  const SC dampingFactor = as<SC>(pL.get<double>("sa: damping factor"));
147  const LO maxEigenIterations = as<LO>(pL.get<int>("sa: eigenvalue estimate num iterations"));
148  const bool estimateMaxEigen = pL.get<bool>("sa: calculate eigenvalue estimate");
149  const bool useAbsValueRowSum = pL.get<bool>("sa: use rowsumabs diagonal scaling");
150  const bool doQRStep = pL.get<bool>("tentative: calculate qr");
151  const bool enforceConstraints = pL.get<bool>("sa: enforce constraints");
152  const MT userDefinedMaxEigen = as<MT>(pL.get<double>("sa: max eigenvalue"));
153  double dTol = pL.get<double>("sa: rowsumabs diagonal replacement tolerance");
154  const MT diagonalReplacementTolerance = (dTol == as<double>(-1) ? Teuchos::ScalarTraits<MT>::eps() * 100 : as<MT>(pL.get<double>("sa: rowsumabs diagonal replacement tolerance")));
155  const SC diagonalReplacementValue = as<SC>(pL.get<double>("sa: rowsumabs diagonal replacement value"));
156  const bool replaceSingleEntryRowWithZero = pL.get<bool>("sa: rowsumabs replace single entry row with zero");
157  const bool useAutomaticDiagTol = pL.get<bool>("sa: rowsumabs use automatic diagonal tolerance");
158 
159  // Sanity checking
160  TEUCHOS_TEST_FOR_EXCEPTION(doQRStep && enforceConstraints, Exceptions::RuntimeError,
161  "MueLu::TentativePFactory::MakeTentative: cannot use 'enforce constraints' and 'calculate qr' at the same time");
162 
163  if (dampingFactor != Teuchos::ScalarTraits<SC>::zero()) {
164  Scalar lambdaMax;
165  Teuchos::RCP<Vector> invDiag;
166  if (userDefinedMaxEigen == -1.) {
167  SubFactoryMonitor m2(*this, "Eigenvalue estimate", coarseLevel);
168  lambdaMax = A->GetMaxEigenvalueEstimate();
169  if (lambdaMax == -Teuchos::ScalarTraits<SC>::one() || estimateMaxEigen) {
170  GetOStream(Statistics1) << "Calculating max eigenvalue estimate now (max iters = " << maxEigenIterations << ((useAbsValueRowSum) ? ", use rowSumAbs diagonal)" : ", use point diagonal)") << std::endl;
171  Coordinate stopTol = 1e-4;
172  if (useAbsValueRowSum) {
173  const bool returnReciprocal = true;
174  invDiag = Utilities::GetLumpedMatrixDiagonal(*A, returnReciprocal,
175  diagonalReplacementTolerance,
176  diagonalReplacementValue,
177  replaceSingleEntryRowWithZero,
178  useAutomaticDiagTol);
179  TEUCHOS_TEST_FOR_EXCEPTION(invDiag.is_null(), Exceptions::RuntimeError,
180  "SaPFactory: eigenvalue estimate: diagonal reciprocal is null.");
181  lambdaMax = Utilities::PowerMethod(*A, invDiag, maxEigenIterations, stopTol);
182  } else
183  lambdaMax = Utilities::PowerMethod(*A, true, maxEigenIterations, stopTol);
184  A->SetMaxEigenvalueEstimate(lambdaMax);
185  } else {
186  GetOStream(Statistics1) << "Using cached max eigenvalue estimate" << std::endl;
187  }
188  } else {
189  lambdaMax = userDefinedMaxEigen;
190  A->SetMaxEigenvalueEstimate(lambdaMax);
191  }
192  GetOStream(Statistics1) << "Prolongator damping factor = " << dampingFactor / lambdaMax << " (" << dampingFactor << " / " << lambdaMax << ")" << std::endl;
193 
194  {
195  SubFactoryMonitor m2(*this, "Fused (I-omega*D^{-1} A)*Ptent", coarseLevel);
196  if (!useAbsValueRowSum)
197  invDiag = Utilities::GetMatrixDiagonalInverse(*A); // default
198  else if (invDiag == Teuchos::null) {
199  GetOStream(Runtime0) << "Using rowsumabs diagonal" << std::endl;
200  const bool returnReciprocal = true;
201  invDiag = Utilities::GetLumpedMatrixDiagonal(*A, returnReciprocal,
202  diagonalReplacementTolerance,
203  diagonalReplacementValue,
204  replaceSingleEntryRowWithZero,
205  useAutomaticDiagTol);
206  TEUCHOS_TEST_FOR_EXCEPTION(invDiag.is_null(), Exceptions::RuntimeError, "SaPFactory: diagonal reciprocal is null.");
207  }
208 
209  SC omega = dampingFactor / lambdaMax;
210  TEUCHOS_TEST_FOR_EXCEPTION(!std::isfinite(Teuchos::ScalarTraits<SC>::magnitude(omega)), Exceptions::RuntimeError, "Prolongator damping factor needs to be finite.");
211 
212  {
213  SubFactoryMonitor m3(*this, "Xpetra::IteratorOps::Jacobi", coarseLevel);
214  // finalP = Ptent + (I - \omega D^{-1}A) Ptent
215  finalP = Xpetra::IteratorOps<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Jacobi(omega, *invDiag, *A, *Ptent, finalP, GetOStream(Statistics2), std::string("MueLu::SaP-") + toString(coarseLevel.GetLevelID()), APparams);
216  if (enforceConstraints) {
217  if (!pL.get<bool>("use kokkos refactor")) {
218  if (A->GetFixedBlockSize() == 1)
219  optimalSatisfyPConstraintsForScalarPDEsNonKokkos(finalP);
220  else
221  SatisfyPConstraintsNonKokkos(A, finalP);
222  } else {
223  // if (A->GetFixedBlockSize() == 1)
224  // optimalSatisfyPConstraintsForScalarPDEs(finalP);
225  // else
226  SatisfyPConstraints(A, finalP);
227  }
228  }
229  }
230  }
231 
232  } else {
233  finalP = Ptent;
234  }
235 
236  // Level Set
237  RCP<Matrix> R;
238  if (!restrictionMode_) {
239  // prolongation factory is in prolongation mode
240  if (!finalP.is_null()) {
241  std::ostringstream oss;
242  oss << "P_" << coarseLevel.GetLevelID();
243  finalP->setObjectLabel(oss.str());
244  }
245  Set(coarseLevel, "P", finalP);
246 
247  APparams->set("graph", finalP);
248  Set(coarseLevel, "AP reuse data", APparams);
249 
250  // NOTE: EXPERIMENTAL
251  if (Ptent->IsView("stridedMaps"))
252  finalP->CreateView("stridedMaps", Ptent);
253 
254  } else {
255  // prolongation factory is in restriction mode
256  {
257  SubFactoryMonitor m2(*this, "Transpose P", coarseLevel);
258  R = Utilities::Transpose(*finalP, true);
259  if (!R.is_null()) {
260  std::ostringstream oss;
261  oss << "R_" << coarseLevel.GetLevelID();
262  R->setObjectLabel(oss.str());
263  }
264  }
265 
266  Set(coarseLevel, "R", R);
267 
268  // NOTE: EXPERIMENTAL
269  if (Ptent->IsView("stridedMaps"))
270  R->CreateView("stridedMaps", Ptent, true /*transposeA*/);
271  }
272 
273  if (IsPrint(Statistics2)) {
274  RCP<ParameterList> params = rcp(new ParameterList());
275  params->set("printLoadBalancingInfo", true);
276  params->set("printCommInfo", true);
277  GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo((!restrictionMode_ ? *finalP : *R), (!restrictionMode_ ? "P" : "R"), params);
278  }
279 
280 } // Build()
281 
282 // Analyze the grid transfer produced by smoothed aggregation and make
283 // modifications if it does not look right. In particular, if there are
284 // negative entries or entries larger than 1, modify P's rows.
285 //
286 // Note: this kind of evaluation probably only makes sense if not doing QR
287 // when constructing tentative P.
288 //
289 // For entries that do not satisfy the two constraints (>= 0 or <=1) we set
290 // these entries to the constraint value and modify the rest of the row
291 // so that the row sum remains the same as before by adding an equal
292 // amount to each remaining entry. However, if the original row sum value
293 // violates the constraints, we set the row sum back to 1 (the row sum of
294 // tentative P). After doing the modification to a row, we need to check
295 // again the entire row to make sure that the modified row does not violate
296 // the constraints.
297 
298 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
302  LO nPDEs = A->GetFixedBlockSize();
303  Teuchos::ArrayRCP<Scalar> ConstraintViolationSum(nPDEs);
304  Teuchos::ArrayRCP<Scalar> Rsum(nPDEs);
305  Teuchos::ArrayRCP<size_t> nPositive(nPDEs);
306  for (size_t k = 0; k < (size_t)nPDEs; k++) ConstraintViolationSum[k] = zero;
307  for (size_t k = 0; k < (size_t)nPDEs; k++) Rsum[k] = zero;
308  for (size_t k = 0; k < (size_t)nPDEs; k++) nPositive[k] = 0;
309 
310  for (size_t i = 0; i < as<size_t>(P->getRowMap()->getLocalNumElements()); i++) {
314  P->getLocalRowView((LO)i, indices, vals1);
315  size_t nnz = indices.size();
316  if (nnz == 0) continue;
317 
318  vals = ArrayView<Scalar>(const_cast<SC*>(vals1.getRawPtr()), nnz);
319 
320  bool checkRow = true;
321 
322  while (checkRow) {
323  // check constraints and compute the row sum
324 
325  for (LO j = 0; j < indices.size(); j++) {
326  Rsum[j % nPDEs] += vals[j];
328  ConstraintViolationSum[j % nPDEs] += vals[j];
329  vals[j] = zero;
330  } else {
332  (nPositive[j % nPDEs])++;
333 
335  ConstraintViolationSum[j % nPDEs] += (vals[j] - one);
336  vals[j] = one;
337  }
338  }
339  }
340 
341  checkRow = false;
342 
343  // take into account any row sum that violates the contraints
344 
345  for (size_t k = 0; k < (size_t)nPDEs; k++) {
347  ConstraintViolationSum[k] += (-Rsum[k]); // rstumin
349  ConstraintViolationSum[k] += (one - Rsum[k]); // rstumin
350  }
351  }
352 
353  // check if row need modification
354  for (size_t k = 0; k < (size_t)nPDEs; k++) {
355  if (Teuchos::ScalarTraits<SC>::magnitude(ConstraintViolationSum[k]) != Teuchos::ScalarTraits<SC>::magnitude(zero))
356  checkRow = true;
357  }
358  // modify row
359  if (checkRow) {
360  for (LO j = 0; j < indices.size(); j++) {
362  vals[j] += (ConstraintViolationSum[j % nPDEs] / (as<Scalar>(nPositive[j % nPDEs])));
363  }
364  }
365  for (size_t k = 0; k < (size_t)nPDEs; k++) ConstraintViolationSum[k] = zero;
366  }
367  for (size_t k = 0; k < (size_t)nPDEs; k++) Rsum[k] = zero;
368  for (size_t k = 0; k < (size_t)nPDEs; k++) nPositive[k] = 0;
369  } // while (checkRow) ...
370  } // for (size_t i = 0; i < as<size_t>(P->getRowMap()->getNumNodeElements()); i++) ...
371 } // SatsifyPConstraints()
372 
373 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
377 
378  LocalOrdinal maxEntriesPerRow = 100; // increased later if needed
379  Teuchos::ArrayRCP<Scalar> scalarData(3 * maxEntriesPerRow);
380  bool hasFeasible;
381 
382  for (size_t i = 0; i < as<size_t>(P->getRowMap()->getLocalNumElements()); i++) {
386  P->getLocalRowView((LocalOrdinal)i, indices, vals1);
387  size_t nnz = indices.size();
388  if (nnz != 0) {
389  vals = ArrayView<Scalar>(const_cast<SC*>(vals1.getRawPtr()), nnz);
390  Scalar rsumTarget = zero;
391  for (size_t j = 0; j < nnz; j++) rsumTarget += vals[j];
392 
393  if (nnz > as<size_t>(maxEntriesPerRow)) {
394  maxEntriesPerRow = nnz * 3;
395  scalarData.resize(3 * maxEntriesPerRow);
396  }
397  hasFeasible = constrainRow(vals.getRawPtr(), as<LocalOrdinal>(nnz), zero, one, rsumTarget, vals.getRawPtr(), scalarData.getRawPtr());
398 
399  if (!hasFeasible) { // just set all entries to the same value giving a row sum of 1
400  for (size_t j = 0; j < nnz; j++) vals[j] = one / as<Scalar>(nnz);
401  }
402  }
403 
404  } // for (size_t i = 0; i < as<size_t>(P->getRowMap()->getNumNodeElements()); i++) ...
405 } // SatsifyPConstraints()
406 
407 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
408 bool SaPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::constrainRow(Scalar* orig, LocalOrdinal nEntries, Scalar leftBound, Scalar rghtBound, Scalar rsumTarget, Scalar* fixedUnsorted, Scalar* scalarData) const {
409  /*
410  Input
411  orig data that should be adjusted to satisfy bound constraints and
412  row sum constraint. orig is not modified by this function.
413 
414  nEntries length or 'orig'
415 
416  leftBound, define bound constraints for the results.
417  rghtBound
418 
419  rsumTarget defines an equality constraint for the row sum
420 
421  fixedUnsorted on output, if a feasible solutuion exists then
422  || orig - fixedUnsorted || = min when also
423  leftBound <= fixedUnsorted[i] <= rghtBound for all i
424  and sum(fixedUnsorted) = rsumTarget.
425 
426  Note: it is possible to use the same pointer for
427  fixedUnsorted and orig. In this case, orig gets
428  overwritten with the new constraint satisfying values.
429 
430  scalarData a work array that should be 3x nEntries.
431 
432  On return constrain() indicates whether or not a feasible solution exists.
433  */
434 
435  /*
436  Given a sequence of numbers o1 ... on, fix these so that they are as
437  close as possible to the original but satisfy bound constraints and also
438  have the same row sum as the oi's. If we know who is going to lie on a
439  bound, then the "best" answer (i.e., || o - f ||_2 = min) perturbs
440  each element that doesn't lie on a bound by the same amount.
441 
442  We can represent the oi's by considering scattered points on a number line
443 
444  | |
445  | |
446  o o o | o o o o o o |o o
447  | |
448 
449  \____/ \____/
450  <---- <----
451  delta delta
452 
453  Bounds are shown by vertical lines. The fi's must lie within the bounds, so
454  the 3 leftmost points must be shifted to the right and the 2 rightmost must
455  be shifted to the left. If these fi points are all shifted to the bounds
456  while the others remain in the same location, the row sum constraint is
457  likely not satisfied and so more shifting is necessary. In the figure, the f
458  rowsum is too large and so there must be more shifting to the left.
459 
460  To minimize || o - f ||_2, we basically shift all "interiors" by the same
461  amount, denoted delta. The only trick is that some points near bounds are
462  still affected by the bounds and so these points might be shifted more or less
463  than delta. In the example,t he 3 rightmost points are shifted in the opposite
464  direction as delta to the bound. The 4th point is shifted by something less
465  than delta so it does not violate the lower bound. The rightmost point is
466  shifted to the bound by some amount larger than delta. However, the 2nd point
467  is shifted by delta (i.e., it lies inside the two bounds).
468 
469  If we know delta, we can figure out everything. If we know which points
470  are special (not shifted by delta), we can also figure out everything.
471  The problem is these two things (delta and the special points) are
472  inter-connected. An algorithm for computing follows.
473 
474  1) move exterior points to the bounds and compute how much the row sum is off
475  (rowSumDeviation). We assume now that the new row sum is high, so interior
476  points must be shifted left.
477 
478  2) Mark closest point just left of the leftmost bound, closestToLeftBound,
479  and compute its distance to the leftmost bound. Mark closest point to the
480  left of the rightmost bound, closestToRghtBound, and compute its distance to
481  right bound. There are two cases to consider.
482 
483  3) Case 1: closestToLeftBound is closer than closestToRghtBound.
484  Assume that shifting by delta does not move closestToLeftBound past the
485  left bound. This means that it will be shifted by delta. However,
486  closestToRghtBound will be shifted by more than delta. So the total
487  number of points shifted by delta (|interiorToBounds|) includes
488  closestToLeftBound up to and including the point just to the left of
489  closestToRghtBound. So
490 
491  delta = rowSumDeviation/ |interiorToBounds| .
492 
493  Recall that rowSumDeviation already accounts for the non-delta shift of
494  of closestToRightBound. Now check whether our assumption is valid.
495 
496  If delta <= closestToLeftBoundDist, assumption is true so delta can be
497  applied to interiorToBounds ... and we are done.
498  Else assumption is false. Shift closestToLeftBound to the left bound.
499  Update rowSumDeviation, interiorToBounds, and identify new
500  closestToLeftBound. Repeat step 3).
501 
502  Case 2: closestToRghtBound is closer than closestToLeftBound.
503  Assume that shifting by delta does not move closestToRghtBound past right
504  bound. This means that it must be shifted by more than delta to right
505  bound. So the total number of points shifted by delta again includes
506  closestToLeftBound up to and including the point just to the left of
507  closestToRghtBound. So again compute
508 
509  delta = rowSumDeviation/ |interiorToBounds| .
510 
511  If delta <= closestToRghtBoundDist, assumption is true so delta is
512  can be applied to interiorToBounds ... and we are done
513  Else assumption is false. Put closestToRghtBound in the
514  interiorToBounds set. Remove it's contribution to rowSumDeviation,
515  identify new closestToRghtBound. Repeat step 3)
516 
517 
518  To implement, sort the oi's so things like closestToLeftBound is just index
519  into sorted array. Updaing closestToLeftBound or closestToRghtBound requires
520  increment by 1. |interiorToBounds|= closestToRghtBound - closestToLeftBound
521  To handle the case when the rowsum is low (requiring right interior shifts),
522  just flip the signs on data and use the left-shift code (and then flip back
523  before exiting function.
524  */
525  bool hasFeasibleSol;
526  Scalar notFlippedLeftBound, notFlippedRghtBound, aBigNumber, *origSorted;
527  Scalar rowSumDeviation, temp, *fixedSorted, delta;
528  Scalar closestToLeftBoundDist, closestToRghtBoundDist;
529  LocalOrdinal closestToLeftBound, closestToRghtBound;
530  LocalOrdinal* inds;
531  bool flipped;
532 
533  notFlippedLeftBound = leftBound;
534  notFlippedRghtBound = rghtBound;
535 
536  if ((Teuchos::ScalarTraits<SC>::real(rsumTarget) >= Teuchos::ScalarTraits<SC>::real(leftBound * as<Scalar>(nEntries))) &&
537  (Teuchos::ScalarTraits<SC>::real(rsumTarget) <= Teuchos::ScalarTraits<SC>::real(rghtBound * as<Scalar>(nEntries))))
538  hasFeasibleSol = true;
539  else {
540  hasFeasibleSol = false;
541  return hasFeasibleSol;
542  }
543  flipped = false;
544  // compute aBigNumber to handle some corner cases where we need
545  // something large so that an if statement will be false
546  aBigNumber = Teuchos::ScalarTraits<SC>::zero();
547  for (LocalOrdinal i = 0; i < nEntries; i++) {
549  aBigNumber = Teuchos::ScalarTraits<SC>::magnitude(orig[i]);
550  }
551  aBigNumber = aBigNumber + (Teuchos::ScalarTraits<SC>::magnitude(leftBound) + Teuchos::ScalarTraits<SC>::magnitude(rghtBound)) * as<Scalar>(100.0);
552 
553  origSorted = &scalarData[0];
554  fixedSorted = &(scalarData[nEntries]);
555  inds = (LocalOrdinal*)&(scalarData[2 * nEntries]);
556 
557  for (LocalOrdinal i = 0; i < nEntries; i++) inds[i] = i;
558  for (LocalOrdinal i = 0; i < nEntries; i++) origSorted[i] = orig[i]; /* orig no longer used */
559 
560  // sort so that orig[inds] is sorted.
561  std::sort(inds, inds + nEntries,
562  [origSorted](LocalOrdinal leftIndex, LocalOrdinal rightIndex) { return Teuchos::ScalarTraits<SC>::real(origSorted[leftIndex]) < Teuchos::ScalarTraits<SC>::real(origSorted[rightIndex]); });
563 
564  for (LocalOrdinal i = 0; i < nEntries; i++) origSorted[i] = orig[inds[i]];
565  // find entry in origSorted just to the right of the leftBound
566  closestToLeftBound = 0;
567  while ((closestToLeftBound < nEntries) && (Teuchos::ScalarTraits<SC>::real(origSorted[closestToLeftBound]) <= Teuchos::ScalarTraits<SC>::real(leftBound))) closestToLeftBound++;
568 
569  // find entry in origSorted just to the right of the rghtBound
570  closestToRghtBound = closestToLeftBound;
571  while ((closestToRghtBound < nEntries) && (Teuchos::ScalarTraits<SC>::real(origSorted[closestToRghtBound]) <= Teuchos::ScalarTraits<SC>::real(rghtBound))) closestToRghtBound++;
572 
573  // compute distance between closestToLeftBound and the left bound and the
574  // distance between closestToRghtBound and the right bound.
575 
576  closestToLeftBoundDist = origSorted[closestToLeftBound] - leftBound;
577  if (closestToRghtBound == nEntries)
578  closestToRghtBoundDist = aBigNumber;
579  else
580  closestToRghtBoundDist = origSorted[closestToRghtBound] - rghtBound;
581 
582  // compute how far the rowSum is off from the target row sum taking into account
583  // numbers that have been shifted to satisfy bound constraint
584 
585  rowSumDeviation = leftBound * as<Scalar>(closestToLeftBound) + as<Scalar>((nEntries - closestToRghtBound)) * rghtBound - rsumTarget;
586  for (LocalOrdinal i = closestToLeftBound; i < closestToRghtBound; i++) rowSumDeviation += origSorted[i];
587 
588  // the code that follow after this if statement assumes that rowSumDeviation is positive. If this
589  // is not the case, flip the signs of everything so that rowSumDeviation is now positive.
590  // Later we will flip the data back to its original form.
592  flipped = true;
593  temp = leftBound;
594  leftBound = -rghtBound;
595  rghtBound = temp;
596 
597  /* flip sign of origSorted and reverse ordering so that the negative version is sorted */
598 
599  if ((nEntries % 2) == 1) origSorted[(nEntries / 2)] = -origSorted[(nEntries / 2)];
600  for (LocalOrdinal i = 0; i < nEntries / 2; i++) {
601  temp = origSorted[i];
602  origSorted[i] = -origSorted[nEntries - 1 - i];
603  origSorted[nEntries - i - 1] = -temp;
604  }
605 
606  /* reverse bounds */
607 
608  LocalOrdinal itemp = closestToLeftBound;
609  closestToLeftBound = nEntries - closestToRghtBound;
610  closestToRghtBound = nEntries - itemp;
611  closestToLeftBoundDist = origSorted[closestToLeftBound] - leftBound;
612  if (closestToRghtBound == nEntries)
613  closestToRghtBoundDist = aBigNumber;
614  else
615  closestToRghtBoundDist = origSorted[closestToRghtBound] - rghtBound;
616 
617  rowSumDeviation = -rowSumDeviation;
618  }
619 
620  // initial fixedSorted so bounds are satisfied and interiors correspond to origSorted
621 
622  for (LocalOrdinal i = 0; i < closestToLeftBound; i++) fixedSorted[i] = leftBound;
623  for (LocalOrdinal i = closestToLeftBound; i < closestToRghtBound; i++) fixedSorted[i] = origSorted[i];
624  for (LocalOrdinal i = closestToRghtBound; i < nEntries; i++) fixedSorted[i] = rghtBound;
625 
626  while ((Teuchos::ScalarTraits<SC>::magnitude(rowSumDeviation) > Teuchos::ScalarTraits<SC>::magnitude(as<Scalar>(1.e-10) * rsumTarget))) { // && ( (closestToLeftBound < nEntries ) || (closestToRghtBound < nEntries))) {
627  if (closestToRghtBound != closestToLeftBound)
628  delta = rowSumDeviation / as<Scalar>(closestToRghtBound - closestToLeftBound);
629  else
630  delta = aBigNumber;
631 
632  if (Teuchos::ScalarTraits<SC>::magnitude(closestToLeftBoundDist) <= Teuchos::ScalarTraits<SC>::magnitude(closestToRghtBoundDist)) {
633  if (Teuchos::ScalarTraits<SC>::magnitude(delta) <= Teuchos::ScalarTraits<SC>::magnitude(closestToLeftBoundDist)) {
634  rowSumDeviation = Teuchos::ScalarTraits<SC>::zero();
635  for (LocalOrdinal i = closestToLeftBound; i < closestToRghtBound; i++) fixedSorted[i] = origSorted[i] - delta;
636  } else {
637  rowSumDeviation = rowSumDeviation - closestToLeftBoundDist;
638  fixedSorted[closestToLeftBound] = leftBound;
639  closestToLeftBound++;
640  if (closestToLeftBound < nEntries)
641  closestToLeftBoundDist = origSorted[closestToLeftBound] - leftBound;
642  else
643  closestToLeftBoundDist = aBigNumber;
644  }
645  } else {
646  if (Teuchos::ScalarTraits<SC>::magnitude(delta) <= Teuchos::ScalarTraits<SC>::magnitude(closestToRghtBoundDist)) {
647  rowSumDeviation = 0;
648  for (LocalOrdinal i = closestToLeftBound; i < closestToRghtBound; i++) fixedSorted[i] = origSorted[i] - delta;
649  } else {
650  rowSumDeviation = rowSumDeviation + closestToRghtBoundDist;
651  // if (closestToRghtBound < nEntries) {
652  fixedSorted[closestToRghtBound] = origSorted[closestToRghtBound];
653  closestToRghtBound++;
654  // }
655  if (closestToRghtBound >= nEntries)
656  closestToRghtBoundDist = aBigNumber;
657  else
658  closestToRghtBoundDist = origSorted[closestToRghtBound] - rghtBound;
659  }
660  }
661  }
662 
663  if (flipped) {
664  /* flip sign of fixedSorted and reverse ordering so that the positve version is sorted */
665 
666  if ((nEntries % 2) == 1) fixedSorted[(nEntries / 2)] = -fixedSorted[(nEntries / 2)];
667  for (LocalOrdinal i = 0; i < nEntries / 2; i++) {
668  temp = fixedSorted[i];
669  fixedSorted[i] = -fixedSorted[nEntries - 1 - i];
670  fixedSorted[nEntries - i - 1] = -temp;
671  }
672  }
673  for (LocalOrdinal i = 0; i < nEntries; i++) fixedUnsorted[inds[i]] = fixedSorted[i];
674 
675  /* check that no constraints are violated */
676 
677  bool lowerViolation = false;
678  bool upperViolation = false;
679  bool sumViolation = false;
680  using TST = Teuchos::ScalarTraits<SC>;
681  temp = TST::zero();
682  for (LocalOrdinal i = 0; i < nEntries; i++) {
683  if (TST::real(fixedUnsorted[i]) < TST::real(notFlippedLeftBound)) lowerViolation = true;
684  if (TST::real(fixedUnsorted[i]) > TST::real(notFlippedRghtBound)) upperViolation = true;
685  temp += fixedUnsorted[i];
686  }
687  SC tol = as<Scalar>(std::max(1.0e-8, as<double>(100 * TST::eps())));
688  if (TST::magnitude(temp - rsumTarget) > TST::magnitude(tol * rsumTarget)) sumViolation = true;
689 
690  TEUCHOS_TEST_FOR_EXCEPTION(lowerViolation, Exceptions::RuntimeError, "MueLu::SaPFactory::constrainRow: feasible solution but computation resulted in a lower bound violation??? ");
691  TEUCHOS_TEST_FOR_EXCEPTION(upperViolation, Exceptions::RuntimeError, "MueLu::SaPFactory::constrainRow: feasible solution but computation resulted in an upper bound violation??? ");
692  TEUCHOS_TEST_FOR_EXCEPTION(sumViolation, Exceptions::RuntimeError, "MueLu::SaPFactory::constrainRow: feasible solution but computation resulted in a row sum violation??? ");
693 
694  return hasFeasibleSol;
695 }
696 
697 template <typename local_matrix_type>
699  using Scalar = typename local_matrix_type::non_const_value_type;
700  using SC = Scalar;
701  using LO = typename local_matrix_type::non_const_ordinal_type;
702  using Device = typename local_matrix_type::device_type;
703 #if KOKKOS_VERSION >= 40799
704  using KAT = KokkosKernels::ArithTraits<SC>;
705 #else
706  using KAT = Kokkos::ArithTraits<SC>;
707 #endif
708  const Scalar zero = KAT::zero();
709  const Scalar one = KAT::one();
711  local_matrix_type localP;
712  Kokkos::View<SC**, Device> ConstraintViolationSum;
713  Kokkos::View<SC**, Device> Rsum;
714  Kokkos::View<size_t**, Device> nPositive;
715 
716  constraintKernel(LO nPDEs_, local_matrix_type localP_)
717  : nPDEs(nPDEs_)
718  , localP(localP_) {
719  ConstraintViolationSum = Kokkos::View<SC**, Device>("ConstraintViolationSum", localP_.numRows(), nPDEs);
720  Rsum = Kokkos::View<SC**, Device>("Rsum", localP_.numRows(), nPDEs);
721  nPositive = Kokkos::View<size_t**, Device>("nPositive", localP_.numRows(), nPDEs);
722  }
723 
724  KOKKOS_INLINE_FUNCTION
725  void operator()(const size_t rowIdx) const {
726  auto rowPtr = localP.graph.row_map;
727  auto values = localP.values;
728 
729  bool checkRow = true;
730 
731  if (rowPtr(rowIdx + 1) == rowPtr(rowIdx)) checkRow = false;
732 
733  while (checkRow) {
734  // check constraints and compute the row sum
735 
736  for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) {
737  Rsum(rowIdx, entryIdx % nPDEs) += values(entryIdx);
738  if (KAT::real(values(entryIdx)) < KAT::real(zero)) {
739  ConstraintViolationSum(rowIdx, entryIdx % nPDEs) += values(entryIdx);
740  values(entryIdx) = zero;
741  } else {
742  if (KAT::real(values(entryIdx)) != KAT::real(zero))
743  nPositive(rowIdx, entryIdx % nPDEs) = nPositive(rowIdx, entryIdx % nPDEs) + 1;
744 
745  if (KAT::real(values(entryIdx)) > KAT::real(1.00001)) {
746  ConstraintViolationSum(rowIdx, entryIdx % nPDEs) += (values(entryIdx) - one);
747  values(entryIdx) = one;
748  }
749  }
750  }
751 
752  checkRow = false;
753 
754  // take into account any row sum that violates the contraints
755 
756  for (size_t k = 0; k < (size_t)nPDEs; k++) {
757  if (KAT::real(Rsum(rowIdx, k)) < KAT::magnitude(zero)) {
758  ConstraintViolationSum(rowIdx, k) = ConstraintViolationSum(rowIdx, k) - Rsum(rowIdx, k); // rstumin
759  } else if (KAT::real(Rsum(rowIdx, k)) > KAT::magnitude(1.00001)) {
760  ConstraintViolationSum(rowIdx, k) = ConstraintViolationSum(rowIdx, k) + (one - Rsum(rowIdx, k)); // rstumin
761  }
762  }
763 
764  // check if row need modification
765  for (size_t k = 0; k < (size_t)nPDEs; k++) {
766  if (KAT::magnitude(ConstraintViolationSum(rowIdx, k)) != KAT::magnitude(zero))
767  checkRow = true;
768  }
769  // modify row
770  if (checkRow) {
771  for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) {
772  if (KAT::real(values(entryIdx)) > KAT::real(zero)) {
773  values(entryIdx) = values(entryIdx) +
774  (ConstraintViolationSum(rowIdx, entryIdx % nPDEs) / (Scalar(nPositive(rowIdx, entryIdx % nPDEs)) != zero ? Scalar(nPositive(rowIdx, entryIdx % nPDEs)) : one));
775  }
776  }
777  for (size_t k = 0; k < (size_t)nPDEs; k++) ConstraintViolationSum(rowIdx, k) = zero;
778  }
779  for (size_t k = 0; k < (size_t)nPDEs; k++) Rsum(rowIdx, k) = zero;
780  for (size_t k = 0; k < (size_t)nPDEs; k++) nPositive(rowIdx, k) = 0;
781  } // while (checkRow) ...
782  }
783 };
784 
785 template <typename local_matrix_type>
787  using Scalar = typename local_matrix_type::non_const_value_type;
788  using SC = Scalar;
789  using LO = typename local_matrix_type::non_const_ordinal_type;
790  using Device = typename local_matrix_type::device_type;
791 #if KOKKOS_VERSION >= 40799
792  using KAT = KokkosKernels::ArithTraits<SC>;
793 #else
794  using KAT = Kokkos::ArithTraits<SC>;
795 #endif
796  const Scalar zero = KAT::zero();
797  const Scalar one = KAT::one();
799  local_matrix_type localP;
800  Kokkos::View<SC**, Device> origSorted;
801  Kokkos::View<SC**, Device> fixedSorted;
802  Kokkos::View<LO**, Device> inds;
803 
804  optimalSatisfyConstraintsForScalarPDEsKernel(LO nPDEs_, LO maxRowEntries_, local_matrix_type localP_)
805  : nPDEs(nPDEs_)
806  , localP(localP_) {
807  origSorted = Kokkos::View<SC**, Device>("origSorted", localP_.numRows(), maxRowEntries_);
808  fixedSorted = Kokkos::View<SC**, Device>("fixedSorted", localP_.numRows(), maxRowEntries_);
809  inds = Kokkos::View<LO**, Device>("inds", localP_.numRows(), maxRowEntries_);
810  }
811 
812  KOKKOS_INLINE_FUNCTION
813  void operator()(const size_t rowIdx) const {
814  auto rowPtr = localP.graph.row_map;
815  auto values = localP.values;
816 
817  auto nnz = rowPtr(rowIdx + 1) - rowPtr(rowIdx);
818 
819  if (nnz != 0) {
820  Scalar rsumTarget = zero;
821  for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) rsumTarget += values(entryIdx);
822 
823  {
824  SC aBigNumber;
825  SC rowSumDeviation, temp, delta;
826  SC closestToLeftBoundDist, closestToRghtBoundDist;
827  LO closestToLeftBound, closestToRghtBound;
828  bool flipped;
829 
830  SC leftBound = zero;
831  SC rghtBound = one;
832  if ((KAT::real(rsumTarget) >= KAT::real(leftBound * (static_cast<SC>(nnz)))) &&
833  (KAT::real(rsumTarget) <= KAT::real(rghtBound * (static_cast<SC>(nnz))))) { // has Feasible solution
834 
835  flipped = false;
836  // compute aBigNumber to handle some corner cases where we need
837  // something large so that an if statement will be false
838  aBigNumber = KAT::zero();
839  for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) {
840  if (KAT::magnitude(values(entryIdx)) > KAT::magnitude(aBigNumber))
841  aBigNumber = KAT::magnitude(values(entryIdx));
842  }
843  aBigNumber = aBigNumber + (KAT::magnitude(leftBound) + KAT::magnitude(rghtBound)) * (100 * one);
844 
845  LO ind = 0;
846  for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) {
847  origSorted(rowIdx, ind) = values(entryIdx);
848  inds(rowIdx, ind) = ind;
849  ind++;
850  }
851 
852  auto sortVals = Kokkos::subview(origSorted, rowIdx, Kokkos::ALL());
853  auto sortInds = Kokkos::subview(inds, rowIdx, Kokkos::make_pair(0, ind));
854  // Need permutation indices to sort row values from smallest to largest,
855  // and unsort once row constraints are applied.
856  // serial insertion sort workaround from https://github.com/kokkos/kokkos/issues/645
857  for (LO i = 1; i < static_cast<LO>(nnz); ++i) {
858  ind = sortInds(i);
859  LO j = i;
860 
861  if (KAT::real(sortVals(sortInds(i))) < KAT::real(sortVals(sortInds(0)))) {
862  for (; j > 0; --j) sortInds(j) = sortInds(j - 1);
863 
864  sortInds(0) = ind;
865  } else {
866  for (; KAT::real(sortVals(ind)) < KAT::real(sortVals(sortInds(j - 1))); --j) sortInds(j) = sortInds(j - 1);
867 
868  sortInds(j) = ind;
869  }
870  }
871 
872  for (LO i = 0; i < static_cast<LO>(nnz); i++) origSorted(rowIdx, i) = values(rowPtr(rowIdx) + inds(rowIdx, i)); // values is no longer used
873  // find entry in origSorted just to the right of the leftBound
874  closestToLeftBound = 0;
875  while ((closestToLeftBound < static_cast<LO>(nnz)) && (KAT::real(origSorted(rowIdx, closestToLeftBound)) <= KAT::real(leftBound))) closestToLeftBound++;
876 
877  // find entry in origSorted just to the right of the rghtBound
878  closestToRghtBound = closestToLeftBound;
879  while ((closestToRghtBound < static_cast<LO>(nnz)) && (KAT::real(origSorted(rowIdx, closestToRghtBound)) <= KAT::real(rghtBound))) closestToRghtBound++;
880 
881  // compute distance between closestToLeftBound and the left bound and the
882  // distance between closestToRghtBound and the right bound.
883 
884  closestToLeftBoundDist = origSorted(rowIdx, closestToLeftBound) - leftBound;
885  if (closestToRghtBound == static_cast<LO>(nnz))
886  closestToRghtBoundDist = aBigNumber;
887  else
888  closestToRghtBoundDist = origSorted(rowIdx, closestToRghtBound) - rghtBound;
889 
890  // compute how far the rowSum is off from the target row sum taking into account
891  // numbers that have been shifted to satisfy bound constraint
892 
893  rowSumDeviation = leftBound * (static_cast<SC>(closestToLeftBound)) + (static_cast<SC>(nnz - closestToRghtBound)) * rghtBound - rsumTarget;
894  for (LO i = closestToLeftBound; i < closestToRghtBound; i++) rowSumDeviation += origSorted(rowIdx, i);
895 
896  // the code that follow after this if statement assumes that rowSumDeviation is positive. If this
897  // is not the case, flip the signs of everything so that rowSumDeviation is now positive.
898  // Later we will flip the data back to its original form.
899  if (KAT::real(rowSumDeviation) < KAT::real(KAT::zero())) {
900  flipped = true;
901  temp = leftBound;
902  leftBound = -rghtBound;
903  rghtBound = temp;
904 
905  /* flip sign of origSorted and reverse ordering so that the negative version is sorted */
906 
907  // TODO: the following is bad for GPU performance. Switch to bit shifting if brave.
908  if ((nnz % 2) == 1) origSorted(rowIdx, (nnz / 2)) = -origSorted(rowIdx, (nnz / 2));
909  for (LO i = 0; i < static_cast<LO>(nnz / 2); i++) {
910  temp = origSorted(rowIdx, i);
911  origSorted(rowIdx, i) = -origSorted(rowIdx, nnz - 1 - i);
912  origSorted(rowIdx, nnz - i - 1) = -temp;
913  }
914 
915  /* reverse bounds */
916 
917  LO itemp = closestToLeftBound;
918  closestToLeftBound = nnz - closestToRghtBound;
919  closestToRghtBound = nnz - itemp;
920  closestToLeftBoundDist = origSorted(rowIdx, closestToLeftBound) - leftBound;
921  if (closestToRghtBound == static_cast<LO>(nnz))
922  closestToRghtBoundDist = aBigNumber;
923  else
924  closestToRghtBoundDist = origSorted(rowIdx, closestToRghtBound) - rghtBound;
925 
926  rowSumDeviation = -rowSumDeviation;
927  }
928 
929  // initial fixedSorted so bounds are satisfied and interiors correspond to origSorted
930 
931  for (LO i = 0; i < closestToLeftBound; i++) fixedSorted(rowIdx, i) = leftBound;
932  for (LO i = closestToLeftBound; i < closestToRghtBound; i++) fixedSorted(rowIdx, i) = origSorted(rowIdx, i);
933  for (LO i = closestToRghtBound; i < static_cast<LO>(nnz); i++) fixedSorted(rowIdx, i) = rghtBound;
934 
935  while ((KAT::magnitude(rowSumDeviation) > KAT::magnitude((one * 1.e-10) * rsumTarget))) { // && ( (closestToLeftBound < nEntries ) || (closestToRghtBound < nEntries))) {
936  if (closestToRghtBound != closestToLeftBound)
937  delta = rowSumDeviation / static_cast<SC>(closestToRghtBound - closestToLeftBound);
938  else
939  delta = aBigNumber;
940 
941  if (KAT::magnitude(closestToLeftBoundDist) <= KAT::magnitude(closestToRghtBoundDist)) {
942  if (KAT::magnitude(delta) <= KAT::magnitude(closestToLeftBoundDist)) {
943  rowSumDeviation = zero;
944  for (LO i = closestToLeftBound; i < closestToRghtBound; i++) fixedSorted(rowIdx, i) = origSorted(rowIdx, i) - delta;
945  } else {
946  rowSumDeviation = rowSumDeviation - closestToLeftBoundDist;
947  fixedSorted(rowIdx, closestToLeftBound) = leftBound;
948  closestToLeftBound++;
949  if (closestToLeftBound < static_cast<LO>(nnz))
950  closestToLeftBoundDist = origSorted(rowIdx, closestToLeftBound) - leftBound;
951  else
952  closestToLeftBoundDist = aBigNumber;
953  }
954  } else {
955  if (KAT::magnitude(delta) <= KAT::magnitude(closestToRghtBoundDist)) {
956  rowSumDeviation = 0;
957  for (LO i = closestToLeftBound; i < closestToRghtBound; i++) fixedSorted(rowIdx, i) = origSorted(rowIdx, i) - delta;
958  } else {
959  rowSumDeviation = rowSumDeviation + closestToRghtBoundDist;
960  // if (closestToRghtBound < nEntries) {
961  fixedSorted(rowIdx, closestToRghtBound) = origSorted(rowIdx, closestToRghtBound);
962  closestToRghtBound++;
963  // }
964  if (closestToRghtBound >= static_cast<LO>(nnz))
965  closestToRghtBoundDist = aBigNumber;
966  else
967  closestToRghtBoundDist = origSorted(rowIdx, closestToRghtBound) - rghtBound;
968  }
969  }
970  }
971 
972  auto rowStart = rowPtr(rowIdx);
973  if (flipped) {
974  /* flip sign of fixedSorted and reverse ordering so that the positve version is sorted */
975 
976  if ((nnz % 2) == 1) fixedSorted(rowIdx, (nnz / 2)) = -fixedSorted(rowIdx, (nnz / 2));
977  for (LO i = 0; i < static_cast<LO>(nnz / 2); i++) {
978  temp = fixedSorted(rowIdx, i);
979  fixedSorted(rowIdx, i) = -fixedSorted(rowIdx, nnz - 1 - i);
980  fixedSorted(rowIdx, nnz - i - 1) = -temp;
981  }
982  }
983  // unsort and update row values with new values
984  for (LO i = 0; i < static_cast<LO>(nnz); i++) values(rowStart + inds(rowIdx, i)) = fixedSorted(rowIdx, i);
985 
986  } else { // row does not have feasible solution to match constraint
987  // just set all entries to the same value giving a row sum of 1
988  for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) values(entryIdx) = one / (static_cast<SC>(nnz));
989  }
990  }
991  }
992  }
993 };
994 
995 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
997  using Device = typename Matrix::local_matrix_type::device_type;
998  LO nPDEs = A->GetFixedBlockSize();
999 
1000  using local_mat_type = typename Matrix::local_matrix_type;
1001  constraintKernel<local_mat_type> myKernel(nPDEs, P->getLocalMatrixDevice());
1002  Kokkos::parallel_for("enforce constraint", Kokkos::RangePolicy<typename Device::execution_space>(0, P->getRowMap()->getLocalNumElements()),
1003  myKernel);
1004 
1005 } // SatsifyPConstraints()
1006 
1007 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1009  using Device = typename Matrix::local_matrix_type::device_type;
1010  LO nPDEs = 1; // A->GetFixedBlockSize();
1011 
1012  using local_mat_type = typename Matrix::local_matrix_type;
1013  optimalSatisfyConstraintsForScalarPDEsKernel<local_mat_type> myKernel(nPDEs, P->getLocalMaxNumRowEntries(), P->getLocalMatrixDevice());
1014  Kokkos::parallel_for("enforce constraint", Kokkos::RangePolicy<typename Device::execution_space>(0, P->getLocalNumRows()),
1015  myKernel);
1016 
1017 } // SatsifyPConstraints()
1018 
1019 } // namespace MueLu
1020 
1021 #endif // MUELU_SAPFACTORY_DEF_HPP
1022 
1023 // TODO: restrictionMode_ should use the parameter list.
typename local_matrix_type::non_const_value_type Scalar
MueLu::DefaultLocalOrdinal LocalOrdinal
T & Get(const std::string &ename, const FactoryBase *factory=NoFactory::get())
Get data without decrementing associated storage counter (i.e., read-only access). Usage: Level-&gt;Get&lt; RCP&lt;Matrix&gt; &gt;(&quot;A&quot;, factory) if factory == NULL =&gt; use default factory.
void optimalSatisfyPConstraintsForScalarPDEs(RCP< Matrix > &P) const
std::string toString(const T &what)
Little helper function to convert non-string types to strings.
typename local_matrix_type::device_type Device
static Scalar PowerMethod(const Matrix &A, bool scaleByDiag=true, LocalOrdinal niters=10, Magnitude tolerance=1e-2, bool verbose=false, unsigned int seed=123)
Power method.
bool constrainRow(Scalar *orig, LocalOrdinal nEntries, Scalar leftBound, Scalar rghtBound, Scalar rsumTarget, Scalar *fixedUnsorted, Scalar *scalarData) const
optimalSatisfyConstraintsForScalarPDEsKernel(LO nPDEs_, LO maxRowEntries_, local_matrix_type localP_)
T & get(const std::string &name, T def_value)
KOKKOS_INLINE_FUNCTION void operator()(const size_t rowIdx) const
SaPFactory()
Constructor. User can supply a factory for generating the tentative prolongator.
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
static magnitudeType real(T a)
Print more statistics.
T * getRawPtr() const
size_type size() const
typename local_matrix_type::non_const_ordinal_type LO
constraintKernel(LO nPDEs_, local_matrix_type localP_)
LocalOrdinal LO
One-liner description of what is happening.
void SatisfyPConstraintsNonKokkos(RCP< Matrix > A, RCP< Matrix > &P) const
T * get() const
ParameterList & set(std::string const &name, T &&value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
Print even more statistics.
Kokkos::View< size_t **, Device > nPositive
void resize(const size_type n, const T &val=T())
bool isParameter(const std::string &name) const
typename local_matrix_type::device_type Device
static RCP< Matrix > Jacobi(SC omega, const Vector &Dinv, const Matrix &A, const Matrix &B, RCP< Matrix > C_in, Teuchos::FancyOStream &fos, const std::string &label, RCP< ParameterList > &params)
virtual ~SaPFactory()
Destructor.
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
MueLu::DefaultScalar Scalar
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:63
bool isSublist(const std::string &name) const
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
void Build(Level &fineLevel, Level &coarseLevel) const
Build method.
Kokkos::View< SC **, Device > ConstraintViolationSum
#define SET_VALID_ENTRY(name)
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Transpose(Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, bool optimizeTranspose=false, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
T * getRawPtr() const
static std::string PrintMatrixInfo(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
void SatisfyPConstraints(RCP< Matrix > A, RCP< Matrix > &P) const
static RCP< Vector > GetMatrixDiagonalInverse(const Matrix &A, Magnitude tol=Teuchos::ScalarTraits< Scalar >::eps()*100, Scalar valReplacement=Teuchos::ScalarTraits< Scalar >::zero(), const bool doLumped=false)
Extract Matrix Diagonal.
Kokkos::ArithTraits< SC > KAT
static magnitudeType magnitude(T a)
void BuildP(Level &fineLevel, Level &coarseLevel) const
Abstract Build method.
typename local_matrix_type::non_const_value_type Scalar
typename local_matrix_type::non_const_ordinal_type LO
Scalar SC
Kokkos::View< SC **, Device > Rsum
const RCP< const FactoryManagerBase > GetFactoryManager()
returns the current factory manager
Definition: MueLu_Level.cpp:73
KOKKOS_INLINE_FUNCTION void operator()(const size_t rowIdx) const
ParameterList & sublist(const std::string &name, bool mustAlreadyExist=false, const std::string &docString="")
void optimalSatisfyPConstraintsForScalarPDEsNonKokkos(RCP< Matrix > &P) const
int GetLevelID() const
Return level number.
Definition: MueLu_Level.cpp:51
Exception throws to report errors in the internal logical of the program.
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
static Teuchos::RCP< Vector > GetLumpedMatrixDiagonal(Matrix const &A, const bool doReciprocal=false, Magnitude tol=Teuchos::ScalarTraits< Scalar >::magnitude(Teuchos::ScalarTraits< Scalar >::zero()), Scalar valReplacement=Teuchos::ScalarTraits< Scalar >::zero(), const bool replaceSingleEntryRowWithZero=false, const bool useAverageAbsDiagVal=false)
Extract Matrix Diagonal of lumped matrix.
void DeclareInput(const std::string &ename, const FactoryBase *factory, const FactoryBase *requestedBy=NoFactory::get())
Callback from FactoryBase::CallDeclareInput() and FactoryBase::DeclareInput()
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need&#39;s value has been saved.
bool is_null() const