MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_SaPFactory_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // MueLu: A package for multigrid based preconditioning
4 //
5 // Copyright 2012 NTESS and the MueLu contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef MUELU_SAPFACTORY_DEF_HPP
11 #define MUELU_SAPFACTORY_DEF_HPP
12 
13 #include "Kokkos_ArithTraits.hpp"
15 
16 #include <Xpetra_Matrix.hpp>
17 #include <Xpetra_IteratorOps.hpp>
18 
20 #include "MueLu_Level.hpp"
21 #include "MueLu_MasterList.hpp"
22 #include "MueLu_Monitor.hpp"
23 #include "MueLu_PerfUtils.hpp"
24 #include "MueLu_TentativePFactory.hpp"
25 #include "MueLu_Utilities.hpp"
26 
27 #include <sstream>
28 
29 namespace MueLu {
30 
31 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
33 
34 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
36 
37 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
39  RCP<ParameterList> validParamList = rcp(new ParameterList());
40 
41 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
42  SET_VALID_ENTRY("sa: damping factor");
43  SET_VALID_ENTRY("sa: calculate eigenvalue estimate");
44  SET_VALID_ENTRY("sa: eigenvalue estimate num iterations");
45  SET_VALID_ENTRY("sa: use rowsumabs diagonal scaling");
46  SET_VALID_ENTRY("sa: enforce constraints");
47  SET_VALID_ENTRY("tentative: calculate qr");
48  SET_VALID_ENTRY("sa: max eigenvalue");
49  SET_VALID_ENTRY("sa: rowsumabs diagonal replacement tolerance");
50  SET_VALID_ENTRY("sa: rowsumabs diagonal replacement value");
51  SET_VALID_ENTRY("sa: rowsumabs replace single entry row with zero");
52  SET_VALID_ENTRY("sa: rowsumabs use automatic diagonal tolerance");
53  SET_VALID_ENTRY("use kokkos refactor");
54 #undef SET_VALID_ENTRY
55 
56  validParamList->set<RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A used during the prolongator smoothing process");
57  validParamList->set<RCP<const FactoryBase> >("P", Teuchos::null, "Tentative prolongator factory");
58 
59  // Make sure we don't recursively validate options for the matrixmatrix kernels
60  ParameterList norecurse;
61  norecurse.disableRecursiveValidation();
62  validParamList->set<ParameterList>("matrixmatrix: kernel params", norecurse, "MatrixMatrix kernel parameters");
63 
64  return validParamList;
65 }
66 
67 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
69  Input(fineLevel, "A");
70 
71  // Get default tentative prolongator factory
72  // Getting it that way ensure that the same factory instance will be used for both SaPFactory and NullspaceFactory.
73  RCP<const FactoryBase> initialPFact = GetFactory("P");
74  if (initialPFact == Teuchos::null) {
75  initialPFact = coarseLevel.GetFactoryManager()->GetFactory("Ptent");
76  }
77  coarseLevel.DeclareInput("P", initialPFact.get(), this);
78 }
79 
80 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
82  return BuildP(fineLevel, coarseLevel);
83 }
84 
85 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
87  FactoryMonitor m(*this, "Prolongator smoothing", coarseLevel);
88 
89  std::string levelIDs = toString(coarseLevel.GetLevelID());
90 
91  const std::string prefix = "MueLu::SaPFactory(" + levelIDs + "): ";
92 
93  typedef typename Teuchos::ScalarTraits<SC>::coordinateType Coordinate;
94  typedef typename Teuchos::ScalarTraits<SC>::magnitudeType MT;
95 
96  // Get default tentative prolongator factory
97  // Getting it that way ensure that the same factory instance will be used for both SaPFactory and NullspaceFactory.
98  // -- Warning: Do not use directly initialPFact_. Use initialPFact instead everywhere!
99  RCP<const FactoryBase> initialPFact = GetFactory("P");
100  if (initialPFact == Teuchos::null) {
101  initialPFact = coarseLevel.GetFactoryManager()->GetFactory("Ptent");
102  }
103  const ParameterList& pL = GetParameterList();
104 
105  // Level Get
106  RCP<Matrix> A = Get<RCP<Matrix> >(fineLevel, "A");
107  RCP<Matrix> Ptent = coarseLevel.Get<RCP<Matrix> >("P", initialPFact.get());
108  RCP<Matrix> finalP;
109  // If Tentative facctory bailed out (e.g., number of global aggregates is 0), then SaPFactory bails
110  // This level will ultimately be removed in MueLu_Hierarchy_defs.h via a resize()
111  if (Ptent == Teuchos::null) {
112  finalP = Teuchos::null;
113  Set(coarseLevel, "P", finalP);
114  return;
115  }
116 
117  if (restrictionMode_) {
118  SubFactoryMonitor m2(*this, "Transpose A", coarseLevel);
119  A = Utilities::Transpose(*A, true); // build transpose of A explicitly
120  }
121 
122  // Build final prolongator
123 
124  // Reuse pattern if available
125  RCP<ParameterList> APparams;
126  if (pL.isSublist("matrixmatrix: kernel params"))
127  APparams = rcp(new ParameterList(pL.sublist("matrixmatrix: kernel params")));
128  else
129  APparams = rcp(new ParameterList);
130  if (coarseLevel.IsAvailable("AP reuse data", this)) {
131  GetOStream(static_cast<MsgType>(Runtime0 | Test)) << "Reusing previous AP data" << std::endl;
132 
133  APparams = coarseLevel.Get<RCP<ParameterList> >("AP reuse data", this);
134 
135  if (APparams->isParameter("graph"))
136  finalP = APparams->get<RCP<Matrix> >("graph");
137  }
138  // By default, we don't need global constants for SaP
139  APparams->set("compute global constants: temporaries", APparams->get("compute global constants: temporaries", false));
140  APparams->set("compute global constants", APparams->get("compute global constants", false));
141 
142  const SC dampingFactor = as<SC>(pL.get<double>("sa: damping factor"));
143  const LO maxEigenIterations = as<LO>(pL.get<int>("sa: eigenvalue estimate num iterations"));
144  const bool estimateMaxEigen = pL.get<bool>("sa: calculate eigenvalue estimate");
145  const bool useAbsValueRowSum = pL.get<bool>("sa: use rowsumabs diagonal scaling");
146  const bool doQRStep = pL.get<bool>("tentative: calculate qr");
147  const bool enforceConstraints = pL.get<bool>("sa: enforce constraints");
148  const MT userDefinedMaxEigen = as<MT>(pL.get<double>("sa: max eigenvalue"));
149  double dTol = pL.get<double>("sa: rowsumabs diagonal replacement tolerance");
150  const MT diagonalReplacementTolerance = (dTol == as<double>(-1) ? Teuchos::ScalarTraits<MT>::eps() * 100 : as<MT>(pL.get<double>("sa: rowsumabs diagonal replacement tolerance")));
151  const SC diagonalReplacementValue = as<SC>(pL.get<double>("sa: rowsumabs diagonal replacement value"));
152  const bool replaceSingleEntryRowWithZero = pL.get<bool>("sa: rowsumabs replace single entry row with zero");
153  const bool useAutomaticDiagTol = pL.get<bool>("sa: rowsumabs use automatic diagonal tolerance");
154 
155  // Sanity checking
156  TEUCHOS_TEST_FOR_EXCEPTION(doQRStep && enforceConstraints, Exceptions::RuntimeError,
157  "MueLu::TentativePFactory::MakeTentative: cannot use 'enforce constraints' and 'calculate qr' at the same time");
158 
159  if (dampingFactor != Teuchos::ScalarTraits<SC>::zero()) {
160  Scalar lambdaMax;
161  Teuchos::RCP<Vector> invDiag;
162  if (userDefinedMaxEigen == -1.) {
163  SubFactoryMonitor m2(*this, "Eigenvalue estimate", coarseLevel);
164  lambdaMax = A->GetMaxEigenvalueEstimate();
165  if (lambdaMax == -Teuchos::ScalarTraits<SC>::one() || estimateMaxEigen) {
166  GetOStream(Statistics1) << "Calculating max eigenvalue estimate now (max iters = " << maxEigenIterations << ((useAbsValueRowSum) ? ", use rowSumAbs diagonal)" : ", use point diagonal)") << std::endl;
167  Coordinate stopTol = 1e-4;
168  if (useAbsValueRowSum) {
169  const bool returnReciprocal = true;
170  invDiag = Utilities::GetLumpedMatrixDiagonal(*A, returnReciprocal,
171  diagonalReplacementTolerance,
172  diagonalReplacementValue,
173  replaceSingleEntryRowWithZero,
174  useAutomaticDiagTol);
175  TEUCHOS_TEST_FOR_EXCEPTION(invDiag.is_null(), Exceptions::RuntimeError,
176  "SaPFactory: eigenvalue estimate: diagonal reciprocal is null.");
177  lambdaMax = Utilities::PowerMethod(*A, invDiag, maxEigenIterations, stopTol);
178  } else
179  lambdaMax = Utilities::PowerMethod(*A, true, maxEigenIterations, stopTol);
180  A->SetMaxEigenvalueEstimate(lambdaMax);
181  } else {
182  GetOStream(Statistics1) << "Using cached max eigenvalue estimate" << std::endl;
183  }
184  } else {
185  lambdaMax = userDefinedMaxEigen;
186  A->SetMaxEigenvalueEstimate(lambdaMax);
187  }
188  GetOStream(Statistics1) << "Prolongator damping factor = " << dampingFactor / lambdaMax << " (" << dampingFactor << " / " << lambdaMax << ")" << std::endl;
189 
190  {
191  SubFactoryMonitor m2(*this, "Fused (I-omega*D^{-1} A)*Ptent", coarseLevel);
192  if (!useAbsValueRowSum)
193  invDiag = Utilities::GetMatrixDiagonalInverse(*A); // default
194  else if (invDiag == Teuchos::null) {
195  GetOStream(Runtime0) << "Using rowsumabs diagonal" << std::endl;
196  const bool returnReciprocal = true;
197  invDiag = Utilities::GetLumpedMatrixDiagonal(*A, returnReciprocal,
198  diagonalReplacementTolerance,
199  diagonalReplacementValue,
200  replaceSingleEntryRowWithZero,
201  useAutomaticDiagTol);
202  TEUCHOS_TEST_FOR_EXCEPTION(invDiag.is_null(), Exceptions::RuntimeError, "SaPFactory: diagonal reciprocal is null.");
203  }
204 
205  SC omega = dampingFactor / lambdaMax;
206  TEUCHOS_TEST_FOR_EXCEPTION(!std::isfinite(Teuchos::ScalarTraits<SC>::magnitude(omega)), Exceptions::RuntimeError, "Prolongator damping factor needs to be finite.");
207 
208  {
209  SubFactoryMonitor m3(*this, "Xpetra::IteratorOps::Jacobi", coarseLevel);
210  // finalP = Ptent + (I - \omega D^{-1}A) Ptent
211  finalP = Xpetra::IteratorOps<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Jacobi(omega, *invDiag, *A, *Ptent, finalP, GetOStream(Statistics2), std::string("MueLu::SaP-") + toString(coarseLevel.GetLevelID()), APparams);
212  if (enforceConstraints) {
213  if (!pL.get<bool>("use kokkos refactor")) {
214  if (A->GetFixedBlockSize() == 1)
215  optimalSatisfyPConstraintsForScalarPDEsNonKokkos(finalP);
216  else
217  SatisfyPConstraintsNonKokkos(A, finalP);
218  } else {
219  // if (A->GetFixedBlockSize() == 1)
220  // optimalSatisfyPConstraintsForScalarPDEs(finalP);
221  // else
222  SatisfyPConstraints(A, finalP);
223  }
224  }
225  }
226  }
227 
228  } else {
229  finalP = Ptent;
230  }
231 
232  // Level Set
233  RCP<Matrix> R;
234  if (!restrictionMode_) {
235  // prolongation factory is in prolongation mode
236  if (!finalP.is_null()) {
237  std::ostringstream oss;
238  oss << "P_" << coarseLevel.GetLevelID();
239  finalP->setObjectLabel(oss.str());
240  }
241  Set(coarseLevel, "P", finalP);
242 
243  APparams->set("graph", finalP);
244  Set(coarseLevel, "AP reuse data", APparams);
245 
246  // NOTE: EXPERIMENTAL
247  if (Ptent->IsView("stridedMaps"))
248  finalP->CreateView("stridedMaps", Ptent);
249 
250  } else {
251  // prolongation factory is in restriction mode
252  {
253  SubFactoryMonitor m2(*this, "Transpose P", coarseLevel);
254  R = Utilities::Transpose(*finalP, true);
255  if (!R.is_null()) {
256  std::ostringstream oss;
257  oss << "R_" << coarseLevel.GetLevelID();
258  R->setObjectLabel(oss.str());
259  }
260  }
261 
262  Set(coarseLevel, "R", R);
263 
264  // NOTE: EXPERIMENTAL
265  if (Ptent->IsView("stridedMaps"))
266  R->CreateView("stridedMaps", Ptent, true /*transposeA*/);
267  }
268 
269  if (IsPrint(Statistics2)) {
270  RCP<ParameterList> params = rcp(new ParameterList());
271  params->set("printLoadBalancingInfo", true);
272  params->set("printCommInfo", true);
273  GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo((!restrictionMode_ ? *finalP : *R), (!restrictionMode_ ? "P" : "R"), params);
274  }
275 
276 } // Build()
277 
278 // Analyze the grid transfer produced by smoothed aggregation and make
279 // modifications if it does not look right. In particular, if there are
280 // negative entries or entries larger than 1, modify P's rows.
281 //
282 // Note: this kind of evaluation probably only makes sense if not doing QR
283 // when constructing tentative P.
284 //
285 // For entries that do not satisfy the two constraints (>= 0 or <=1) we set
286 // these entries to the constraint value and modify the rest of the row
287 // so that the row sum remains the same as before by adding an equal
288 // amount to each remaining entry. However, if the original row sum value
289 // violates the constraints, we set the row sum back to 1 (the row sum of
290 // tentative P). After doing the modification to a row, we need to check
291 // again the entire row to make sure that the modified row does not violate
292 // the constraints.
293 
294 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
298  LO nPDEs = A->GetFixedBlockSize();
299  Teuchos::ArrayRCP<Scalar> ConstraintViolationSum(nPDEs);
300  Teuchos::ArrayRCP<Scalar> Rsum(nPDEs);
301  Teuchos::ArrayRCP<size_t> nPositive(nPDEs);
302  for (size_t k = 0; k < (size_t)nPDEs; k++) ConstraintViolationSum[k] = zero;
303  for (size_t k = 0; k < (size_t)nPDEs; k++) Rsum[k] = zero;
304  for (size_t k = 0; k < (size_t)nPDEs; k++) nPositive[k] = 0;
305 
306  for (size_t i = 0; i < as<size_t>(P->getRowMap()->getLocalNumElements()); i++) {
310  P->getLocalRowView((LO)i, indices, vals1);
311  size_t nnz = indices.size();
312  if (nnz == 0) continue;
313 
314  vals = ArrayView<Scalar>(const_cast<SC*>(vals1.getRawPtr()), nnz);
315 
316  bool checkRow = true;
317 
318  while (checkRow) {
319  // check constraints and compute the row sum
320 
321  for (LO j = 0; j < indices.size(); j++) {
322  Rsum[j % nPDEs] += vals[j];
324  ConstraintViolationSum[j % nPDEs] += vals[j];
325  vals[j] = zero;
326  } else {
328  (nPositive[j % nPDEs])++;
329 
331  ConstraintViolationSum[j % nPDEs] += (vals[j] - one);
332  vals[j] = one;
333  }
334  }
335  }
336 
337  checkRow = false;
338 
339  // take into account any row sum that violates the contraints
340 
341  for (size_t k = 0; k < (size_t)nPDEs; k++) {
343  ConstraintViolationSum[k] += (-Rsum[k]); // rstumin
345  ConstraintViolationSum[k] += (one - Rsum[k]); // rstumin
346  }
347  }
348 
349  // check if row need modification
350  for (size_t k = 0; k < (size_t)nPDEs; k++) {
351  if (Teuchos::ScalarTraits<SC>::magnitude(ConstraintViolationSum[k]) != Teuchos::ScalarTraits<SC>::magnitude(zero))
352  checkRow = true;
353  }
354  // modify row
355  if (checkRow) {
356  for (LO j = 0; j < indices.size(); j++) {
358  vals[j] += (ConstraintViolationSum[j % nPDEs] / (as<Scalar>(nPositive[j % nPDEs])));
359  }
360  }
361  for (size_t k = 0; k < (size_t)nPDEs; k++) ConstraintViolationSum[k] = zero;
362  }
363  for (size_t k = 0; k < (size_t)nPDEs; k++) Rsum[k] = zero;
364  for (size_t k = 0; k < (size_t)nPDEs; k++) nPositive[k] = 0;
365  } // while (checkRow) ...
366  } // for (size_t i = 0; i < as<size_t>(P->getRowMap()->getNumNodeElements()); i++) ...
367 } // SatsifyPConstraints()
368 
369 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
373 
374  LocalOrdinal maxEntriesPerRow = 100; // increased later if needed
375  Teuchos::ArrayRCP<Scalar> scalarData(3 * maxEntriesPerRow);
376  bool hasFeasible;
377 
378  for (size_t i = 0; i < as<size_t>(P->getRowMap()->getLocalNumElements()); i++) {
382  P->getLocalRowView((LocalOrdinal)i, indices, vals1);
383  size_t nnz = indices.size();
384  if (nnz != 0) {
385  vals = ArrayView<Scalar>(const_cast<SC*>(vals1.getRawPtr()), nnz);
386  Scalar rsumTarget = zero;
387  for (size_t j = 0; j < nnz; j++) rsumTarget += vals[j];
388 
389  if (nnz > as<size_t>(maxEntriesPerRow)) {
390  maxEntriesPerRow = nnz * 3;
391  scalarData.resize(3 * maxEntriesPerRow);
392  }
393  hasFeasible = constrainRow(vals.getRawPtr(), as<LocalOrdinal>(nnz), zero, one, rsumTarget, vals.getRawPtr(), scalarData.getRawPtr());
394 
395  if (!hasFeasible) { // just set all entries to the same value giving a row sum of 1
396  for (size_t j = 0; j < nnz; j++) vals[j] = one / as<Scalar>(nnz);
397  }
398  }
399 
400  } // for (size_t i = 0; i < as<size_t>(P->getRowMap()->getNumNodeElements()); i++) ...
401 } // SatsifyPConstraints()
402 
403 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
404 bool SaPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::constrainRow(Scalar* orig, LocalOrdinal nEntries, Scalar leftBound, Scalar rghtBound, Scalar rsumTarget, Scalar* fixedUnsorted, Scalar* scalarData) const {
405  /*
406  Input
407  orig data that should be adjusted to satisfy bound constraints and
408  row sum constraint. orig is not modified by this function.
409 
410  nEntries length or 'orig'
411 
412  leftBound, define bound constraints for the results.
413  rghtBound
414 
415  rsumTarget defines an equality constraint for the row sum
416 
417  fixedUnsorted on output, if a feasible solutuion exists then
418  || orig - fixedUnsorted || = min when also
419  leftBound <= fixedUnsorted[i] <= rghtBound for all i
420  and sum(fixedUnsorted) = rsumTarget.
421 
422  Note: it is possible to use the same pointer for
423  fixedUnsorted and orig. In this case, orig gets
424  overwritten with the new constraint satisfying values.
425 
426  scalarData a work array that should be 3x nEntries.
427 
428  On return constrain() indicates whether or not a feasible solution exists.
429  */
430 
431  /*
432  Given a sequence of numbers o1 ... on, fix these so that they are as
433  close as possible to the original but satisfy bound constraints and also
434  have the same row sum as the oi's. If we know who is going to lie on a
435  bound, then the "best" answer (i.e., || o - f ||_2 = min) perturbs
436  each element that doesn't lie on a bound by the same amount.
437 
438  We can represent the oi's by considering scattered points on a number line
439 
440  | |
441  | |
442  o o o | o o o o o o |o o
443  | |
444 
445  \____/ \____/
446  <---- <----
447  delta delta
448 
449  Bounds are shown by vertical lines. The fi's must lie within the bounds, so
450  the 3 leftmost points must be shifted to the right and the 2 rightmost must
451  be shifted to the left. If these fi points are all shifted to the bounds
452  while the others remain in the same location, the row sum constraint is
453  likely not satisfied and so more shifting is necessary. In the figure, the f
454  rowsum is too large and so there must be more shifting to the left.
455 
456  To minimize || o - f ||_2, we basically shift all "interiors" by the same
457  amount, denoted delta. The only trick is that some points near bounds are
458  still affected by the bounds and so these points might be shifted more or less
459  than delta. In the example,t he 3 rightmost points are shifted in the opposite
460  direction as delta to the bound. The 4th point is shifted by something less
461  than delta so it does not violate the lower bound. The rightmost point is
462  shifted to the bound by some amount larger than delta. However, the 2nd point
463  is shifted by delta (i.e., it lies inside the two bounds).
464 
465  If we know delta, we can figure out everything. If we know which points
466  are special (not shifted by delta), we can also figure out everything.
467  The problem is these two things (delta and the special points) are
468  inter-connected. An algorithm for computing follows.
469 
470  1) move exterior points to the bounds and compute how much the row sum is off
471  (rowSumDeviation). We assume now that the new row sum is high, so interior
472  points must be shifted left.
473 
474  2) Mark closest point just left of the leftmost bound, closestToLeftBound,
475  and compute its distance to the leftmost bound. Mark closest point to the
476  left of the rightmost bound, closestToRghtBound, and compute its distance to
477  right bound. There are two cases to consider.
478 
479  3) Case 1: closestToLeftBound is closer than closestToRghtBound.
480  Assume that shifting by delta does not move closestToLeftBound past the
481  left bound. This means that it will be shifted by delta. However,
482  closestToRghtBound will be shifted by more than delta. So the total
483  number of points shifted by delta (|interiorToBounds|) includes
484  closestToLeftBound up to and including the point just to the left of
485  closestToRghtBound. So
486 
487  delta = rowSumDeviation/ |interiorToBounds| .
488 
489  Recall that rowSumDeviation already accounts for the non-delta shift of
490  of closestToRightBound. Now check whether our assumption is valid.
491 
492  If delta <= closestToLeftBoundDist, assumption is true so delta can be
493  applied to interiorToBounds ... and we are done.
494  Else assumption is false. Shift closestToLeftBound to the left bound.
495  Update rowSumDeviation, interiorToBounds, and identify new
496  closestToLeftBound. Repeat step 3).
497 
498  Case 2: closestToRghtBound is closer than closestToLeftBound.
499  Assume that shifting by delta does not move closestToRghtBound past right
500  bound. This means that it must be shifted by more than delta to right
501  bound. So the total number of points shifted by delta again includes
502  closestToLeftBound up to and including the point just to the left of
503  closestToRghtBound. So again compute
504 
505  delta = rowSumDeviation/ |interiorToBounds| .
506 
507  If delta <= closestToRghtBoundDist, assumption is true so delta is
508  can be applied to interiorToBounds ... and we are done
509  Else assumption is false. Put closestToRghtBound in the
510  interiorToBounds set. Remove it's contribution to rowSumDeviation,
511  identify new closestToRghtBound. Repeat step 3)
512 
513 
514  To implement, sort the oi's so things like closestToLeftBound is just index
515  into sorted array. Updaing closestToLeftBound or closestToRghtBound requires
516  increment by 1. |interiorToBounds|= closestToRghtBound - closestToLeftBound
517  To handle the case when the rowsum is low (requiring right interior shifts),
518  just flip the signs on data and use the left-shift code (and then flip back
519  before exiting function.
520  */
521  bool hasFeasibleSol;
522  Scalar notFlippedLeftBound, notFlippedRghtBound, aBigNumber, *origSorted;
523  Scalar rowSumDeviation, temp, *fixedSorted, delta;
524  Scalar closestToLeftBoundDist, closestToRghtBoundDist;
525  LocalOrdinal closestToLeftBound, closestToRghtBound;
526  LocalOrdinal* inds;
527  bool flipped;
528 
529  notFlippedLeftBound = leftBound;
530  notFlippedRghtBound = rghtBound;
531 
532  if ((Teuchos::ScalarTraits<SC>::real(rsumTarget) >= Teuchos::ScalarTraits<SC>::real(leftBound * as<Scalar>(nEntries))) &&
533  (Teuchos::ScalarTraits<SC>::real(rsumTarget) <= Teuchos::ScalarTraits<SC>::real(rghtBound * as<Scalar>(nEntries))))
534  hasFeasibleSol = true;
535  else {
536  hasFeasibleSol = false;
537  return hasFeasibleSol;
538  }
539  flipped = false;
540  // compute aBigNumber to handle some corner cases where we need
541  // something large so that an if statement will be false
542  aBigNumber = Teuchos::ScalarTraits<SC>::zero();
543  for (LocalOrdinal i = 0; i < nEntries; i++) {
545  aBigNumber = Teuchos::ScalarTraits<SC>::magnitude(orig[i]);
546  }
547  aBigNumber = aBigNumber + (Teuchos::ScalarTraits<SC>::magnitude(leftBound) + Teuchos::ScalarTraits<SC>::magnitude(rghtBound)) * as<Scalar>(100.0);
548 
549  origSorted = &scalarData[0];
550  fixedSorted = &(scalarData[nEntries]);
551  inds = (LocalOrdinal*)&(scalarData[2 * nEntries]);
552 
553  for (LocalOrdinal i = 0; i < nEntries; i++) inds[i] = i;
554  for (LocalOrdinal i = 0; i < nEntries; i++) origSorted[i] = orig[i]; /* orig no longer used */
555 
556  // sort so that orig[inds] is sorted.
557  std::sort(inds, inds + nEntries,
558  [origSorted](LocalOrdinal leftIndex, LocalOrdinal rightIndex) { return Teuchos::ScalarTraits<SC>::real(origSorted[leftIndex]) < Teuchos::ScalarTraits<SC>::real(origSorted[rightIndex]); });
559 
560  for (LocalOrdinal i = 0; i < nEntries; i++) origSorted[i] = orig[inds[i]];
561  // find entry in origSorted just to the right of the leftBound
562  closestToLeftBound = 0;
563  while ((closestToLeftBound < nEntries) && (Teuchos::ScalarTraits<SC>::real(origSorted[closestToLeftBound]) <= Teuchos::ScalarTraits<SC>::real(leftBound))) closestToLeftBound++;
564 
565  // find entry in origSorted just to the right of the rghtBound
566  closestToRghtBound = closestToLeftBound;
567  while ((closestToRghtBound < nEntries) && (Teuchos::ScalarTraits<SC>::real(origSorted[closestToRghtBound]) <= Teuchos::ScalarTraits<SC>::real(rghtBound))) closestToRghtBound++;
568 
569  // compute distance between closestToLeftBound and the left bound and the
570  // distance between closestToRghtBound and the right bound.
571 
572  closestToLeftBoundDist = origSorted[closestToLeftBound] - leftBound;
573  if (closestToRghtBound == nEntries)
574  closestToRghtBoundDist = aBigNumber;
575  else
576  closestToRghtBoundDist = origSorted[closestToRghtBound] - rghtBound;
577 
578  // compute how far the rowSum is off from the target row sum taking into account
579  // numbers that have been shifted to satisfy bound constraint
580 
581  rowSumDeviation = leftBound * as<Scalar>(closestToLeftBound) + as<Scalar>((nEntries - closestToRghtBound)) * rghtBound - rsumTarget;
582  for (LocalOrdinal i = closestToLeftBound; i < closestToRghtBound; i++) rowSumDeviation += origSorted[i];
583 
584  // the code that follow after this if statement assumes that rowSumDeviation is positive. If this
585  // is not the case, flip the signs of everything so that rowSumDeviation is now positive.
586  // Later we will flip the data back to its original form.
588  flipped = true;
589  temp = leftBound;
590  leftBound = -rghtBound;
591  rghtBound = temp;
592 
593  /* flip sign of origSorted and reverse ordering so that the negative version is sorted */
594 
595  if ((nEntries % 2) == 1) origSorted[(nEntries / 2)] = -origSorted[(nEntries / 2)];
596  for (LocalOrdinal i = 0; i < nEntries / 2; i++) {
597  temp = origSorted[i];
598  origSorted[i] = -origSorted[nEntries - 1 - i];
599  origSorted[nEntries - i - 1] = -temp;
600  }
601 
602  /* reverse bounds */
603 
604  LocalOrdinal itemp = closestToLeftBound;
605  closestToLeftBound = nEntries - closestToRghtBound;
606  closestToRghtBound = nEntries - itemp;
607  closestToLeftBoundDist = origSorted[closestToLeftBound] - leftBound;
608  if (closestToRghtBound == nEntries)
609  closestToRghtBoundDist = aBigNumber;
610  else
611  closestToRghtBoundDist = origSorted[closestToRghtBound] - rghtBound;
612 
613  rowSumDeviation = -rowSumDeviation;
614  }
615 
616  // initial fixedSorted so bounds are satisfied and interiors correspond to origSorted
617 
618  for (LocalOrdinal i = 0; i < closestToLeftBound; i++) fixedSorted[i] = leftBound;
619  for (LocalOrdinal i = closestToLeftBound; i < closestToRghtBound; i++) fixedSorted[i] = origSorted[i];
620  for (LocalOrdinal i = closestToRghtBound; i < nEntries; i++) fixedSorted[i] = rghtBound;
621 
622  while ((Teuchos::ScalarTraits<SC>::magnitude(rowSumDeviation) > Teuchos::ScalarTraits<SC>::magnitude(as<Scalar>(1.e-10) * rsumTarget))) { // && ( (closestToLeftBound < nEntries ) || (closestToRghtBound < nEntries))) {
623  if (closestToRghtBound != closestToLeftBound)
624  delta = rowSumDeviation / as<Scalar>(closestToRghtBound - closestToLeftBound);
625  else
626  delta = aBigNumber;
627 
628  if (Teuchos::ScalarTraits<SC>::magnitude(closestToLeftBoundDist) <= Teuchos::ScalarTraits<SC>::magnitude(closestToRghtBoundDist)) {
629  if (Teuchos::ScalarTraits<SC>::magnitude(delta) <= Teuchos::ScalarTraits<SC>::magnitude(closestToLeftBoundDist)) {
630  rowSumDeviation = Teuchos::ScalarTraits<SC>::zero();
631  for (LocalOrdinal i = closestToLeftBound; i < closestToRghtBound; i++) fixedSorted[i] = origSorted[i] - delta;
632  } else {
633  rowSumDeviation = rowSumDeviation - closestToLeftBoundDist;
634  fixedSorted[closestToLeftBound] = leftBound;
635  closestToLeftBound++;
636  if (closestToLeftBound < nEntries)
637  closestToLeftBoundDist = origSorted[closestToLeftBound] - leftBound;
638  else
639  closestToLeftBoundDist = aBigNumber;
640  }
641  } else {
642  if (Teuchos::ScalarTraits<SC>::magnitude(delta) <= Teuchos::ScalarTraits<SC>::magnitude(closestToRghtBoundDist)) {
643  rowSumDeviation = 0;
644  for (LocalOrdinal i = closestToLeftBound; i < closestToRghtBound; i++) fixedSorted[i] = origSorted[i] - delta;
645  } else {
646  rowSumDeviation = rowSumDeviation + closestToRghtBoundDist;
647  // if (closestToRghtBound < nEntries) {
648  fixedSorted[closestToRghtBound] = origSorted[closestToRghtBound];
649  closestToRghtBound++;
650  // }
651  if (closestToRghtBound >= nEntries)
652  closestToRghtBoundDist = aBigNumber;
653  else
654  closestToRghtBoundDist = origSorted[closestToRghtBound] - rghtBound;
655  }
656  }
657  }
658 
659  if (flipped) {
660  /* flip sign of fixedSorted and reverse ordering so that the positve version is sorted */
661 
662  if ((nEntries % 2) == 1) fixedSorted[(nEntries / 2)] = -fixedSorted[(nEntries / 2)];
663  for (LocalOrdinal i = 0; i < nEntries / 2; i++) {
664  temp = fixedSorted[i];
665  fixedSorted[i] = -fixedSorted[nEntries - 1 - i];
666  fixedSorted[nEntries - i - 1] = -temp;
667  }
668  }
669  for (LocalOrdinal i = 0; i < nEntries; i++) fixedUnsorted[inds[i]] = fixedSorted[i];
670 
671  /* check that no constraints are violated */
672 
673  bool lowerViolation = false;
674  bool upperViolation = false;
675  bool sumViolation = false;
676  using TST = Teuchos::ScalarTraits<SC>;
677  temp = TST::zero();
678  for (LocalOrdinal i = 0; i < nEntries; i++) {
679  if (TST::real(fixedUnsorted[i]) < TST::real(notFlippedLeftBound)) lowerViolation = true;
680  if (TST::real(fixedUnsorted[i]) > TST::real(notFlippedRghtBound)) upperViolation = true;
681  temp += fixedUnsorted[i];
682  }
683  SC tol = as<Scalar>(std::max(1.0e-8, as<double>(100 * TST::eps())));
684  if (TST::magnitude(temp - rsumTarget) > TST::magnitude(tol * rsumTarget)) sumViolation = true;
685 
686  TEUCHOS_TEST_FOR_EXCEPTION(lowerViolation, Exceptions::RuntimeError, "MueLu::SaPFactory::constrainRow: feasible solution but computation resulted in a lower bound violation??? ");
687  TEUCHOS_TEST_FOR_EXCEPTION(upperViolation, Exceptions::RuntimeError, "MueLu::SaPFactory::constrainRow: feasible solution but computation resulted in an upper bound violation??? ");
688  TEUCHOS_TEST_FOR_EXCEPTION(sumViolation, Exceptions::RuntimeError, "MueLu::SaPFactory::constrainRow: feasible solution but computation resulted in a row sum violation??? ");
689 
690  return hasFeasibleSol;
691 }
692 
693 template <typename local_matrix_type>
695  using Scalar = typename local_matrix_type::non_const_value_type;
696  using SC = Scalar;
697  using LO = typename local_matrix_type::non_const_ordinal_type;
698  using Device = typename local_matrix_type::device_type;
699  using KAT = Kokkos::ArithTraits<SC>;
700  const Scalar zero = KAT::zero();
701  const Scalar one = KAT::one();
703  local_matrix_type localP;
704  Kokkos::View<SC**, Device> ConstraintViolationSum;
705  Kokkos::View<SC**, Device> Rsum;
706  Kokkos::View<size_t**, Device> nPositive;
707 
708  constraintKernel(LO nPDEs_, local_matrix_type localP_)
709  : nPDEs(nPDEs_)
710  , localP(localP_) {
711  ConstraintViolationSum = Kokkos::View<SC**, Device>("ConstraintViolationSum", localP_.numRows(), nPDEs);
712  Rsum = Kokkos::View<SC**, Device>("Rsum", localP_.numRows(), nPDEs);
713  nPositive = Kokkos::View<size_t**, Device>("nPositive", localP_.numRows(), nPDEs);
714  }
715 
716  KOKKOS_INLINE_FUNCTION
717  void operator()(const size_t rowIdx) const {
718  auto rowPtr = localP.graph.row_map;
719  auto values = localP.values;
720 
721  bool checkRow = true;
722 
723  if (rowPtr(rowIdx + 1) == rowPtr(rowIdx)) checkRow = false;
724 
725  while (checkRow) {
726  // check constraints and compute the row sum
727 
728  for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) {
729  Rsum(rowIdx, entryIdx % nPDEs) += values(entryIdx);
730  if (KAT::real(values(entryIdx)) < KAT::real(zero)) {
731  ConstraintViolationSum(rowIdx, entryIdx % nPDEs) += values(entryIdx);
732  values(entryIdx) = zero;
733  } else {
734  if (KAT::real(values(entryIdx)) != KAT::real(zero))
735  nPositive(rowIdx, entryIdx % nPDEs) = nPositive(rowIdx, entryIdx % nPDEs) + 1;
736 
737  if (KAT::real(values(entryIdx)) > KAT::real(1.00001)) {
738  ConstraintViolationSum(rowIdx, entryIdx % nPDEs) += (values(entryIdx) - one);
739  values(entryIdx) = one;
740  }
741  }
742  }
743 
744  checkRow = false;
745 
746  // take into account any row sum that violates the contraints
747 
748  for (size_t k = 0; k < (size_t)nPDEs; k++) {
749  if (KAT::real(Rsum(rowIdx, k)) < KAT::magnitude(zero)) {
750  ConstraintViolationSum(rowIdx, k) = ConstraintViolationSum(rowIdx, k) - Rsum(rowIdx, k); // rstumin
751  } else if (KAT::real(Rsum(rowIdx, k)) > KAT::magnitude(1.00001)) {
752  ConstraintViolationSum(rowIdx, k) = ConstraintViolationSum(rowIdx, k) + (one - Rsum(rowIdx, k)); // rstumin
753  }
754  }
755 
756  // check if row need modification
757  for (size_t k = 0; k < (size_t)nPDEs; k++) {
758  if (KAT::magnitude(ConstraintViolationSum(rowIdx, k)) != KAT::magnitude(zero))
759  checkRow = true;
760  }
761  // modify row
762  if (checkRow) {
763  for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) {
764  if (KAT::real(values(entryIdx)) > KAT::real(zero)) {
765  values(entryIdx) = values(entryIdx) +
766  (ConstraintViolationSum(rowIdx, entryIdx % nPDEs) / (Scalar(nPositive(rowIdx, entryIdx % nPDEs)) != zero ? Scalar(nPositive(rowIdx, entryIdx % nPDEs)) : one));
767  }
768  }
769  for (size_t k = 0; k < (size_t)nPDEs; k++) ConstraintViolationSum(rowIdx, k) = zero;
770  }
771  for (size_t k = 0; k < (size_t)nPDEs; k++) Rsum(rowIdx, k) = zero;
772  for (size_t k = 0; k < (size_t)nPDEs; k++) nPositive(rowIdx, k) = 0;
773  } // while (checkRow) ...
774  }
775 };
776 
777 template <typename local_matrix_type>
779  using Scalar = typename local_matrix_type::non_const_value_type;
780  using SC = Scalar;
781  using LO = typename local_matrix_type::non_const_ordinal_type;
782  using Device = typename local_matrix_type::device_type;
783  using KAT = Kokkos::ArithTraits<SC>;
784  const Scalar zero = KAT::zero();
785  const Scalar one = KAT::one();
787  local_matrix_type localP;
788  Kokkos::View<SC**, Device> origSorted;
789  Kokkos::View<SC**, Device> fixedSorted;
790  Kokkos::View<LO**, Device> inds;
791 
792  optimalSatisfyConstraintsForScalarPDEsKernel(LO nPDEs_, LO maxRowEntries_, local_matrix_type localP_)
793  : nPDEs(nPDEs_)
794  , localP(localP_) {
795  origSorted = Kokkos::View<SC**, Device>("origSorted", localP_.numRows(), maxRowEntries_);
796  fixedSorted = Kokkos::View<SC**, Device>("fixedSorted", localP_.numRows(), maxRowEntries_);
797  inds = Kokkos::View<LO**, Device>("inds", localP_.numRows(), maxRowEntries_);
798  }
799 
800  KOKKOS_INLINE_FUNCTION
801  void operator()(const size_t rowIdx) const {
802  auto rowPtr = localP.graph.row_map;
803  auto values = localP.values;
804 
805  auto nnz = rowPtr(rowIdx + 1) - rowPtr(rowIdx);
806 
807  if (nnz != 0) {
808  Scalar rsumTarget = zero;
809  for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) rsumTarget += values(entryIdx);
810 
811  {
812  SC aBigNumber;
813  SC rowSumDeviation, temp, delta;
814  SC closestToLeftBoundDist, closestToRghtBoundDist;
815  LO closestToLeftBound, closestToRghtBound;
816  bool flipped;
817 
818  SC leftBound = zero;
819  SC rghtBound = one;
820  if ((KAT::real(rsumTarget) >= KAT::real(leftBound * (static_cast<SC>(nnz)))) &&
821  (KAT::real(rsumTarget) <= KAT::real(rghtBound * (static_cast<SC>(nnz))))) { // has Feasible solution
822 
823  flipped = false;
824  // compute aBigNumber to handle some corner cases where we need
825  // something large so that an if statement will be false
826  aBigNumber = KAT::zero();
827  for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) {
828  if (KAT::magnitude(values(entryIdx)) > KAT::magnitude(aBigNumber))
829  aBigNumber = KAT::magnitude(values(entryIdx));
830  }
831  aBigNumber = aBigNumber + (KAT::magnitude(leftBound) + KAT::magnitude(rghtBound)) * (100 * one);
832 
833  LO ind = 0;
834  for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) {
835  origSorted(rowIdx, ind) = values(entryIdx);
836  inds(rowIdx, ind) = ind;
837  ind++;
838  }
839 
840  auto sortVals = Kokkos::subview(origSorted, rowIdx, Kokkos::ALL());
841  auto sortInds = Kokkos::subview(inds, rowIdx, Kokkos::make_pair(0, ind));
842  // Need permutation indices to sort row values from smallest to largest,
843  // and unsort once row constraints are applied.
844  // serial insertion sort workaround from https://github.com/kokkos/kokkos/issues/645
845  for (LO i = 1; i < static_cast<LO>(nnz); ++i) {
846  ind = sortInds(i);
847  LO j = i;
848 
849  if (KAT::real(sortVals(sortInds(i))) < KAT::real(sortVals(sortInds(0)))) {
850  for (; j > 0; --j) sortInds(j) = sortInds(j - 1);
851 
852  sortInds(0) = ind;
853  } else {
854  for (; KAT::real(sortVals(ind)) < KAT::real(sortVals(sortInds(j - 1))); --j) sortInds(j) = sortInds(j - 1);
855 
856  sortInds(j) = ind;
857  }
858  }
859 
860  for (LO i = 0; i < static_cast<LO>(nnz); i++) origSorted(rowIdx, i) = values(rowPtr(rowIdx) + inds(rowIdx, i)); // values is no longer used
861  // find entry in origSorted just to the right of the leftBound
862  closestToLeftBound = 0;
863  while ((closestToLeftBound < static_cast<LO>(nnz)) && (KAT::real(origSorted(rowIdx, closestToLeftBound)) <= KAT::real(leftBound))) closestToLeftBound++;
864 
865  // find entry in origSorted just to the right of the rghtBound
866  closestToRghtBound = closestToLeftBound;
867  while ((closestToRghtBound < static_cast<LO>(nnz)) && (KAT::real(origSorted(rowIdx, closestToRghtBound)) <= KAT::real(rghtBound))) closestToRghtBound++;
868 
869  // compute distance between closestToLeftBound and the left bound and the
870  // distance between closestToRghtBound and the right bound.
871 
872  closestToLeftBoundDist = origSorted(rowIdx, closestToLeftBound) - leftBound;
873  if (closestToRghtBound == static_cast<LO>(nnz))
874  closestToRghtBoundDist = aBigNumber;
875  else
876  closestToRghtBoundDist = origSorted(rowIdx, closestToRghtBound) - rghtBound;
877 
878  // compute how far the rowSum is off from the target row sum taking into account
879  // numbers that have been shifted to satisfy bound constraint
880 
881  rowSumDeviation = leftBound * (static_cast<SC>(closestToLeftBound)) + (static_cast<SC>(nnz - closestToRghtBound)) * rghtBound - rsumTarget;
882  for (LO i = closestToLeftBound; i < closestToRghtBound; i++) rowSumDeviation += origSorted(rowIdx, i);
883 
884  // the code that follow after this if statement assumes that rowSumDeviation is positive. If this
885  // is not the case, flip the signs of everything so that rowSumDeviation is now positive.
886  // Later we will flip the data back to its original form.
887  if (KAT::real(rowSumDeviation) < KAT::real(KAT::zero())) {
888  flipped = true;
889  temp = leftBound;
890  leftBound = -rghtBound;
891  rghtBound = temp;
892 
893  /* flip sign of origSorted and reverse ordering so that the negative version is sorted */
894 
895  // TODO: the following is bad for GPU performance. Switch to bit shifting if brave.
896  if ((nnz % 2) == 1) origSorted(rowIdx, (nnz / 2)) = -origSorted(rowIdx, (nnz / 2));
897  for (LO i = 0; i < static_cast<LO>(nnz / 2); i++) {
898  temp = origSorted(rowIdx, i);
899  origSorted(rowIdx, i) = -origSorted(rowIdx, nnz - 1 - i);
900  origSorted(rowIdx, nnz - i - 1) = -temp;
901  }
902 
903  /* reverse bounds */
904 
905  LO itemp = closestToLeftBound;
906  closestToLeftBound = nnz - closestToRghtBound;
907  closestToRghtBound = nnz - itemp;
908  closestToLeftBoundDist = origSorted(rowIdx, closestToLeftBound) - leftBound;
909  if (closestToRghtBound == static_cast<LO>(nnz))
910  closestToRghtBoundDist = aBigNumber;
911  else
912  closestToRghtBoundDist = origSorted(rowIdx, closestToRghtBound) - rghtBound;
913 
914  rowSumDeviation = -rowSumDeviation;
915  }
916 
917  // initial fixedSorted so bounds are satisfied and interiors correspond to origSorted
918 
919  for (LO i = 0; i < closestToLeftBound; i++) fixedSorted(rowIdx, i) = leftBound;
920  for (LO i = closestToLeftBound; i < closestToRghtBound; i++) fixedSorted(rowIdx, i) = origSorted(rowIdx, i);
921  for (LO i = closestToRghtBound; i < static_cast<LO>(nnz); i++) fixedSorted(rowIdx, i) = rghtBound;
922 
923  while ((KAT::magnitude(rowSumDeviation) > KAT::magnitude((one * 1.e-10) * rsumTarget))) { // && ( (closestToLeftBound < nEntries ) || (closestToRghtBound < nEntries))) {
924  if (closestToRghtBound != closestToLeftBound)
925  delta = rowSumDeviation / static_cast<SC>(closestToRghtBound - closestToLeftBound);
926  else
927  delta = aBigNumber;
928 
929  if (KAT::magnitude(closestToLeftBoundDist) <= KAT::magnitude(closestToRghtBoundDist)) {
930  if (KAT::magnitude(delta) <= KAT::magnitude(closestToLeftBoundDist)) {
931  rowSumDeviation = zero;
932  for (LO i = closestToLeftBound; i < closestToRghtBound; i++) fixedSorted(rowIdx, i) = origSorted(rowIdx, i) - delta;
933  } else {
934  rowSumDeviation = rowSumDeviation - closestToLeftBoundDist;
935  fixedSorted(rowIdx, closestToLeftBound) = leftBound;
936  closestToLeftBound++;
937  if (closestToLeftBound < static_cast<LO>(nnz))
938  closestToLeftBoundDist = origSorted(rowIdx, closestToLeftBound) - leftBound;
939  else
940  closestToLeftBoundDist = aBigNumber;
941  }
942  } else {
943  if (KAT::magnitude(delta) <= KAT::magnitude(closestToRghtBoundDist)) {
944  rowSumDeviation = 0;
945  for (LO i = closestToLeftBound; i < closestToRghtBound; i++) fixedSorted(rowIdx, i) = origSorted(rowIdx, i) - delta;
946  } else {
947  rowSumDeviation = rowSumDeviation + closestToRghtBoundDist;
948  // if (closestToRghtBound < nEntries) {
949  fixedSorted(rowIdx, closestToRghtBound) = origSorted(rowIdx, closestToRghtBound);
950  closestToRghtBound++;
951  // }
952  if (closestToRghtBound >= static_cast<LO>(nnz))
953  closestToRghtBoundDist = aBigNumber;
954  else
955  closestToRghtBoundDist = origSorted(rowIdx, closestToRghtBound) - rghtBound;
956  }
957  }
958  }
959 
960  auto rowStart = rowPtr(rowIdx);
961  if (flipped) {
962  /* flip sign of fixedSorted and reverse ordering so that the positve version is sorted */
963 
964  if ((nnz % 2) == 1) fixedSorted(rowIdx, (nnz / 2)) = -fixedSorted(rowIdx, (nnz / 2));
965  for (LO i = 0; i < static_cast<LO>(nnz / 2); i++) {
966  temp = fixedSorted(rowIdx, i);
967  fixedSorted(rowIdx, i) = -fixedSorted(rowIdx, nnz - 1 - i);
968  fixedSorted(rowIdx, nnz - i - 1) = -temp;
969  }
970  }
971  // unsort and update row values with new values
972  for (LO i = 0; i < static_cast<LO>(nnz); i++) values(rowStart + inds(rowIdx, i)) = fixedSorted(rowIdx, i);
973 
974  } else { // row does not have feasible solution to match constraint
975  // just set all entries to the same value giving a row sum of 1
976  for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) values(entryIdx) = one / (static_cast<SC>(nnz));
977  }
978  }
979  }
980  }
981 };
982 
983 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
985  using Device = typename Matrix::local_matrix_type::device_type;
986  LO nPDEs = A->GetFixedBlockSize();
987 
988  using local_mat_type = typename Matrix::local_matrix_type;
989  constraintKernel<local_mat_type> myKernel(nPDEs, P->getLocalMatrixDevice());
990  Kokkos::parallel_for("enforce constraint", Kokkos::RangePolicy<typename Device::execution_space>(0, P->getRowMap()->getLocalNumElements()),
991  myKernel);
992 
993 } // SatsifyPConstraints()
994 
995 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
997  using Device = typename Matrix::local_matrix_type::device_type;
998  LO nPDEs = 1; // A->GetFixedBlockSize();
999 
1000  using local_mat_type = typename Matrix::local_matrix_type;
1001  optimalSatisfyConstraintsForScalarPDEsKernel<local_mat_type> myKernel(nPDEs, P->getLocalMaxNumRowEntries(), P->getLocalMatrixDevice());
1002  Kokkos::parallel_for("enforce constraint", Kokkos::RangePolicy<typename Device::execution_space>(0, P->getLocalNumRows()),
1003  myKernel);
1004 
1005 } // SatsifyPConstraints()
1006 
1007 } // namespace MueLu
1008 
1009 #endif // MUELU_SAPFACTORY_DEF_HPP
1010 
1011 // TODO: restrictionMode_ should use the parameter list.
typename local_matrix_type::non_const_value_type Scalar
MueLu::DefaultLocalOrdinal LocalOrdinal
T & Get(const std::string &ename, const FactoryBase *factory=NoFactory::get())
Get data without decrementing associated storage counter (i.e., read-only access). Usage: Level-&gt;Get&lt; RCP&lt;Matrix&gt; &gt;(&quot;A&quot;, factory) if factory == NULL =&gt; use default factory.
void optimalSatisfyPConstraintsForScalarPDEs(RCP< Matrix > &P) const
std::string toString(const T &what)
Little helper function to convert non-string types to strings.
typename local_matrix_type::device_type Device
static Scalar PowerMethod(const Matrix &A, bool scaleByDiag=true, LocalOrdinal niters=10, Magnitude tolerance=1e-2, bool verbose=false, unsigned int seed=123)
Power method.
bool constrainRow(Scalar *orig, LocalOrdinal nEntries, Scalar leftBound, Scalar rghtBound, Scalar rsumTarget, Scalar *fixedUnsorted, Scalar *scalarData) const
optimalSatisfyConstraintsForScalarPDEsKernel(LO nPDEs_, LO maxRowEntries_, local_matrix_type localP_)
T & get(const std::string &name, T def_value)
KOKKOS_INLINE_FUNCTION void operator()(const size_t rowIdx) const
SaPFactory()
Constructor. User can supply a factory for generating the tentative prolongator.
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
static magnitudeType real(T a)
Print more statistics.
T * getRawPtr() const
size_type size() const
typename local_matrix_type::non_const_ordinal_type LO
constraintKernel(LO nPDEs_, local_matrix_type localP_)
LocalOrdinal LO
One-liner description of what is happening.
void SatisfyPConstraintsNonKokkos(RCP< Matrix > A, RCP< Matrix > &P) const
T * get() const
ParameterList & set(std::string const &name, T &&value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
Print even more statistics.
Kokkos::View< size_t **, Device > nPositive
void resize(const size_type n, const T &val=T())
bool isParameter(const std::string &name) const
typename local_matrix_type::device_type Device
static RCP< Matrix > Jacobi(SC omega, const Vector &Dinv, const Matrix &A, const Matrix &B, RCP< Matrix > C_in, Teuchos::FancyOStream &fos, const std::string &label, RCP< ParameterList > &params)
virtual ~SaPFactory()
Destructor.
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
MueLu::DefaultScalar Scalar
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:63
bool isSublist(const std::string &name) const
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
void Build(Level &fineLevel, Level &coarseLevel) const
Build method.
Kokkos::View< SC **, Device > ConstraintViolationSum
#define SET_VALID_ENTRY(name)
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Transpose(Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, bool optimizeTranspose=false, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
T * getRawPtr() const
static std::string PrintMatrixInfo(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
void SatisfyPConstraints(RCP< Matrix > A, RCP< Matrix > &P) const
static RCP< Vector > GetMatrixDiagonalInverse(const Matrix &A, Magnitude tol=Teuchos::ScalarTraits< Scalar >::eps()*100, Scalar valReplacement=Teuchos::ScalarTraits< Scalar >::zero(), const bool doLumped=false)
Extract Matrix Diagonal.
Kokkos::ArithTraits< SC > KAT
static magnitudeType magnitude(T a)
void BuildP(Level &fineLevel, Level &coarseLevel) const
Abstract Build method.
typename local_matrix_type::non_const_value_type Scalar
typename local_matrix_type::non_const_ordinal_type LO
Scalar SC
Kokkos::View< SC **, Device > Rsum
const RCP< const FactoryManagerBase > GetFactoryManager()
returns the current factory manager
Definition: MueLu_Level.cpp:73
KOKKOS_INLINE_FUNCTION void operator()(const size_t rowIdx) const
ParameterList & sublist(const std::string &name, bool mustAlreadyExist=false, const std::string &docString="")
void optimalSatisfyPConstraintsForScalarPDEsNonKokkos(RCP< Matrix > &P) const
int GetLevelID() const
Return level number.
Definition: MueLu_Level.cpp:51
Exception throws to report errors in the internal logical of the program.
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
static Teuchos::RCP< Vector > GetLumpedMatrixDiagonal(Matrix const &A, const bool doReciprocal=false, Magnitude tol=Teuchos::ScalarTraits< Scalar >::magnitude(Teuchos::ScalarTraits< Scalar >::zero()), Scalar valReplacement=Teuchos::ScalarTraits< Scalar >::zero(), const bool replaceSingleEntryRowWithZero=false, const bool useAverageAbsDiagVal=false)
Extract Matrix Diagonal of lumped matrix.
void DeclareInput(const std::string &ename, const FactoryBase *factory, const FactoryBase *requestedBy=NoFactory::get())
Callback from FactoryBase::CallDeclareInput() and FactoryBase::DeclareInput()
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need&#39;s value has been saved.
bool is_null() const