MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_SaPFactory_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_SAPFACTORY_DEF_HPP
47 #define MUELU_SAPFACTORY_DEF_HPP
48 
49 #include "Kokkos_ArithTraits.hpp"
51 
52 #include <Xpetra_Matrix.hpp>
53 #include <Xpetra_IteratorOps.hpp>
54 
56 #include "MueLu_Level.hpp"
57 #include "MueLu_MasterList.hpp"
58 #include "MueLu_Monitor.hpp"
59 #include "MueLu_PerfUtils.hpp"
60 #include "MueLu_TentativePFactory.hpp"
61 #include "MueLu_Utilities.hpp"
62 
63 #include <sstream>
64 
65 namespace MueLu {
66 
67 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
69  RCP<ParameterList> validParamList = rcp(new ParameterList());
70 
71 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
72  SET_VALID_ENTRY("sa: damping factor");
73  SET_VALID_ENTRY("sa: calculate eigenvalue estimate");
74  SET_VALID_ENTRY("sa: eigenvalue estimate num iterations");
75  SET_VALID_ENTRY("sa: use rowsumabs diagonal scaling");
76  SET_VALID_ENTRY("sa: enforce constraints");
77  SET_VALID_ENTRY("tentative: calculate qr");
78  SET_VALID_ENTRY("sa: max eigenvalue");
79  SET_VALID_ENTRY("sa: rowsumabs diagonal replacement tolerance");
80  SET_VALID_ENTRY("sa: rowsumabs diagonal replacement value");
81  SET_VALID_ENTRY("sa: rowsumabs replace single entry row with zero");
82  SET_VALID_ENTRY("sa: rowsumabs use automatic diagonal tolerance");
83  SET_VALID_ENTRY("use kokkos refactor");
84 #undef SET_VALID_ENTRY
85 
86  validParamList->set<RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A used during the prolongator smoothing process");
87  validParamList->set<RCP<const FactoryBase> >("P", Teuchos::null, "Tentative prolongator factory");
88 
89  // Make sure we don't recursively validate options for the matrixmatrix kernels
90  ParameterList norecurse;
91  norecurse.disableRecursiveValidation();
92  validParamList->set<ParameterList>("matrixmatrix: kernel params", norecurse, "MatrixMatrix kernel parameters");
93 
94  return validParamList;
95 }
96 
97 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
99  Input(fineLevel, "A");
100 
101  // Get default tentative prolongator factory
102  // Getting it that way ensure that the same factory instance will be used for both SaPFactory and NullspaceFactory.
103  RCP<const FactoryBase> initialPFact = GetFactory("P");
104  if (initialPFact == Teuchos::null) {
105  initialPFact = coarseLevel.GetFactoryManager()->GetFactory("Ptent");
106  }
107  coarseLevel.DeclareInput("P", initialPFact.get(), this);
108 }
109 
110 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
112  return BuildP(fineLevel, coarseLevel);
113 }
114 
115 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
117  FactoryMonitor m(*this, "Prolongator smoothing", coarseLevel);
118 
119  std::string levelIDs = toString(coarseLevel.GetLevelID());
120 
121  const std::string prefix = "MueLu::SaPFactory(" + levelIDs + "): ";
122 
123  typedef typename Teuchos::ScalarTraits<SC>::coordinateType Coordinate;
124  typedef typename Teuchos::ScalarTraits<SC>::magnitudeType MT;
125 
126  // Get default tentative prolongator factory
127  // Getting it that way ensure that the same factory instance will be used for both SaPFactory and NullspaceFactory.
128  // -- Warning: Do not use directly initialPFact_. Use initialPFact instead everywhere!
129  RCP<const FactoryBase> initialPFact = GetFactory("P");
130  if (initialPFact == Teuchos::null) {
131  initialPFact = coarseLevel.GetFactoryManager()->GetFactory("Ptent");
132  }
133  const ParameterList& pL = GetParameterList();
134 
135  // Level Get
136  RCP<Matrix> A = Get<RCP<Matrix> >(fineLevel, "A");
137  RCP<Matrix> Ptent = coarseLevel.Get<RCP<Matrix> >("P", initialPFact.get());
138  RCP<Matrix> finalP;
139  // If Tentative facctory bailed out (e.g., number of global aggregates is 0), then SaPFactory bails
140  // This level will ultimately be removed in MueLu_Hierarchy_defs.h via a resize()
141  if (Ptent == Teuchos::null) {
142  finalP = Teuchos::null;
143  Set(coarseLevel, "P", finalP);
144  return;
145  }
146 
147  if (restrictionMode_) {
148  SubFactoryMonitor m2(*this, "Transpose A", coarseLevel);
149  A = Utilities::Transpose(*A, true); // build transpose of A explicitly
150  }
151 
152  // Build final prolongator
153 
154  // Reuse pattern if available
155  RCP<ParameterList> APparams;
156  if (pL.isSublist("matrixmatrix: kernel params"))
157  APparams = rcp(new ParameterList(pL.sublist("matrixmatrix: kernel params")));
158  else
159  APparams = rcp(new ParameterList);
160  if (coarseLevel.IsAvailable("AP reuse data", this)) {
161  GetOStream(static_cast<MsgType>(Runtime0 | Test)) << "Reusing previous AP data" << std::endl;
162 
163  APparams = coarseLevel.Get<RCP<ParameterList> >("AP reuse data", this);
164 
165  if (APparams->isParameter("graph"))
166  finalP = APparams->get<RCP<Matrix> >("graph");
167  }
168  // By default, we don't need global constants for SaP
169  APparams->set("compute global constants: temporaries", APparams->get("compute global constants: temporaries", false));
170  APparams->set("compute global constants", APparams->get("compute global constants", false));
171 
172  const SC dampingFactor = as<SC>(pL.get<double>("sa: damping factor"));
173  const LO maxEigenIterations = as<LO>(pL.get<int>("sa: eigenvalue estimate num iterations"));
174  const bool estimateMaxEigen = pL.get<bool>("sa: calculate eigenvalue estimate");
175  const bool useAbsValueRowSum = pL.get<bool>("sa: use rowsumabs diagonal scaling");
176  const bool doQRStep = pL.get<bool>("tentative: calculate qr");
177  const bool enforceConstraints = pL.get<bool>("sa: enforce constraints");
178  const MT userDefinedMaxEigen = as<MT>(pL.get<double>("sa: max eigenvalue"));
179  double dTol = pL.get<double>("sa: rowsumabs diagonal replacement tolerance");
180  const MT diagonalReplacementTolerance = (dTol == as<double>(-1) ? Teuchos::ScalarTraits<MT>::eps() * 100 : as<MT>(pL.get<double>("sa: rowsumabs diagonal replacement tolerance")));
181  const SC diagonalReplacementValue = as<SC>(pL.get<double>("sa: rowsumabs diagonal replacement value"));
182  const bool replaceSingleEntryRowWithZero = pL.get<bool>("sa: rowsumabs replace single entry row with zero");
183  const bool useAutomaticDiagTol = pL.get<bool>("sa: rowsumabs use automatic diagonal tolerance");
184 
185  // Sanity checking
186  TEUCHOS_TEST_FOR_EXCEPTION(doQRStep && enforceConstraints, Exceptions::RuntimeError,
187  "MueLu::TentativePFactory::MakeTentative: cannot use 'enforce constraints' and 'calculate qr' at the same time");
188 
189  if (dampingFactor != Teuchos::ScalarTraits<SC>::zero()) {
190  Scalar lambdaMax;
191  Teuchos::RCP<Vector> invDiag;
192  if (userDefinedMaxEigen == -1.) {
193  SubFactoryMonitor m2(*this, "Eigenvalue estimate", coarseLevel);
194  lambdaMax = A->GetMaxEigenvalueEstimate();
195  if (lambdaMax == -Teuchos::ScalarTraits<SC>::one() || estimateMaxEigen) {
196  GetOStream(Statistics1) << "Calculating max eigenvalue estimate now (max iters = " << maxEigenIterations << ((useAbsValueRowSum) ? ", use rowSumAbs diagonal)" : ", use point diagonal)") << std::endl;
197  Coordinate stopTol = 1e-4;
198  if (useAbsValueRowSum) {
199  const bool returnReciprocal = true;
200  invDiag = Utilities::GetLumpedMatrixDiagonal(*A, returnReciprocal,
201  diagonalReplacementTolerance,
202  diagonalReplacementValue,
203  replaceSingleEntryRowWithZero,
204  useAutomaticDiagTol);
205  TEUCHOS_TEST_FOR_EXCEPTION(invDiag.is_null(), Exceptions::RuntimeError,
206  "SaPFactory: eigenvalue estimate: diagonal reciprocal is null.");
207  lambdaMax = Utilities::PowerMethod(*A, invDiag, maxEigenIterations, stopTol);
208  } else
209  lambdaMax = Utilities::PowerMethod(*A, true, maxEigenIterations, stopTol);
210  A->SetMaxEigenvalueEstimate(lambdaMax);
211  } else {
212  GetOStream(Statistics1) << "Using cached max eigenvalue estimate" << std::endl;
213  }
214  } else {
215  lambdaMax = userDefinedMaxEigen;
216  A->SetMaxEigenvalueEstimate(lambdaMax);
217  }
218  GetOStream(Statistics1) << "Prolongator damping factor = " << dampingFactor / lambdaMax << " (" << dampingFactor << " / " << lambdaMax << ")" << std::endl;
219 
220  {
221  SubFactoryMonitor m2(*this, "Fused (I-omega*D^{-1} A)*Ptent", coarseLevel);
222  if (!useAbsValueRowSum)
223  invDiag = Utilities::GetMatrixDiagonalInverse(*A); // default
224  else if (invDiag == Teuchos::null) {
225  GetOStream(Runtime0) << "Using rowsumabs diagonal" << std::endl;
226  const bool returnReciprocal = true;
227  invDiag = Utilities::GetLumpedMatrixDiagonal(*A, returnReciprocal,
228  diagonalReplacementTolerance,
229  diagonalReplacementValue,
230  replaceSingleEntryRowWithZero,
231  useAutomaticDiagTol);
232  TEUCHOS_TEST_FOR_EXCEPTION(invDiag.is_null(), Exceptions::RuntimeError, "SaPFactory: diagonal reciprocal is null.");
233  }
234 
235  SC omega = dampingFactor / lambdaMax;
236  TEUCHOS_TEST_FOR_EXCEPTION(!std::isfinite(Teuchos::ScalarTraits<SC>::magnitude(omega)), Exceptions::RuntimeError, "Prolongator damping factor needs to be finite.");
237 
238  {
239  SubFactoryMonitor m3(*this, "Xpetra::IteratorOps::Jacobi", coarseLevel);
240  // finalP = Ptent + (I - \omega D^{-1}A) Ptent
241  finalP = Xpetra::IteratorOps<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Jacobi(omega, *invDiag, *A, *Ptent, finalP, GetOStream(Statistics2), std::string("MueLu::SaP-") + toString(coarseLevel.GetLevelID()), APparams);
242  if (enforceConstraints) {
243  if (!pL.get<bool>("use kokkos refactor")) {
244  if (A->GetFixedBlockSize() == 1)
245  optimalSatisfyPConstraintsForScalarPDEsNonKokkos(finalP);
246  else
247  SatisfyPConstraintsNonKokkos(A, finalP);
248  } else {
249  // if (A->GetFixedBlockSize() == 1)
250  // optimalSatisfyPConstraintsForScalarPDEs(finalP);
251  // else
252  SatisfyPConstraints(A, finalP);
253  }
254  }
255  }
256  }
257 
258  } else {
259  finalP = Ptent;
260  }
261 
262  // Level Set
263  RCP<Matrix> R;
264  if (!restrictionMode_) {
265  // prolongation factory is in prolongation mode
266  if (!finalP.is_null()) {
267  std::ostringstream oss;
268  oss << "P_" << coarseLevel.GetLevelID();
269  finalP->setObjectLabel(oss.str());
270  }
271  Set(coarseLevel, "P", finalP);
272 
273  APparams->set("graph", finalP);
274  Set(coarseLevel, "AP reuse data", APparams);
275 
276  // NOTE: EXPERIMENTAL
277  if (Ptent->IsView("stridedMaps"))
278  finalP->CreateView("stridedMaps", Ptent);
279 
280  } else {
281  // prolongation factory is in restriction mode
282  {
283  SubFactoryMonitor m2(*this, "Transpose P", coarseLevel);
284  R = Utilities::Transpose(*finalP, true);
285  if (!R.is_null()) {
286  std::ostringstream oss;
287  oss << "R_" << coarseLevel.GetLevelID();
288  R->setObjectLabel(oss.str());
289  }
290  }
291 
292  Set(coarseLevel, "R", R);
293 
294  // NOTE: EXPERIMENTAL
295  if (Ptent->IsView("stridedMaps"))
296  R->CreateView("stridedMaps", Ptent, true /*transposeA*/);
297  }
298 
299  if (IsPrint(Statistics2)) {
300  RCP<ParameterList> params = rcp(new ParameterList());
301  params->set("printLoadBalancingInfo", true);
302  params->set("printCommInfo", true);
303  GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo((!restrictionMode_ ? *finalP : *R), (!restrictionMode_ ? "P" : "R"), params);
304  }
305 
306 } // Build()
307 
308 // Analyze the grid transfer produced by smoothed aggregation and make
309 // modifications if it does not look right. In particular, if there are
310 // negative entries or entries larger than 1, modify P's rows.
311 //
312 // Note: this kind of evaluation probably only makes sense if not doing QR
313 // when constructing tentative P.
314 //
315 // For entries that do not satisfy the two constraints (>= 0 or <=1) we set
316 // these entries to the constraint value and modify the rest of the row
317 // so that the row sum remains the same as before by adding an equal
318 // amount to each remaining entry. However, if the original row sum value
319 // violates the constraints, we set the row sum back to 1 (the row sum of
320 // tentative P). After doing the modification to a row, we need to check
321 // again the entire row to make sure that the modified row does not violate
322 // the constraints.
323 
324 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
328  LO nPDEs = A->GetFixedBlockSize();
329  Teuchos::ArrayRCP<Scalar> ConstraintViolationSum(nPDEs);
330  Teuchos::ArrayRCP<Scalar> Rsum(nPDEs);
331  Teuchos::ArrayRCP<size_t> nPositive(nPDEs);
332  for (size_t k = 0; k < (size_t)nPDEs; k++) ConstraintViolationSum[k] = zero;
333  for (size_t k = 0; k < (size_t)nPDEs; k++) Rsum[k] = zero;
334  for (size_t k = 0; k < (size_t)nPDEs; k++) nPositive[k] = 0;
335 
336  for (size_t i = 0; i < as<size_t>(P->getRowMap()->getLocalNumElements()); i++) {
340  P->getLocalRowView((LO)i, indices, vals1);
341  size_t nnz = indices.size();
342  if (nnz == 0) continue;
343 
344  vals = ArrayView<Scalar>(const_cast<SC*>(vals1.getRawPtr()), nnz);
345 
346  bool checkRow = true;
347 
348  while (checkRow) {
349  // check constraints and compute the row sum
350 
351  for (LO j = 0; j < indices.size(); j++) {
352  Rsum[j % nPDEs] += vals[j];
354  ConstraintViolationSum[j % nPDEs] += vals[j];
355  vals[j] = zero;
356  } else {
358  (nPositive[j % nPDEs])++;
359 
361  ConstraintViolationSum[j % nPDEs] += (vals[j] - one);
362  vals[j] = one;
363  }
364  }
365  }
366 
367  checkRow = false;
368 
369  // take into account any row sum that violates the contraints
370 
371  for (size_t k = 0; k < (size_t)nPDEs; k++) {
373  ConstraintViolationSum[k] += (-Rsum[k]); // rstumin
375  ConstraintViolationSum[k] += (one - Rsum[k]); // rstumin
376  }
377  }
378 
379  // check if row need modification
380  for (size_t k = 0; k < (size_t)nPDEs; k++) {
381  if (Teuchos::ScalarTraits<SC>::magnitude(ConstraintViolationSum[k]) != Teuchos::ScalarTraits<SC>::magnitude(zero))
382  checkRow = true;
383  }
384  // modify row
385  if (checkRow) {
386  for (LO j = 0; j < indices.size(); j++) {
388  vals[j] += (ConstraintViolationSum[j % nPDEs] / (as<Scalar>(nPositive[j % nPDEs])));
389  }
390  }
391  for (size_t k = 0; k < (size_t)nPDEs; k++) ConstraintViolationSum[k] = zero;
392  }
393  for (size_t k = 0; k < (size_t)nPDEs; k++) Rsum[k] = zero;
394  for (size_t k = 0; k < (size_t)nPDEs; k++) nPositive[k] = 0;
395  } // while (checkRow) ...
396  } // for (size_t i = 0; i < as<size_t>(P->getRowMap()->getNumNodeElements()); i++) ...
397 } // SatsifyPConstraints()
398 
399 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
403 
404  LocalOrdinal maxEntriesPerRow = 100; // increased later if needed
405  Teuchos::ArrayRCP<Scalar> scalarData(3 * maxEntriesPerRow);
406  bool hasFeasible;
407 
408  for (size_t i = 0; i < as<size_t>(P->getRowMap()->getLocalNumElements()); i++) {
412  P->getLocalRowView((LocalOrdinal)i, indices, vals1);
413  size_t nnz = indices.size();
414  if (nnz != 0) {
415  vals = ArrayView<Scalar>(const_cast<SC*>(vals1.getRawPtr()), nnz);
416  Scalar rsumTarget = zero;
417  for (size_t j = 0; j < nnz; j++) rsumTarget += vals[j];
418 
419  if (nnz > as<size_t>(maxEntriesPerRow)) {
420  maxEntriesPerRow = nnz * 3;
421  scalarData.resize(3 * maxEntriesPerRow);
422  }
423  hasFeasible = constrainRow(vals.getRawPtr(), as<LocalOrdinal>(nnz), zero, one, rsumTarget, vals.getRawPtr(), scalarData.getRawPtr());
424 
425  if (!hasFeasible) { // just set all entries to the same value giving a row sum of 1
426  for (size_t j = 0; j < nnz; j++) vals[j] = one / as<Scalar>(nnz);
427  }
428  }
429 
430  } // for (size_t i = 0; i < as<size_t>(P->getRowMap()->getNumNodeElements()); i++) ...
431 } // SatsifyPConstraints()
432 
433 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
434 bool SaPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::constrainRow(Scalar* orig, LocalOrdinal nEntries, Scalar leftBound, Scalar rghtBound, Scalar rsumTarget, Scalar* fixedUnsorted, Scalar* scalarData) const {
435  /*
436  Input
437  orig data that should be adjusted to satisfy bound constraints and
438  row sum constraint. orig is not modified by this function.
439 
440  nEntries length or 'orig'
441 
442  leftBound, define bound constraints for the results.
443  rghtBound
444 
445  rsumTarget defines an equality constraint for the row sum
446 
447  fixedUnsorted on output, if a feasible solutuion exists then
448  || orig - fixedUnsorted || = min when also
449  leftBound <= fixedUnsorted[i] <= rghtBound for all i
450  and sum(fixedUnsorted) = rsumTarget.
451 
452  Note: it is possible to use the same pointer for
453  fixedUnsorted and orig. In this case, orig gets
454  overwritten with the new constraint satisfying values.
455 
456  scalarData a work array that should be 3x nEntries.
457 
458  On return constrain() indicates whether or not a feasible solution exists.
459  */
460 
461  /*
462  Given a sequence of numbers o1 ... on, fix these so that they are as
463  close as possible to the original but satisfy bound constraints and also
464  have the same row sum as the oi's. If we know who is going to lie on a
465  bound, then the "best" answer (i.e., || o - f ||_2 = min) perturbs
466  each element that doesn't lie on a bound by the same amount.
467 
468  We can represent the oi's by considering scattered points on a number line
469 
470  | |
471  | |
472  o o o | o o o o o o |o o
473  | |
474 
475  \____/ \____/
476  <---- <----
477  delta delta
478 
479  Bounds are shown by vertical lines. The fi's must lie within the bounds, so
480  the 3 leftmost points must be shifted to the right and the 2 rightmost must
481  be shifted to the left. If these fi points are all shifted to the bounds
482  while the others remain in the same location, the row sum constraint is
483  likely not satisfied and so more shifting is necessary. In the figure, the f
484  rowsum is too large and so there must be more shifting to the left.
485 
486  To minimize || o - f ||_2, we basically shift all "interiors" by the same
487  amount, denoted delta. The only trick is that some points near bounds are
488  still affected by the bounds and so these points might be shifted more or less
489  than delta. In the example,t he 3 rightmost points are shifted in the opposite
490  direction as delta to the bound. The 4th point is shifted by something less
491  than delta so it does not violate the lower bound. The rightmost point is
492  shifted to the bound by some amount larger than delta. However, the 2nd point
493  is shifted by delta (i.e., it lies inside the two bounds).
494 
495  If we know delta, we can figure out everything. If we know which points
496  are special (not shifted by delta), we can also figure out everything.
497  The problem is these two things (delta and the special points) are
498  inter-connected. An algorithm for computing follows.
499 
500  1) move exterior points to the bounds and compute how much the row sum is off
501  (rowSumDeviation). We assume now that the new row sum is high, so interior
502  points must be shifted left.
503 
504  2) Mark closest point just left of the leftmost bound, closestToLeftBound,
505  and compute its distance to the leftmost bound. Mark closest point to the
506  left of the rightmost bound, closestToRghtBound, and compute its distance to
507  right bound. There are two cases to consider.
508 
509  3) Case 1: closestToLeftBound is closer than closestToRghtBound.
510  Assume that shifting by delta does not move closestToLeftBound past the
511  left bound. This means that it will be shifted by delta. However,
512  closestToRghtBound will be shifted by more than delta. So the total
513  number of points shifted by delta (|interiorToBounds|) includes
514  closestToLeftBound up to and including the point just to the left of
515  closestToRghtBound. So
516 
517  delta = rowSumDeviation/ |interiorToBounds| .
518 
519  Recall that rowSumDeviation already accounts for the non-delta shift of
520  of closestToRightBound. Now check whether our assumption is valid.
521 
522  If delta <= closestToLeftBoundDist, assumption is true so delta can be
523  applied to interiorToBounds ... and we are done.
524  Else assumption is false. Shift closestToLeftBound to the left bound.
525  Update rowSumDeviation, interiorToBounds, and identify new
526  closestToLeftBound. Repeat step 3).
527 
528  Case 2: closestToRghtBound is closer than closestToLeftBound.
529  Assume that shifting by delta does not move closestToRghtBound past right
530  bound. This means that it must be shifted by more than delta to right
531  bound. So the total number of points shifted by delta again includes
532  closestToLeftBound up to and including the point just to the left of
533  closestToRghtBound. So again compute
534 
535  delta = rowSumDeviation/ |interiorToBounds| .
536 
537  If delta <= closestToRghtBoundDist, assumption is true so delta is
538  can be applied to interiorToBounds ... and we are done
539  Else assumption is false. Put closestToRghtBound in the
540  interiorToBounds set. Remove it's contribution to rowSumDeviation,
541  identify new closestToRghtBound. Repeat step 3)
542 
543 
544  To implement, sort the oi's so things like closestToLeftBound is just index
545  into sorted array. Updaing closestToLeftBound or closestToRghtBound requires
546  increment by 1. |interiorToBounds|= closestToRghtBound - closestToLeftBound
547  To handle the case when the rowsum is low (requiring right interior shifts),
548  just flip the signs on data and use the left-shift code (and then flip back
549  before exiting function.
550  */
551  bool hasFeasibleSol;
552  Scalar notFlippedLeftBound, notFlippedRghtBound, aBigNumber, *origSorted;
553  Scalar rowSumDeviation, temp, *fixedSorted, delta;
554  Scalar closestToLeftBoundDist, closestToRghtBoundDist;
555  LocalOrdinal closestToLeftBound, closestToRghtBound;
556  LocalOrdinal* inds;
557  bool flipped;
558 
559  notFlippedLeftBound = leftBound;
560  notFlippedRghtBound = rghtBound;
561 
562  if ((Teuchos::ScalarTraits<SC>::real(rsumTarget) >= Teuchos::ScalarTraits<SC>::real(leftBound * as<Scalar>(nEntries))) &&
563  (Teuchos::ScalarTraits<SC>::real(rsumTarget) <= Teuchos::ScalarTraits<SC>::real(rghtBound * as<Scalar>(nEntries))))
564  hasFeasibleSol = true;
565  else {
566  hasFeasibleSol = false;
567  return hasFeasibleSol;
568  }
569  flipped = false;
570  // compute aBigNumber to handle some corner cases where we need
571  // something large so that an if statement will be false
572  aBigNumber = Teuchos::ScalarTraits<SC>::zero();
573  for (LocalOrdinal i = 0; i < nEntries; i++) {
575  aBigNumber = Teuchos::ScalarTraits<SC>::magnitude(orig[i]);
576  }
577  aBigNumber = aBigNumber + (Teuchos::ScalarTraits<SC>::magnitude(leftBound) + Teuchos::ScalarTraits<SC>::magnitude(rghtBound)) * as<Scalar>(100.0);
578 
579  origSorted = &scalarData[0];
580  fixedSorted = &(scalarData[nEntries]);
581  inds = (LocalOrdinal*)&(scalarData[2 * nEntries]);
582 
583  for (LocalOrdinal i = 0; i < nEntries; i++) inds[i] = i;
584  for (LocalOrdinal i = 0; i < nEntries; i++) origSorted[i] = orig[i]; /* orig no longer used */
585 
586  // sort so that orig[inds] is sorted.
587  std::sort(inds, inds + nEntries,
588  [origSorted](LocalOrdinal leftIndex, LocalOrdinal rightIndex) { return Teuchos::ScalarTraits<SC>::real(origSorted[leftIndex]) < Teuchos::ScalarTraits<SC>::real(origSorted[rightIndex]); });
589 
590  for (LocalOrdinal i = 0; i < nEntries; i++) origSorted[i] = orig[inds[i]];
591  // find entry in origSorted just to the right of the leftBound
592  closestToLeftBound = 0;
593  while ((closestToLeftBound < nEntries) && (Teuchos::ScalarTraits<SC>::real(origSorted[closestToLeftBound]) <= Teuchos::ScalarTraits<SC>::real(leftBound))) closestToLeftBound++;
594 
595  // find entry in origSorted just to the right of the rghtBound
596  closestToRghtBound = closestToLeftBound;
597  while ((closestToRghtBound < nEntries) && (Teuchos::ScalarTraits<SC>::real(origSorted[closestToRghtBound]) <= Teuchos::ScalarTraits<SC>::real(rghtBound))) closestToRghtBound++;
598 
599  // compute distance between closestToLeftBound and the left bound and the
600  // distance between closestToRghtBound and the right bound.
601 
602  closestToLeftBoundDist = origSorted[closestToLeftBound] - leftBound;
603  if (closestToRghtBound == nEntries)
604  closestToRghtBoundDist = aBigNumber;
605  else
606  closestToRghtBoundDist = origSorted[closestToRghtBound] - rghtBound;
607 
608  // compute how far the rowSum is off from the target row sum taking into account
609  // numbers that have been shifted to satisfy bound constraint
610 
611  rowSumDeviation = leftBound * as<Scalar>(closestToLeftBound) + as<Scalar>((nEntries - closestToRghtBound)) * rghtBound - rsumTarget;
612  for (LocalOrdinal i = closestToLeftBound; i < closestToRghtBound; i++) rowSumDeviation += origSorted[i];
613 
614  // the code that follow after this if statement assumes that rowSumDeviation is positive. If this
615  // is not the case, flip the signs of everything so that rowSumDeviation is now positive.
616  // Later we will flip the data back to its original form.
618  flipped = true;
619  temp = leftBound;
620  leftBound = -rghtBound;
621  rghtBound = temp;
622 
623  /* flip sign of origSorted and reverse ordering so that the negative version is sorted */
624 
625  if ((nEntries % 2) == 1) origSorted[(nEntries / 2)] = -origSorted[(nEntries / 2)];
626  for (LocalOrdinal i = 0; i < nEntries / 2; i++) {
627  temp = origSorted[i];
628  origSorted[i] = -origSorted[nEntries - 1 - i];
629  origSorted[nEntries - i - 1] = -temp;
630  }
631 
632  /* reverse bounds */
633 
634  LocalOrdinal itemp = closestToLeftBound;
635  closestToLeftBound = nEntries - closestToRghtBound;
636  closestToRghtBound = nEntries - itemp;
637  closestToLeftBoundDist = origSorted[closestToLeftBound] - leftBound;
638  if (closestToRghtBound == nEntries)
639  closestToRghtBoundDist = aBigNumber;
640  else
641  closestToRghtBoundDist = origSorted[closestToRghtBound] - rghtBound;
642 
643  rowSumDeviation = -rowSumDeviation;
644  }
645 
646  // initial fixedSorted so bounds are satisfied and interiors correspond to origSorted
647 
648  for (LocalOrdinal i = 0; i < closestToLeftBound; i++) fixedSorted[i] = leftBound;
649  for (LocalOrdinal i = closestToLeftBound; i < closestToRghtBound; i++) fixedSorted[i] = origSorted[i];
650  for (LocalOrdinal i = closestToRghtBound; i < nEntries; i++) fixedSorted[i] = rghtBound;
651 
652  while ((Teuchos::ScalarTraits<SC>::magnitude(rowSumDeviation) > Teuchos::ScalarTraits<SC>::magnitude(as<Scalar>(1.e-10) * rsumTarget))) { // && ( (closestToLeftBound < nEntries ) || (closestToRghtBound < nEntries))) {
653  if (closestToRghtBound != closestToLeftBound)
654  delta = rowSumDeviation / as<Scalar>(closestToRghtBound - closestToLeftBound);
655  else
656  delta = aBigNumber;
657 
658  if (Teuchos::ScalarTraits<SC>::magnitude(closestToLeftBoundDist) <= Teuchos::ScalarTraits<SC>::magnitude(closestToRghtBoundDist)) {
659  if (Teuchos::ScalarTraits<SC>::magnitude(delta) <= Teuchos::ScalarTraits<SC>::magnitude(closestToLeftBoundDist)) {
660  rowSumDeviation = Teuchos::ScalarTraits<SC>::zero();
661  for (LocalOrdinal i = closestToLeftBound; i < closestToRghtBound; i++) fixedSorted[i] = origSorted[i] - delta;
662  } else {
663  rowSumDeviation = rowSumDeviation - closestToLeftBoundDist;
664  fixedSorted[closestToLeftBound] = leftBound;
665  closestToLeftBound++;
666  if (closestToLeftBound < nEntries)
667  closestToLeftBoundDist = origSorted[closestToLeftBound] - leftBound;
668  else
669  closestToLeftBoundDist = aBigNumber;
670  }
671  } else {
672  if (Teuchos::ScalarTraits<SC>::magnitude(delta) <= Teuchos::ScalarTraits<SC>::magnitude(closestToRghtBoundDist)) {
673  rowSumDeviation = 0;
674  for (LocalOrdinal i = closestToLeftBound; i < closestToRghtBound; i++) fixedSorted[i] = origSorted[i] - delta;
675  } else {
676  rowSumDeviation = rowSumDeviation + closestToRghtBoundDist;
677  // if (closestToRghtBound < nEntries) {
678  fixedSorted[closestToRghtBound] = origSorted[closestToRghtBound];
679  closestToRghtBound++;
680  // }
681  if (closestToRghtBound >= nEntries)
682  closestToRghtBoundDist = aBigNumber;
683  else
684  closestToRghtBoundDist = origSorted[closestToRghtBound] - rghtBound;
685  }
686  }
687  }
688 
689  if (flipped) {
690  /* flip sign of fixedSorted and reverse ordering so that the positve version is sorted */
691 
692  if ((nEntries % 2) == 1) fixedSorted[(nEntries / 2)] = -fixedSorted[(nEntries / 2)];
693  for (LocalOrdinal i = 0; i < nEntries / 2; i++) {
694  temp = fixedSorted[i];
695  fixedSorted[i] = -fixedSorted[nEntries - 1 - i];
696  fixedSorted[nEntries - i - 1] = -temp;
697  }
698  }
699  for (LocalOrdinal i = 0; i < nEntries; i++) fixedUnsorted[inds[i]] = fixedSorted[i];
700 
701  /* check that no constraints are violated */
702 
703  bool lowerViolation = false;
704  bool upperViolation = false;
705  bool sumViolation = false;
706  using TST = Teuchos::ScalarTraits<SC>;
707  temp = TST::zero();
708  for (LocalOrdinal i = 0; i < nEntries; i++) {
709  if (TST::real(fixedUnsorted[i]) < TST::real(notFlippedLeftBound)) lowerViolation = true;
710  if (TST::real(fixedUnsorted[i]) > TST::real(notFlippedRghtBound)) upperViolation = true;
711  temp += fixedUnsorted[i];
712  }
713  SC tol = as<Scalar>(std::max(1.0e-8, as<double>(100 * TST::eps())));
714  if (TST::magnitude(temp - rsumTarget) > TST::magnitude(tol * rsumTarget)) sumViolation = true;
715 
716  TEUCHOS_TEST_FOR_EXCEPTION(lowerViolation, Exceptions::RuntimeError, "MueLu::SaPFactory::constrainRow: feasible solution but computation resulted in a lower bound violation??? ");
717  TEUCHOS_TEST_FOR_EXCEPTION(upperViolation, Exceptions::RuntimeError, "MueLu::SaPFactory::constrainRow: feasible solution but computation resulted in an upper bound violation??? ");
718  TEUCHOS_TEST_FOR_EXCEPTION(sumViolation, Exceptions::RuntimeError, "MueLu::SaPFactory::constrainRow: feasible solution but computation resulted in a row sum violation??? ");
719 
720  return hasFeasibleSol;
721 }
722 
723 template <typename local_matrix_type>
725  using Scalar = typename local_matrix_type::non_const_value_type;
726  using SC = Scalar;
727  using LO = typename local_matrix_type::non_const_ordinal_type;
728  using Device = typename local_matrix_type::device_type;
729  using KAT = Kokkos::ArithTraits<SC>;
730  const Scalar zero = KAT::zero();
731  const Scalar one = KAT::one();
733  local_matrix_type localP;
734  Kokkos::View<SC**, Device> ConstraintViolationSum;
735  Kokkos::View<SC**, Device> Rsum;
736  Kokkos::View<size_t**, Device> nPositive;
737 
738  constraintKernel(LO nPDEs_, local_matrix_type localP_)
739  : nPDEs(nPDEs_)
740  , localP(localP_) {
741  ConstraintViolationSum = Kokkos::View<SC**, Device>("ConstraintViolationSum", localP_.numRows(), nPDEs);
742  Rsum = Kokkos::View<SC**, Device>("Rsum", localP_.numRows(), nPDEs);
743  nPositive = Kokkos::View<size_t**, Device>("nPositive", localP_.numRows(), nPDEs);
744  }
745 
746  KOKKOS_INLINE_FUNCTION
747  void operator()(const size_t rowIdx) const {
748  auto rowPtr = localP.graph.row_map;
749  auto values = localP.values;
750 
751  bool checkRow = true;
752 
753  if (rowPtr(rowIdx + 1) == rowPtr(rowIdx)) checkRow = false;
754 
755  while (checkRow) {
756  // check constraints and compute the row sum
757 
758  for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) {
759  Rsum(rowIdx, entryIdx % nPDEs) += values(entryIdx);
760  if (KAT::real(values(entryIdx)) < KAT::real(zero)) {
761  ConstraintViolationSum(rowIdx, entryIdx % nPDEs) += values(entryIdx);
762  values(entryIdx) = zero;
763  } else {
764  if (KAT::real(values(entryIdx)) != KAT::real(zero))
765  nPositive(rowIdx, entryIdx % nPDEs) = nPositive(rowIdx, entryIdx % nPDEs) + 1;
766 
767  if (KAT::real(values(entryIdx)) > KAT::real(1.00001)) {
768  ConstraintViolationSum(rowIdx, entryIdx % nPDEs) += (values(entryIdx) - one);
769  values(entryIdx) = one;
770  }
771  }
772  }
773 
774  checkRow = false;
775 
776  // take into account any row sum that violates the contraints
777 
778  for (size_t k = 0; k < (size_t)nPDEs; k++) {
779  if (KAT::real(Rsum(rowIdx, k)) < KAT::magnitude(zero)) {
780  ConstraintViolationSum(rowIdx, k) = ConstraintViolationSum(rowIdx, k) - Rsum(rowIdx, k); // rstumin
781  } else if (KAT::real(Rsum(rowIdx, k)) > KAT::magnitude(1.00001)) {
782  ConstraintViolationSum(rowIdx, k) = ConstraintViolationSum(rowIdx, k) + (one - Rsum(rowIdx, k)); // rstumin
783  }
784  }
785 
786  // check if row need modification
787  for (size_t k = 0; k < (size_t)nPDEs; k++) {
788  if (KAT::magnitude(ConstraintViolationSum(rowIdx, k)) != KAT::magnitude(zero))
789  checkRow = true;
790  }
791  // modify row
792  if (checkRow) {
793  for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) {
794  if (KAT::real(values(entryIdx)) > KAT::real(zero)) {
795  values(entryIdx) = values(entryIdx) +
796  (ConstraintViolationSum(rowIdx, entryIdx % nPDEs) / (Scalar(nPositive(rowIdx, entryIdx % nPDEs)) != zero ? Scalar(nPositive(rowIdx, entryIdx % nPDEs)) : one));
797  }
798  }
799  for (size_t k = 0; k < (size_t)nPDEs; k++) ConstraintViolationSum(rowIdx, k) = zero;
800  }
801  for (size_t k = 0; k < (size_t)nPDEs; k++) Rsum(rowIdx, k) = zero;
802  for (size_t k = 0; k < (size_t)nPDEs; k++) nPositive(rowIdx, k) = 0;
803  } // while (checkRow) ...
804  }
805 };
806 
807 template <typename local_matrix_type>
809  using Scalar = typename local_matrix_type::non_const_value_type;
810  using SC = Scalar;
811  using LO = typename local_matrix_type::non_const_ordinal_type;
812  using Device = typename local_matrix_type::device_type;
813  using KAT = Kokkos::ArithTraits<SC>;
814  const Scalar zero = KAT::zero();
815  const Scalar one = KAT::one();
817  local_matrix_type localP;
818  Kokkos::View<SC**, Device> origSorted;
819  Kokkos::View<SC**, Device> fixedSorted;
820  Kokkos::View<LO**, Device> inds;
821 
822  optimalSatisfyConstraintsForScalarPDEsKernel(LO nPDEs_, LO maxRowEntries_, local_matrix_type localP_)
823  : nPDEs(nPDEs_)
824  , localP(localP_) {
825  origSorted = Kokkos::View<SC**, Device>("origSorted", localP_.numRows(), maxRowEntries_);
826  fixedSorted = Kokkos::View<SC**, Device>("fixedSorted", localP_.numRows(), maxRowEntries_);
827  inds = Kokkos::View<LO**, Device>("inds", localP_.numRows(), maxRowEntries_);
828  }
829 
830  KOKKOS_INLINE_FUNCTION
831  void operator()(const size_t rowIdx) const {
832  auto rowPtr = localP.graph.row_map;
833  auto values = localP.values;
834 
835  auto nnz = rowPtr(rowIdx + 1) - rowPtr(rowIdx);
836 
837  if (nnz != 0) {
838  Scalar rsumTarget = zero;
839  for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) rsumTarget += values(entryIdx);
840 
841  {
842  SC aBigNumber;
843  SC rowSumDeviation, temp, delta;
844  SC closestToLeftBoundDist, closestToRghtBoundDist;
845  LO closestToLeftBound, closestToRghtBound;
846  bool flipped;
847 
848  SC leftBound = zero;
849  SC rghtBound = one;
850  if ((KAT::real(rsumTarget) >= KAT::real(leftBound * (static_cast<SC>(nnz)))) &&
851  (KAT::real(rsumTarget) <= KAT::real(rghtBound * (static_cast<SC>(nnz))))) { // has Feasible solution
852 
853  flipped = false;
854  // compute aBigNumber to handle some corner cases where we need
855  // something large so that an if statement will be false
856  aBigNumber = KAT::zero();
857  for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) {
858  if (KAT::magnitude(values(entryIdx)) > KAT::magnitude(aBigNumber))
859  aBigNumber = KAT::magnitude(values(entryIdx));
860  }
861  aBigNumber = aBigNumber + (KAT::magnitude(leftBound) + KAT::magnitude(rghtBound)) * (100 * one);
862 
863  LO ind = 0;
864  for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) {
865  origSorted(rowIdx, ind) = values(entryIdx);
866  inds(rowIdx, ind) = ind;
867  ind++;
868  }
869 
870  auto sortVals = Kokkos::subview(origSorted, rowIdx, Kokkos::ALL());
871  auto sortInds = Kokkos::subview(inds, rowIdx, Kokkos::make_pair(0, ind));
872  // Need permutation indices to sort row values from smallest to largest,
873  // and unsort once row constraints are applied.
874  // serial insertion sort workaround from https://github.com/kokkos/kokkos/issues/645
875  for (LO i = 1; i < static_cast<LO>(nnz); ++i) {
876  ind = sortInds(i);
877  LO j = i;
878 
879  if (KAT::real(sortVals(sortInds(i))) < KAT::real(sortVals(sortInds(0)))) {
880  for (; j > 0; --j) sortInds(j) = sortInds(j - 1);
881 
882  sortInds(0) = ind;
883  } else {
884  for (; KAT::real(sortVals(ind)) < KAT::real(sortVals(sortInds(j - 1))); --j) sortInds(j) = sortInds(j - 1);
885 
886  sortInds(j) = ind;
887  }
888  }
889 
890  for (LO i = 0; i < static_cast<LO>(nnz); i++) origSorted(rowIdx, i) = values(rowPtr(rowIdx) + inds(rowIdx, i)); // values is no longer used
891  // find entry in origSorted just to the right of the leftBound
892  closestToLeftBound = 0;
893  while ((closestToLeftBound < static_cast<LO>(nnz)) && (KAT::real(origSorted(rowIdx, closestToLeftBound)) <= KAT::real(leftBound))) closestToLeftBound++;
894 
895  // find entry in origSorted just to the right of the rghtBound
896  closestToRghtBound = closestToLeftBound;
897  while ((closestToRghtBound < static_cast<LO>(nnz)) && (KAT::real(origSorted(rowIdx, closestToRghtBound)) <= KAT::real(rghtBound))) closestToRghtBound++;
898 
899  // compute distance between closestToLeftBound and the left bound and the
900  // distance between closestToRghtBound and the right bound.
901 
902  closestToLeftBoundDist = origSorted(rowIdx, closestToLeftBound) - leftBound;
903  if (closestToRghtBound == static_cast<LO>(nnz))
904  closestToRghtBoundDist = aBigNumber;
905  else
906  closestToRghtBoundDist = origSorted(rowIdx, closestToRghtBound) - rghtBound;
907 
908  // compute how far the rowSum is off from the target row sum taking into account
909  // numbers that have been shifted to satisfy bound constraint
910 
911  rowSumDeviation = leftBound * (static_cast<SC>(closestToLeftBound)) + (static_cast<SC>(nnz - closestToRghtBound)) * rghtBound - rsumTarget;
912  for (LO i = closestToLeftBound; i < closestToRghtBound; i++) rowSumDeviation += origSorted(rowIdx, i);
913 
914  // the code that follow after this if statement assumes that rowSumDeviation is positive. If this
915  // is not the case, flip the signs of everything so that rowSumDeviation is now positive.
916  // Later we will flip the data back to its original form.
917  if (KAT::real(rowSumDeviation) < KAT::real(KAT::zero())) {
918  flipped = true;
919  temp = leftBound;
920  leftBound = -rghtBound;
921  rghtBound = temp;
922 
923  /* flip sign of origSorted and reverse ordering so that the negative version is sorted */
924 
925  // TODO: the following is bad for GPU performance. Switch to bit shifting if brave.
926  if ((nnz % 2) == 1) origSorted(rowIdx, (nnz / 2)) = -origSorted(rowIdx, (nnz / 2));
927  for (LO i = 0; i < static_cast<LO>(nnz / 2); i++) {
928  temp = origSorted(rowIdx, i);
929  origSorted(rowIdx, i) = -origSorted(rowIdx, nnz - 1 - i);
930  origSorted(rowIdx, nnz - i - 1) = -temp;
931  }
932 
933  /* reverse bounds */
934 
935  LO itemp = closestToLeftBound;
936  closestToLeftBound = nnz - closestToRghtBound;
937  closestToRghtBound = nnz - itemp;
938  closestToLeftBoundDist = origSorted(rowIdx, closestToLeftBound) - leftBound;
939  if (closestToRghtBound == static_cast<LO>(nnz))
940  closestToRghtBoundDist = aBigNumber;
941  else
942  closestToRghtBoundDist = origSorted(rowIdx, closestToRghtBound) - rghtBound;
943 
944  rowSumDeviation = -rowSumDeviation;
945  }
946 
947  // initial fixedSorted so bounds are satisfied and interiors correspond to origSorted
948 
949  for (LO i = 0; i < closestToLeftBound; i++) fixedSorted(rowIdx, i) = leftBound;
950  for (LO i = closestToLeftBound; i < closestToRghtBound; i++) fixedSorted(rowIdx, i) = origSorted(rowIdx, i);
951  for (LO i = closestToRghtBound; i < static_cast<LO>(nnz); i++) fixedSorted(rowIdx, i) = rghtBound;
952 
953  while ((KAT::magnitude(rowSumDeviation) > KAT::magnitude((one * 1.e-10) * rsumTarget))) { // && ( (closestToLeftBound < nEntries ) || (closestToRghtBound < nEntries))) {
954  if (closestToRghtBound != closestToLeftBound)
955  delta = rowSumDeviation / static_cast<SC>(closestToRghtBound - closestToLeftBound);
956  else
957  delta = aBigNumber;
958 
959  if (KAT::magnitude(closestToLeftBoundDist) <= KAT::magnitude(closestToRghtBoundDist)) {
960  if (KAT::magnitude(delta) <= KAT::magnitude(closestToLeftBoundDist)) {
961  rowSumDeviation = zero;
962  for (LO i = closestToLeftBound; i < closestToRghtBound; i++) fixedSorted(rowIdx, i) = origSorted(rowIdx, i) - delta;
963  } else {
964  rowSumDeviation = rowSumDeviation - closestToLeftBoundDist;
965  fixedSorted(rowIdx, closestToLeftBound) = leftBound;
966  closestToLeftBound++;
967  if (closestToLeftBound < static_cast<LO>(nnz))
968  closestToLeftBoundDist = origSorted(rowIdx, closestToLeftBound) - leftBound;
969  else
970  closestToLeftBoundDist = aBigNumber;
971  }
972  } else {
973  if (KAT::magnitude(delta) <= KAT::magnitude(closestToRghtBoundDist)) {
974  rowSumDeviation = 0;
975  for (LO i = closestToLeftBound; i < closestToRghtBound; i++) fixedSorted(rowIdx, i) = origSorted(rowIdx, i) - delta;
976  } else {
977  rowSumDeviation = rowSumDeviation + closestToRghtBoundDist;
978  // if (closestToRghtBound < nEntries) {
979  fixedSorted(rowIdx, closestToRghtBound) = origSorted(rowIdx, closestToRghtBound);
980  closestToRghtBound++;
981  // }
982  if (closestToRghtBound >= static_cast<LO>(nnz))
983  closestToRghtBoundDist = aBigNumber;
984  else
985  closestToRghtBoundDist = origSorted(rowIdx, closestToRghtBound) - rghtBound;
986  }
987  }
988  }
989 
990  auto rowStart = rowPtr(rowIdx);
991  if (flipped) {
992  /* flip sign of fixedSorted and reverse ordering so that the positve version is sorted */
993 
994  if ((nnz % 2) == 1) fixedSorted(rowIdx, (nnz / 2)) = -fixedSorted(rowIdx, (nnz / 2));
995  for (LO i = 0; i < static_cast<LO>(nnz / 2); i++) {
996  temp = fixedSorted(rowIdx, i);
997  fixedSorted(rowIdx, i) = -fixedSorted(rowIdx, nnz - 1 - i);
998  fixedSorted(rowIdx, nnz - i - 1) = -temp;
999  }
1000  }
1001  // unsort and update row values with new values
1002  for (LO i = 0; i < static_cast<LO>(nnz); i++) values(rowStart + inds(rowIdx, i)) = fixedSorted(rowIdx, i);
1003 
1004  } else { // row does not have feasible solution to match constraint
1005  // just set all entries to the same value giving a row sum of 1
1006  for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) values(entryIdx) = one / (static_cast<SC>(nnz));
1007  }
1008  }
1009  }
1010  }
1011 };
1012 
1013 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1015  using Device = typename Matrix::local_matrix_type::device_type;
1016  LO nPDEs = A->GetFixedBlockSize();
1017 
1018  using local_mat_type = typename Matrix::local_matrix_type;
1019  constraintKernel<local_mat_type> myKernel(nPDEs, P->getLocalMatrixDevice());
1020  Kokkos::parallel_for("enforce constraint", Kokkos::RangePolicy<typename Device::execution_space>(0, P->getRowMap()->getLocalNumElements()),
1021  myKernel);
1022 
1023 } // SatsifyPConstraints()
1024 
1025 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1027  using Device = typename Matrix::local_matrix_type::device_type;
1028  LO nPDEs = 1; // A->GetFixedBlockSize();
1029 
1030  using local_mat_type = typename Matrix::local_matrix_type;
1031  optimalSatisfyConstraintsForScalarPDEsKernel<local_mat_type> myKernel(nPDEs, P->getLocalMaxNumRowEntries(), P->getLocalMatrixDevice());
1032  Kokkos::parallel_for("enforce constraint", Kokkos::RangePolicy<typename Device::execution_space>(0, P->getLocalNumRows()),
1033  myKernel);
1034 
1035 } // SatsifyPConstraints()
1036 
1037 } // namespace MueLu
1038 
1039 #endif // MUELU_SAPFACTORY_DEF_HPP
1040 
1041 // TODO: restrictionMode_ should use the parameter list.
typename local_matrix_type::non_const_value_type Scalar
MueLu::DefaultLocalOrdinal LocalOrdinal
T & Get(const std::string &ename, const FactoryBase *factory=NoFactory::get())
Get data without decrementing associated storage counter (i.e., read-only access). Usage: Level-&gt;Get&lt; RCP&lt;Matrix&gt; &gt;(&quot;A&quot;, factory) if factory == NULL =&gt; use default factory.
void optimalSatisfyPConstraintsForScalarPDEs(RCP< Matrix > &P) const
std::string toString(const T &what)
Little helper function to convert non-string types to strings.
typename local_matrix_type::device_type Device
static Scalar PowerMethod(const Matrix &A, bool scaleByDiag=true, LocalOrdinal niters=10, Magnitude tolerance=1e-2, bool verbose=false, unsigned int seed=123)
Power method.
bool constrainRow(Scalar *orig, LocalOrdinal nEntries, Scalar leftBound, Scalar rghtBound, Scalar rsumTarget, Scalar *fixedUnsorted, Scalar *scalarData) const
optimalSatisfyConstraintsForScalarPDEsKernel(LO nPDEs_, LO maxRowEntries_, local_matrix_type localP_)
T & get(const std::string &name, T def_value)
KOKKOS_INLINE_FUNCTION void operator()(const size_t rowIdx) const
ParameterList & set(std::string const &name, T const &value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
static magnitudeType real(T a)
Print more statistics.
T * getRawPtr() const
size_type size() const
typename local_matrix_type::non_const_ordinal_type LO
constraintKernel(LO nPDEs_, local_matrix_type localP_)
LocalOrdinal LO
One-liner description of what is happening.
void SatisfyPConstraintsNonKokkos(RCP< Matrix > A, RCP< Matrix > &P) const
T * get() const
Print even more statistics.
Kokkos::View< size_t **, Device > nPositive
void resize(const size_type n, const T &val=T())
bool isParameter(const std::string &name) const
typename local_matrix_type::device_type Device
static RCP< Matrix > Jacobi(SC omega, const Vector &Dinv, const Matrix &A, const Matrix &B, RCP< Matrix > C_in, Teuchos::FancyOStream &fos, const std::string &label, RCP< ParameterList > &params)
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
MueLu::DefaultScalar Scalar
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
bool isSublist(const std::string &name) const
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
void Build(Level &fineLevel, Level &coarseLevel) const
Build method.
Kokkos::View< SC **, Device > ConstraintViolationSum
#define SET_VALID_ENTRY(name)
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Transpose(Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, bool optimizeTranspose=false, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
T * getRawPtr() const
static std::string PrintMatrixInfo(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
void SatisfyPConstraints(RCP< Matrix > A, RCP< Matrix > &P) const
static RCP< Vector > GetMatrixDiagonalInverse(const Matrix &A, Magnitude tol=Teuchos::ScalarTraits< Scalar >::eps()*100, Scalar valReplacement=Teuchos::ScalarTraits< Scalar >::zero(), const bool doLumped=false)
Extract Matrix Diagonal.
Kokkos::ArithTraits< SC > KAT
static magnitudeType magnitude(T a)
void BuildP(Level &fineLevel, Level &coarseLevel) const
Abstract Build method.
typename local_matrix_type::non_const_value_type Scalar
typename local_matrix_type::non_const_ordinal_type LO
Scalar SC
Kokkos::View< SC **, Device > Rsum
const RCP< const FactoryManagerBase > GetFactoryManager()
returns the current factory manager
Definition: MueLu_Level.cpp:96
KOKKOS_INLINE_FUNCTION void operator()(const size_t rowIdx) const
ParameterList & sublist(const std::string &name, bool mustAlreadyExist=false, const std::string &docString="")
void optimalSatisfyPConstraintsForScalarPDEsNonKokkos(RCP< Matrix > &P) const
int GetLevelID() const
Return level number.
Definition: MueLu_Level.cpp:76
Exception throws to report errors in the internal logical of the program.
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
static Teuchos::RCP< Vector > GetLumpedMatrixDiagonal(Matrix const &A, const bool doReciprocal=false, Magnitude tol=Teuchos::ScalarTraits< Scalar >::magnitude(Teuchos::ScalarTraits< Scalar >::zero()), Scalar valReplacement=Teuchos::ScalarTraits< Scalar >::zero(), const bool replaceSingleEntryRowWithZero=false, const bool useAverageAbsDiagVal=false)
Extract Matrix Diagonal of lumped matrix.
void DeclareInput(const std::string &ename, const FactoryBase *factory, const FactoryBase *requestedBy=NoFactory::get())
Callback from FactoryBase::CallDeclareInput() and FactoryBase::DeclareInput()
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need&#39;s value has been saved.
bool is_null() const