Ifpack2 Templated Preconditioning Package  Version 1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Ifpack2_Relaxation_def.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4 //
5 // Copyright 2009 NTESS and the Ifpack2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef IFPACK2_RELAXATION_DEF_HPP
11 #define IFPACK2_RELAXATION_DEF_HPP
12 
13 #include "Teuchos_StandardParameterEntryValidators.hpp"
14 #include "Teuchos_TimeMonitor.hpp"
15 #include "Tpetra_CrsMatrix.hpp"
16 #include "Tpetra_BlockCrsMatrix.hpp"
17 #include "Tpetra_BlockView.hpp"
18 #include "Ifpack2_Utilities.hpp"
19 #include "Ifpack2_Details_getCrsMatrix.hpp"
20 #include "MatrixMarket_Tpetra.hpp"
21 #include "Tpetra_Details_residual.hpp"
22 #include <cstdlib>
23 #include <memory>
24 #include <sstream>
25 #include "KokkosSparse_gauss_seidel.hpp"
26 
27 namespace {
28 // Validate that a given int is nonnegative.
29 class NonnegativeIntValidator : public Teuchos::ParameterEntryValidator {
30  public:
31  // Constructor (does nothing).
32  NonnegativeIntValidator() {}
33 
34  // ParameterEntryValidator wants this method.
36  return Teuchos::null;
37  }
38 
39  // Actually validate the parameter's value.
40  void
41  validate(const Teuchos::ParameterEntry& entry,
42  const std::string& paramName,
43  const std::string& sublistName) const {
44  using std::endl;
45  Teuchos::any anyVal = entry.getAny(true);
46  const std::string entryName = entry.getAny(false).typeName();
47 
49  anyVal.type() != typeid(int),
51  "Parameter \"" << paramName << "\" in sublist \"" << sublistName
52  << "\" has the wrong type." << endl
53  << "Parameter: " << paramName
54  << endl
55  << "Type specified: " << entryName << endl
56  << "Type required: int" << endl);
57 
58  const int val = Teuchos::any_cast<int>(anyVal);
61  "Parameter \"" << paramName << "\" in sublist \"" << sublistName
62  << "\" is negative." << endl
63  << "Parameter: " << paramName
64  << endl
65  << "Value specified: " << val << endl
66  << "Required range: [0, INT_MAX]" << endl);
67  }
68 
69  // ParameterEntryValidator wants this method.
70  const std::string getXMLTypeName() const {
71  return "NonnegativeIntValidator";
72  }
73 
74  // ParameterEntryValidator wants this method.
75  void
76  printDoc(const std::string& docString,
77  std::ostream& out) const {
78  Teuchos::StrUtils::printLines(out, "# ", docString);
79  out << "#\tValidator Used: " << std::endl;
80  out << "#\t\tNonnegativeIntValidator" << std::endl;
81  }
82 };
83 
84 // A way to get a small positive number (eps() for floating-point
85 // types, or 1 for integer types) when Teuchos::ScalarTraits doesn't
86 // define it (for example, for integer values).
87 template <class Scalar, const bool isOrdinal = Teuchos::ScalarTraits<Scalar>::isOrdinal>
88 class SmallTraits {
89  public:
90  // Return eps if Scalar is a floating-point type, else return 1.
91  static const Scalar eps();
92 };
93 
94 // Partial specialization for when Scalar is not a floating-point type.
95 template <class Scalar>
96 class SmallTraits<Scalar, true> {
97  public:
98  static const Scalar eps() {
100  }
101 };
102 
103 // Partial specialization for when Scalar is a floating-point type.
104 template <class Scalar>
105 class SmallTraits<Scalar, false> {
106  public:
107  static const Scalar eps() {
109  }
110 };
111 
112 // Work-around for GitHub Issue #5269.
113 template <class Scalar,
114  const bool isComplex = Teuchos::ScalarTraits<Scalar>::isComplex>
115 struct RealTraits {};
116 
117 template <class Scalar>
118 struct RealTraits<Scalar, false> {
119  using val_type = Scalar;
120  using mag_type = Scalar;
121  static KOKKOS_INLINE_FUNCTION mag_type real(const val_type& z) {
122  return z;
123  }
124 };
125 
126 template <class Scalar>
127 struct RealTraits<Scalar, true> {
128  using val_type = Scalar;
129  using mag_type = typename Teuchos::ScalarTraits<Scalar>::magnitudeType;
130  static KOKKOS_INLINE_FUNCTION mag_type real(const val_type& z) {
131 #if KOKKOS_VERSION >= 40799
132  return KokkosKernels::ArithTraits<val_type>::real(z);
133 #else
134  return Kokkos::ArithTraits<val_type>::real(z);
135 #endif
136  }
137 };
138 
139 template <class Scalar>
140 KOKKOS_INLINE_FUNCTION typename RealTraits<Scalar>::mag_type
141 getRealValue(const Scalar& z) {
142  return RealTraits<Scalar>::real(z);
143 }
144 
145 } // namespace
146 
147 namespace Ifpack2 {
148 
149 template <class MatrixType>
150 void Relaxation<MatrixType>::
151  updateCachedMultiVector(const Teuchos::RCP<const Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type>>& map,
152  size_t numVecs) const {
153  // Allocate a multivector if the cached one isn't perfect. Checking
154  // for map pointer equality is much cheaper than Map::isSameAs.
155  if (cachedMV_.is_null() ||
156  map.get() != cachedMV_->getMap().get() ||
157  cachedMV_->getNumVectors() != numVecs) {
158  using MV = Tpetra::MultiVector<scalar_type, local_ordinal_type,
159  global_ordinal_type, node_type>;
160  cachedMV_ = Teuchos::rcp(new MV(map, numVecs, false));
161  }
162 }
163 
164 template <class MatrixType>
167  if (A.getRawPtr() != A_.getRawPtr()) { // it's a different matrix
168  Importer_ = Teuchos::null;
169  pointImporter_ = Teuchos::null;
170  Diagonal_ = Teuchos::null; // ??? what if this comes from the user???
171  isInitialized_ = false;
172  IsComputed_ = false;
173  diagOffsets_ = Kokkos::View<size_t*, device_type>();
174  savedDiagOffsets_ = false;
175  hasBlockCrsMatrix_ = false;
176  serialGaussSeidel_ = Teuchos::null;
177  if (!A.is_null()) {
178  IsParallel_ = (A->getRowMap()->getComm()->getSize() > 1);
179  }
180  A_ = A;
181  }
182 }
183 
184 template <class MatrixType>
187  : A_(A)
188  , IsParallel_((A.is_null() || A->getRowMap().is_null() || A->getRowMap()->getComm().is_null()) ? false : // a reasonable default if there's no communicator
189  A->getRowMap()->getComm()->getSize() > 1) {
190  this->setObjectLabel("Ifpack2::Relaxation");
191 }
192 
193 template <class MatrixType>
196  using Teuchos::Array;
198  using Teuchos::parameterList;
199  using Teuchos::RCP;
200  using Teuchos::rcp;
201  using Teuchos::rcp_const_cast;
202  using Teuchos::rcp_implicit_cast;
203  using Teuchos::setStringToIntegralParameter;
205 
206  if (validParams_.is_null()) {
207  RCP<ParameterList> pl = parameterList("Ifpack2::Relaxation");
208 
209  // Set a validator that automatically converts from the valid
210  // string options to their enum values.
211  Array<std::string> precTypes(8);
212  precTypes[0] = "Jacobi";
213  precTypes[1] = "Gauss-Seidel";
214  precTypes[2] = "Symmetric Gauss-Seidel";
215  precTypes[3] = "MT Gauss-Seidel";
216  precTypes[4] = "MT Symmetric Gauss-Seidel";
217  precTypes[5] = "Richardson";
218  precTypes[6] = "Two-stage Gauss-Seidel";
219  precTypes[7] = "Two-stage Symmetric Gauss-Seidel";
220  Array<Details::RelaxationType> precTypeEnums(8);
221  precTypeEnums[0] = Details::JACOBI;
222  precTypeEnums[1] = Details::GS;
223  precTypeEnums[2] = Details::SGS;
224  precTypeEnums[3] = Details::MTGS;
225  precTypeEnums[4] = Details::MTSGS;
226  precTypeEnums[5] = Details::RICHARDSON;
227  precTypeEnums[6] = Details::GS2;
228  precTypeEnums[7] = Details::SGS2;
229  const std::string defaultPrecType("Jacobi");
230  setStringToIntegralParameter<Details::RelaxationType>("relaxation: type",
231  defaultPrecType, "Relaxation method", precTypes(), precTypeEnums(),
232  pl.getRawPtr());
233 
234  const int numSweeps = 1;
235  RCP<PEV> numSweepsValidator =
236  rcp_implicit_cast<PEV>(rcp(new NonnegativeIntValidator));
237  pl->set("relaxation: sweeps", numSweeps, "Number of relaxation sweeps",
238  rcp_const_cast<const PEV>(numSweepsValidator));
239 
240  // number of 'local' outer sweeps for two-stage GS
241  const int numOuterSweeps = 1;
242  RCP<PEV> numOuterSweepsValidator =
243  rcp_implicit_cast<PEV>(rcp(new NonnegativeIntValidator));
244  pl->set("relaxation: outer sweeps", numOuterSweeps, "Number of outer local relaxation sweeps for two-stage GS",
245  rcp_const_cast<const PEV>(numOuterSweepsValidator));
246  // number of 'local' inner sweeps for two-stage GS
247  const int numInnerSweeps = 1;
248  RCP<PEV> numInnerSweepsValidator =
249  rcp_implicit_cast<PEV>(rcp(new NonnegativeIntValidator));
250  pl->set("relaxation: inner sweeps", numInnerSweeps, "Number of inner local relaxation sweeps for two-stage GS",
251  rcp_const_cast<const PEV>(numInnerSweepsValidator));
252  // specify damping factor for the inner sweeps of two-stage GS
253  const scalar_type innerDampingFactor = STS::one();
254  pl->set("relaxation: inner damping factor", innerDampingFactor, "Damping factor for the inner sweep of two-stage GS");
255  // specify whether to use sptrsv instead of inner-iterations for two-stage GS
256  const bool innerSpTrsv = false;
257  pl->set("relaxation: inner sparse-triangular solve", innerSpTrsv, "Specify whether to use sptrsv instead of JR iterations for two-stage GS");
258  // specify whether to use compact form of recurrence for two-stage GS
259  const bool compactForm = false;
260  pl->set("relaxation: compact form", compactForm, "Specify whether to use compact form of recurrence for two-stage GS");
261 
262  const scalar_type dampingFactor = STS::one();
263  pl->set("relaxation: damping factor", dampingFactor);
264 
265  const bool zeroStartingSolution = true;
266  pl->set("relaxation: zero starting solution", zeroStartingSolution);
267 
268  const bool doBackwardGS = false;
269  pl->set("relaxation: backward mode", doBackwardGS);
270 
271  const bool doL1Method = false;
272  pl->set("relaxation: use l1", doL1Method);
273 
274  const magnitude_type l1eta = (STM::one() + STM::one() + STM::one()) /
275  (STM::one() + STM::one()); // 1.5
276  pl->set("relaxation: l1 eta", l1eta);
277 
278  const scalar_type minDiagonalValue = STS::zero();
279  pl->set("relaxation: min diagonal value", minDiagonalValue);
280 
281  const bool fixTinyDiagEntries = false;
282  pl->set("relaxation: fix tiny diagonal entries", fixTinyDiagEntries);
283 
284  const bool checkDiagEntries = false;
285  pl->set("relaxation: check diagonal entries", checkDiagEntries);
286 
287  Teuchos::ArrayRCP<local_ordinal_type> localSmoothingIndices = Teuchos::null;
288  pl->set("relaxation: local smoothing indices", localSmoothingIndices);
289 
290  const bool is_matrix_structurally_symmetric = false;
291  pl->set("relaxation: symmetric matrix structure", is_matrix_structurally_symmetric);
292 
293  const bool ifpack2_dump_matrix = false;
294  pl->set("relaxation: ifpack2 dump matrix", ifpack2_dump_matrix);
295 
296  const int cluster_size = 1;
297  pl->set("relaxation: mtgs cluster size", cluster_size);
298 
299  pl->set("relaxation: mtgs coloring algorithm", "Default");
300 
301  const int long_row_threshold = 0;
302  pl->set("relaxation: long row threshold", long_row_threshold);
303 
304  const bool timer_for_apply = true;
305  pl->set("timer for apply", timer_for_apply);
306 
307  validParams_ = rcp_const_cast<const ParameterList>(pl);
308  }
309  return validParams_;
310 }
311 
312 template <class MatrixType>
314  using Teuchos::getIntegralValue;
315  using Teuchos::getStringValue;
317  using Teuchos::RCP;
318  typedef scalar_type ST; // just to make code below shorter
319 
320  if (pl.isType<double>("relaxation: damping factor")) {
321  // Make sure that ST=complex can run with a damping factor that is
322  // a double.
323  ST df = pl.get<double>("relaxation: damping factor");
324  pl.remove("relaxation: damping factor");
325  pl.set("relaxation: damping factor", df);
326  }
327 
329 
330  const Details::RelaxationType precType =
331  getIntegralValue<Details::RelaxationType>(pl, "relaxation: type");
332  const std::string precTypeStr = getStringValue<Details::RelaxationType>(pl, "relaxation: type");
333  // We only access "relaxation: type" using strings in the rest of the code
334  pl.set<std::string>("relaxation: type", precTypeStr);
335  pl.get<std::string>("relaxation: type"); // We need to mark the parameter as "used"
336  const int numSweeps = pl.get<int>("relaxation: sweeps");
337  const ST dampingFactor = pl.get<ST>("relaxation: damping factor");
338  const bool zeroStartSol = pl.get<bool>("relaxation: zero starting solution");
339  const bool doBackwardGS = pl.get<bool>("relaxation: backward mode");
340  const bool doL1Method = pl.get<bool>("relaxation: use l1");
341  const magnitude_type l1Eta = pl.get<magnitude_type>("relaxation: l1 eta");
342  const ST minDiagonalValue = pl.get<ST>("relaxation: min diagonal value");
343  const bool fixTinyDiagEntries = pl.get<bool>("relaxation: fix tiny diagonal entries");
344  const bool checkDiagEntries = pl.get<bool>("relaxation: check diagonal entries");
345  const bool is_matrix_structurally_symmetric = pl.get<bool>("relaxation: symmetric matrix structure");
346  const bool ifpack2_dump_matrix = pl.get<bool>("relaxation: ifpack2 dump matrix");
347  const bool timer_for_apply = pl.get<bool>("timer for apply");
348  int cluster_size = 1;
349  if (pl.isParameter("relaxation: mtgs cluster size")) // optional parameter
350  cluster_size = pl.get<int>("relaxation: mtgs cluster size");
351  int long_row_threshold = 0;
352  if (pl.isParameter("relaxation: long row threshold")) // optional parameter
353  long_row_threshold = pl.get<int>("relaxation: long row threshold");
354  std::string color_algo_name = pl.get<std::string>("relaxation: mtgs coloring algorithm");
355  // convert to lowercase
356  for (char& c : color_algo_name)
357  c = tolower(c);
358  if (color_algo_name == "default")
359  this->mtColoringAlgorithm_ = KokkosGraph::COLORING_DEFAULT;
360  else if (color_algo_name == "serial")
361  this->mtColoringAlgorithm_ = KokkosGraph::COLORING_SERIAL;
362  else if (color_algo_name == "vb")
363  this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VB;
364  else if (color_algo_name == "vbbit")
365  this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VBBIT;
366  else if (color_algo_name == "vbcs")
367  this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VBCS;
368  else if (color_algo_name == "vbd")
369  this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VBD;
370  else if (color_algo_name == "vbdbit")
371  this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VBDBIT;
372  else if (color_algo_name == "eb")
373  this->mtColoringAlgorithm_ = KokkosGraph::COLORING_EB;
374  else {
375  std::ostringstream msg;
376  msg << "Ifpack2::Relaxation: 'relaxation: mtgs coloring algorithm' = '" << color_algo_name << "' is not valid.\n";
377  msg << "Choices (not case sensitive) are: Default, Serial, VB, VBBIT, VBCS, VBD, VBDBIT, EB.";
379  true, std::invalid_argument, msg.str());
380  }
381 
382  Teuchos::ArrayRCP<local_ordinal_type> localSmoothingIndices = pl.get<Teuchos::ArrayRCP<local_ordinal_type>>("relaxation: local smoothing indices");
383 
384  // for Two-stage Gauss-Seidel
385  if (!std::is_same<double, ST>::value && pl.isType<double>("relaxation: inner damping factor")) {
386  // Make sure that ST=complex can run with a damping factor that is
387  // a double.
388  ST df = pl.get<double>("relaxation: inner damping factor");
389  pl.remove("relaxation: inner damping factor");
390  pl.set("relaxation: inner damping factor", df);
391  }
392  // If long row algorithm was requested, make sure non-cluster (point) multicolor Gauss-Seidel (aka MTGS/MTSGS) will be used.
393  if (long_row_threshold > 0) {
395  cluster_size != 1, std::invalid_argument,
396  "Ifpack2::Relaxation: "
397  "Requested long row MTGS/MTSGS algorithm and cluster GS/SGS, but those are not compatible.");
399  precType != Details::RelaxationType::MTGS && precType != Details::RelaxationType::MTSGS,
400  std::invalid_argument,
401  "Ifpack2::Relaxation: "
402  "Requested long row MTGS/MTSGS algorithm, but this is only compatible with preconditioner types "
403  "'MT Gauss-Seidel' and 'MT Symmetric Gauss-Seidel'.");
404  }
405 
406  const ST innerDampingFactor = pl.get<ST>("relaxation: inner damping factor");
407  const int numInnerSweeps = pl.get<int>("relaxation: inner sweeps");
408  const int numOuterSweeps = pl.get<int>("relaxation: outer sweeps");
409  const bool innerSpTrsv = pl.get<bool>("relaxation: inner sparse-triangular solve");
410  const bool compactForm = pl.get<bool>("relaxation: compact form");
411 
412  // "Commit" the changes, now that we've validated everything.
413  PrecType_ = precType;
414  NumSweeps_ = numSweeps;
415  DampingFactor_ = dampingFactor;
416  ZeroStartingSolution_ = zeroStartSol;
417  DoBackwardGS_ = doBackwardGS;
418  DoL1Method_ = doL1Method;
419  L1Eta_ = l1Eta;
420  MinDiagonalValue_ = minDiagonalValue;
421  fixTinyDiagEntries_ = fixTinyDiagEntries;
422  checkDiagEntries_ = checkDiagEntries;
423  clusterSize_ = cluster_size;
424  longRowThreshold_ = long_row_threshold;
425  is_matrix_structurally_symmetric_ = is_matrix_structurally_symmetric;
426  ifpack2_dump_matrix_ = ifpack2_dump_matrix;
427  localSmoothingIndices_ = localSmoothingIndices;
428  // for Two-stage GS
429  NumInnerSweeps_ = numInnerSweeps;
430  NumOuterSweeps_ = numOuterSweeps;
431  InnerSpTrsv_ = innerSpTrsv;
432  InnerDampingFactor_ = innerDampingFactor;
433  CompactForm_ = compactForm;
434  TimerForApply_ = timer_for_apply;
435 }
436 
437 template <class MatrixType>
439  // FIXME (aprokop 18 Oct 2013) Casting away const is bad here.
440  // but otherwise, we will get [unused] in pl
441  this->setParametersImpl(const_cast<Teuchos::ParameterList&>(pl));
442 }
443 
444 template <class MatrixType>
448  A_.is_null(), std::runtime_error,
449  "Ifpack2::Relaxation::getComm: "
450  "The input matrix A is null. Please call setMatrix() with a nonnull "
451  "input matrix before calling this method.");
452  return A_->getRowMap()->getComm();
453 }
454 
455 template <class MatrixType>
458  return A_;
459 }
460 
461 template <class MatrixType>
462 Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
463  typename MatrixType::global_ordinal_type,
464  typename MatrixType::node_type>>
467  A_.is_null(), std::runtime_error,
468  "Ifpack2::Relaxation::getDomainMap: "
469  "The input matrix A is null. Please call setMatrix() with a nonnull "
470  "input matrix before calling this method.");
471  return A_->getDomainMap();
472 }
473 
474 template <class MatrixType>
475 Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
476  typename MatrixType::global_ordinal_type,
477  typename MatrixType::node_type>>
480  A_.is_null(), std::runtime_error,
481  "Ifpack2::Relaxation::getRangeMap: "
482  "The input matrix A is null. Please call setMatrix() with a nonnull "
483  "input matrix before calling this method.");
484  return A_->getRangeMap();
485 }
486 
487 template <class MatrixType>
489  return true;
490 }
491 
492 template <class MatrixType>
494  return (NumInitialize_);
495 }
496 
497 template <class MatrixType>
499  return (NumCompute_);
500 }
501 
502 template <class MatrixType>
504  return (NumApply_);
505 }
506 
507 template <class MatrixType>
509  return (InitializeTime_);
510 }
511 
512 template <class MatrixType>
514  return (ComputeTime_);
515 }
516 
517 template <class MatrixType>
519  return (ApplyTime_);
520 }
521 
522 template <class MatrixType>
524  return (ComputeFlops_);
525 }
526 
527 template <class MatrixType>
529  return (ApplyFlops_);
530 }
531 
532 template <class MatrixType>
535  A_.is_null(), std::runtime_error,
536  "Ifpack2::Relaxation::getNodeSmootherComplexity: "
537  "The input matrix A is null. Please call setMatrix() with a nonnull "
538  "input matrix, then call compute(), before calling this method.");
539  // Relaxation methods cost roughly one apply + one diagonal inverse per iteration
540  return A_->getLocalNumRows() + A_->getLocalNumEntries();
541 }
542 
543 template <class MatrixType>
545  apply(const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
546  Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
547  Teuchos::ETransp /* mode */,
548  scalar_type alpha,
549  scalar_type beta) const {
550  using Teuchos::as;
551  using Teuchos::RCP;
552  using Teuchos::rcp;
553  using Teuchos::rcpFromRef;
554  typedef Tpetra::MultiVector<scalar_type, local_ordinal_type,
556  MV;
558  A_.is_null(), std::runtime_error,
559  "Ifpack2::Relaxation::apply: "
560  "The input matrix A is null. Please call setMatrix() with a nonnull "
561  "input matrix, then call compute(), before calling this method.");
563  !isComputed(),
564  std::runtime_error,
565  "Ifpack2::Relaxation::apply: You must call compute() on this Ifpack2 "
566  "preconditioner instance before you may call apply(). You may call "
567  "isComputed() to find out if compute() has been called already.");
568  TEUCHOS_TEST_FOR_EXCEPTION(
569  X.getNumVectors() != Y.getNumVectors(),
570  std::runtime_error,
571  "Ifpack2::Relaxation::apply: X and Y have different numbers of columns. "
572  "X has "
573  << X.getNumVectors() << " columns, but Y has "
574  << Y.getNumVectors() << " columns.");
575  TEUCHOS_TEST_FOR_EXCEPTION(
576  beta != STS::zero(), std::logic_error,
577  "Ifpack2::Relaxation::apply: beta = " << beta << " != 0 case not "
578  "implemented.");
579 
581  const std::string timerName("Ifpack2::Relaxation::apply");
582  if (TimerForApply_) {
583  timer = Teuchos::TimeMonitor::lookupCounter(timerName);
584  if (timer.is_null()) {
585  timer = Teuchos::TimeMonitor::getNewCounter(timerName);
586  }
587  }
588 
589  Teuchos::Time time = Teuchos::Time(timerName);
590  double startTime = time.wallTime();
591 
592  {
594  if (TimerForApply_)
595  timeMon = Teuchos::rcp(new Teuchos::TimeMonitor(*timer));
596 
597  // Special case: alpha == 0.
598  if (alpha == STS::zero()) {
599  // No floating-point operations, so no need to update a count.
600  Y.putScalar(STS::zero());
601  } else {
602  // If X and Y alias one another, then we need to create an
603  // auxiliary vector, Xcopy (a deep copy of X).
604  RCP<const MV> Xcopy;
605  {
606  if (X.aliases(Y)) {
607  Xcopy = rcp(new MV(X, Teuchos::Copy));
608  } else {
609  Xcopy = rcpFromRef(X);
610  }
611  }
612 
613  // Each of the following methods updates the flop count itself.
614  // All implementations handle zeroing out the starting solution
615  // (if necessary) themselves.
616  switch (PrecType_) {
617  case Ifpack2::Details::JACOBI:
618  ApplyInverseJacobi(*Xcopy, Y);
619  break;
620  case Ifpack2::Details::GS:
621  ApplyInverseSerialGS(*Xcopy, Y, DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward);
622  break;
623  case Ifpack2::Details::SGS:
624  ApplyInverseSerialGS(*Xcopy, Y, Tpetra::Symmetric);
625  break;
626  case Ifpack2::Details::MTGS:
627  case Ifpack2::Details::GS2:
628  ApplyInverseMTGS_CrsMatrix(*Xcopy, Y, DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward);
629  break;
630  case Ifpack2::Details::MTSGS:
631  case Ifpack2::Details::SGS2:
632  ApplyInverseMTGS_CrsMatrix(*Xcopy, Y, Tpetra::Symmetric);
633  break;
634  case Ifpack2::Details::RICHARDSON:
635  ApplyInverseRichardson(*Xcopy, Y);
636  break;
637 
638  default:
639  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
640  "Ifpack2::Relaxation::apply: Invalid preconditioner type enum value "
641  << PrecType_ << ". Please report this bug to the Ifpack2 developers.");
642  }
643  if (alpha != STS::one()) {
644  Y.scale(alpha);
645  const double numGlobalRows = as<double>(A_->getGlobalNumRows());
646  const double numVectors = as<double>(Y.getNumVectors());
647  ApplyFlops_ += numGlobalRows * numVectors;
648  }
649  }
650  }
651  ApplyTime_ += (time.wallTime() - startTime);
652  ++NumApply_;
653 }
654 
655 template <class MatrixType>
657  applyMat(const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
658  Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
659  Teuchos::ETransp mode) const {
661  A_.is_null(), std::runtime_error,
662  "Ifpack2::Relaxation::applyMat: "
663  "The input matrix A is null. Please call setMatrix() with a nonnull "
664  "input matrix, then call compute(), before calling this method.");
666  !isComputed(), std::runtime_error,
667  "Ifpack2::Relaxation::applyMat: "
668  "isComputed() must be true before you may call applyMat(). "
669  "Please call compute() before calling this method.");
670  TEUCHOS_TEST_FOR_EXCEPTION(
671  X.getNumVectors() != Y.getNumVectors(), std::invalid_argument,
672  "Ifpack2::Relaxation::applyMat: X.getNumVectors() = " << X.getNumVectors()
673  << " != Y.getNumVectors() = " << Y.getNumVectors() << ".");
674  A_->apply(X, Y, mode);
675 }
676 
677 template <class MatrixType>
679  const char methodName[] = "Ifpack2::Relaxation::initialize";
680 
681  TEUCHOS_TEST_FOR_EXCEPTION(A_.is_null(), std::runtime_error, methodName << ": The "
682  "input matrix A is null. Please call setMatrix() with "
683  "a nonnull input matrix before calling this method.");
684 
687 
688  double startTime = timer->wallTime();
689 
690  { // Timing of initialize starts here
691  Teuchos::TimeMonitor timeMon(*timer);
692  isInitialized_ = false;
693 
694  {
695  auto rowMap = A_->getRowMap();
696  auto comm = rowMap.is_null() ? Teuchos::null : rowMap->getComm();
697  IsParallel_ = !comm.is_null() && comm->getSize() > 1;
698  }
699 
700  // mfh 21 Mar 2013, 07 May 2019: The Import object may be null,
701  // but in that case, the domain and column Maps are the same and
702  // we don't need to Import anyway.
703  //
704  // mfh 07 May 2019: A_->getGraph() might be an
705  // OverlappingRowGraph, which doesn't have an Import object.
706  // However, in that case, the comm should have size 1.
707  Importer_ = IsParallel_ ? A_->getGraph()->getImporter() : Teuchos::null;
708 
709  {
711  Teuchos::rcp_dynamic_cast<const block_crs_matrix_type>(A_);
712  hasBlockCrsMatrix_ = !A_bcrs.is_null();
713  }
714 
715  serialGaussSeidel_ = Teuchos::null;
716 
717  if (PrecType_ == Details::MTGS || PrecType_ == Details::MTSGS ||
718  PrecType_ == Details::GS2 || PrecType_ == Details::SGS2) {
719  auto crsMat = Details::getCrsMatrix(A_);
720  TEUCHOS_TEST_FOR_EXCEPTION(crsMat.is_null(), std::logic_error, methodName << ": "
721  "Multithreaded Gauss-Seidel methods currently only work "
722  "when the input matrix is a Tpetra::CrsMatrix.");
723 
724  // FIXME (mfh 27 May 2019) Dumping the matrix belongs in
725  // compute, not initialize, since users may change the matrix's
726  // values at any time before calling compute.
727  if (ifpack2_dump_matrix_) {
728  static int sequence_number = 0;
729  const std::string file_name = "Ifpack2_MT_GS_" +
730  std::to_string(sequence_number++) + ".mtx";
731  using writer_type = Tpetra::MatrixMarket::Writer<crs_matrix_type>;
732  writer_type::writeSparseFile(file_name, crsMat);
733  }
734 
735  this->mtKernelHandle_ = Teuchos::rcp(new mt_kernel_handle_type());
736  if (mtKernelHandle_->get_gs_handle() == nullptr) {
737  if (PrecType_ == Details::GS2 || PrecType_ == Details::SGS2)
738  mtKernelHandle_->create_gs_handle(KokkosSparse::GS_TWOSTAGE);
739  else if (this->clusterSize_ == 1) {
740  mtKernelHandle_->create_gs_handle(KokkosSparse::GS_DEFAULT, this->mtColoringAlgorithm_);
741  mtKernelHandle_->get_point_gs_handle()->set_long_row_threshold(longRowThreshold_);
742  } else
743  mtKernelHandle_->create_gs_handle(KokkosSparse::CLUSTER_DEFAULT, this->clusterSize_, this->mtColoringAlgorithm_);
744  }
745  local_matrix_device_type kcsr = crsMat->getLocalMatrixDevice();
746  if (PrecType_ == Details::GS2 || PrecType_ == Details::SGS2) {
747  // set parameters for two-stage GS
748  mtKernelHandle_->set_gs_set_num_inner_sweeps(NumInnerSweeps_);
749  mtKernelHandle_->set_gs_set_num_outer_sweeps(NumOuterSweeps_);
750  mtKernelHandle_->set_gs_set_inner_damp_factor(InnerDampingFactor_);
751  mtKernelHandle_->set_gs_twostage(!InnerSpTrsv_, A_->getLocalNumRows());
752  mtKernelHandle_->set_gs_twostage_compact_form(CompactForm_);
753  }
754 
755  KokkosSparse::gauss_seidel_symbolic(
756  mtKernelHandle_.getRawPtr(),
757  A_->getLocalNumRows(),
758  A_->getLocalNumCols(),
759  kcsr.graph.row_map,
760  kcsr.graph.entries,
761  is_matrix_structurally_symmetric_);
762  }
763  } // timing of initialize stops here
764 
765  InitializeTime_ += (timer->wallTime() - startTime);
766  ++NumInitialize_;
767  isInitialized_ = true;
768 }
769 
770 namespace Impl {
771 template <typename BlockDiagView>
772 struct InvertDiagBlocks {
773  typedef typename BlockDiagView::size_type Size;
774 
775  private:
776  typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
777  template <typename View>
778  using UnmanagedView = Kokkos::View<typename View::data_type, typename View::array_layout,
779  typename View::device_type, Unmanaged>;
780 
781  typedef typename BlockDiagView::non_const_value_type Scalar;
782  typedef typename BlockDiagView::device_type Device;
783  typedef Kokkos::View<Scalar**, Kokkos::LayoutRight, Device> RWrk;
784  typedef Kokkos::View<int**, Kokkos::LayoutRight, Device> IWrk;
785 
786  UnmanagedView<BlockDiagView> block_diag_;
787  // TODO Use thread team and scratch memory space. In this first
788  // pass, provide workspace for each block.
789  RWrk rwrk_buf_;
790  UnmanagedView<RWrk> rwrk_;
791  IWrk iwrk_buf_;
792  UnmanagedView<IWrk> iwrk_;
793 
794  public:
795  InvertDiagBlocks(BlockDiagView& block_diag)
796  : block_diag_(block_diag) {
797  const auto blksz = block_diag.extent(1);
798  Kokkos::resize(rwrk_buf_, block_diag_.extent(0), blksz);
799  rwrk_ = rwrk_buf_;
800  Kokkos::resize(iwrk_buf_, block_diag_.extent(0), blksz);
801  iwrk_ = iwrk_buf_;
802  }
803 
804  KOKKOS_INLINE_FUNCTION
805  void operator()(const Size i, int& jinfo) const {
806  auto D_cur = Kokkos::subview(block_diag_, i, Kokkos::ALL(), Kokkos::ALL());
807  auto ipiv = Kokkos::subview(iwrk_, i, Kokkos::ALL());
808  auto work = Kokkos::subview(rwrk_, i, Kokkos::ALL());
809  int info = 0;
810  Tpetra::GETF2(D_cur, ipiv, info);
811  if (info) {
812  ++jinfo;
813  return;
814  }
815  Tpetra::GETRI(D_cur, ipiv, work, info);
816  if (info) ++jinfo;
817  }
818 };
819 } // namespace Impl
820 
821 template <class MatrixType>
822 void Relaxation<MatrixType>::computeBlockCrs() {
823  using Kokkos::ALL;
824  using Teuchos::Array;
825  using Teuchos::ArrayRCP;
826  using Teuchos::ArrayView;
827  using Teuchos::as;
828  using Teuchos::Comm;
829  using Teuchos::RCP;
830  using Teuchos::rcp;
831  using Teuchos::rcp_dynamic_cast;
832  using Teuchos::REDUCE_MAX;
833  using Teuchos::REDUCE_MIN;
834  using Teuchos::REDUCE_SUM;
835  using Teuchos::reduceAll;
836  typedef local_ordinal_type LO;
837 
838  const std::string timerName("Ifpack2::Relaxation::computeBlockCrs");
840  if (timer.is_null()) {
841  timer = Teuchos::TimeMonitor::getNewCounter(timerName);
842  }
843  double startTime = timer->wallTime();
844  {
845  Teuchos::TimeMonitor timeMon(*timer);
846 
848  A_.is_null(), std::runtime_error,
849  "Ifpack2::Relaxation::"
850  "computeBlockCrs: The input matrix A is null. Please call setMatrix() "
851  "with a nonnull input matrix, then call initialize(), before calling "
852  "this method.");
853  auto blockCrsA = rcp_dynamic_cast<const block_crs_matrix_type>(A_);
855  blockCrsA.is_null(), std::logic_error,
856  "Ifpack2::Relaxation::"
857  "computeBlockCrs: A_ is not a BlockCrsMatrix, but it should be if we "
858  "got this far. Please report this bug to the Ifpack2 developers.");
859 
860  const scalar_type one = STS::one();
861 
862  // Reset state.
863  IsComputed_ = false;
864 
865  const LO lclNumMeshRows =
866  blockCrsA->getCrsGraph().getLocalNumRows();
867  const LO blockSize = blockCrsA->getBlockSize();
868 
869  if (!savedDiagOffsets_) {
870  blockDiag_ = block_diag_type(); // clear it before reallocating
871  blockDiag_ = block_diag_type("Ifpack2::Relaxation::blockDiag_",
872  lclNumMeshRows, blockSize, blockSize);
873  if (Teuchos::as<LO>(diagOffsets_.extent(0)) < lclNumMeshRows) {
874  // Clear diagOffsets_ first (by assigning an empty View to it)
875  // to save memory, before reallocating.
876  diagOffsets_ = Kokkos::View<size_t*, device_type>();
877  diagOffsets_ = Kokkos::View<size_t*, device_type>("offsets", lclNumMeshRows);
878  }
879  blockCrsA->getCrsGraph().getLocalDiagOffsets(diagOffsets_);
880  TEUCHOS_TEST_FOR_EXCEPTION(static_cast<size_t>(diagOffsets_.extent(0)) !=
881  static_cast<size_t>(blockDiag_.extent(0)),
882  std::logic_error, "diagOffsets_.extent(0) = " << diagOffsets_.extent(0) << " != blockDiag_.extent(0) = " << blockDiag_.extent(0) << ". Please report this bug to the Ifpack2 developers.");
883  savedDiagOffsets_ = true;
884  }
885  blockCrsA->getLocalDiagCopy(blockDiag_, diagOffsets_);
886 
887  // Use an unmanaged View in this method, so that when we take
888  // subviews of it (to get each diagonal block), we don't have to
889  // touch the reference count. Reference count updates are a
890  // thread scalability bottleneck and have a performance cost even
891  // without using threads.
892  unmanaged_block_diag_type blockDiag = blockDiag_;
893 
894  if (DoL1Method_ && IsParallel_) {
895  const scalar_type two = one + one;
896  const size_t maxLength = A_->getLocalMaxNumRowEntries();
897  nonconst_local_inds_host_view_type indices("indices", maxLength);
898  nonconst_values_host_view_type values_("values", maxLength * blockSize * blockSize);
899  size_t numEntries = 0;
900 
901  for (LO i = 0; i < lclNumMeshRows; ++i) {
902  // FIXME (mfh 16 Dec 2015) Get views instead of copies.
903  blockCrsA->getLocalRowCopy(i, indices, values_, numEntries);
904  scalar_type* values = reinterpret_cast<scalar_type*>(values_.data());
905 
906  auto diagBlock = Kokkos::subview(blockDiag, i, ALL(), ALL());
907  for (LO subRow = 0; subRow < blockSize; ++subRow) {
908  magnitude_type diagonal_boost = STM::zero();
909  for (size_t k = 0; k < numEntries; ++k) {
910  if (indices[k] >= lclNumMeshRows) {
911  const size_t offset = blockSize * blockSize * k + subRow * blockSize;
912  for (LO subCol = 0; subCol < blockSize; ++subCol) {
913  diagonal_boost += STS::magnitude(values[offset + subCol] / two);
914  }
915  }
916  }
917  if (STS::magnitude(diagBlock(subRow, subRow)) < L1Eta_ * diagonal_boost) {
918  diagBlock(subRow, subRow) += diagonal_boost;
919  }
920  }
921  }
922  }
923 
924  int info = 0;
925  {
926  Impl::InvertDiagBlocks<unmanaged_block_diag_type> idb(blockDiag);
927  typedef typename unmanaged_block_diag_type::execution_space exec_space;
928  typedef Kokkos::RangePolicy<exec_space, LO> range_type;
929 
930  Kokkos::parallel_reduce(range_type(0, lclNumMeshRows), idb, info);
931  // FIXME (mfh 19 May 2017) Different processes may not
932  // necessarily have this error all at the same time, so it would
933  // be better just to preserve a local error state and let users
934  // check.
935  TEUCHOS_TEST_FOR_EXCEPTION(info > 0, std::runtime_error, "GETF2 or GETRI failed on = " << info << " diagonal blocks.");
936  }
937 
938  // In a debug build, do an extra test to make sure that all the
939  // factorizations were computed correctly.
940 #ifdef HAVE_IFPACK2_DEBUG
941  const int numResults = 2;
942  // Use "max = -min" trick to get min and max in a single all-reduce.
943  int lclResults[2], gblResults[2];
944  lclResults[0] = info;
945  lclResults[1] = -info;
946  gblResults[0] = 0;
947  gblResults[1] = 0;
948  reduceAll<int, int>(*(A_->getGraph()->getComm()), REDUCE_MIN,
949  numResults, lclResults, gblResults);
950  TEUCHOS_TEST_FOR_EXCEPTION(gblResults[0] != 0 || gblResults[1] != 0, std::runtime_error,
951  "Ifpack2::Relaxation::compute: When processing the input "
952  "Tpetra::BlockCrsMatrix, one or more diagonal block LU factorizations "
953  "failed on one or more (MPI) processes.");
954 #endif // HAVE_IFPACK2_DEBUG
955  serialGaussSeidel_ = rcp(new SerialGaussSeidel(blockCrsA, blockDiag_, localSmoothingIndices_, DampingFactor_));
956  } // end TimeMonitor scope
957 
958  ComputeTime_ += (timer->wallTime() - startTime);
959  ++NumCompute_;
960  IsComputed_ = true;
961 }
962 
963 template <class MatrixType>
965  using Teuchos::Array;
966  using Teuchos::ArrayRCP;
967  using Teuchos::ArrayView;
968  using Teuchos::as;
969  using Teuchos::Comm;
970  using Teuchos::RCP;
971  using Teuchos::rcp;
972  using Teuchos::REDUCE_MAX;
973  using Teuchos::REDUCE_MIN;
974  using Teuchos::REDUCE_SUM;
975  using Teuchos::reduceAll;
976  using LO = local_ordinal_type;
977  using vector_type = Tpetra::Vector<scalar_type, local_ordinal_type,
979  using IST = typename vector_type::impl_scalar_type;
980 #if KOKKOS_VERSION >= 40799
981  using KAT = KokkosKernels::ArithTraits<IST>;
982 #else
983  using KAT = Kokkos::ArithTraits<IST>;
984 #endif
985 
986  const char methodName[] = "Ifpack2::Relaxation::compute";
987  const scalar_type zero = STS::zero();
988  const scalar_type one = STS::one();
989 
990  TEUCHOS_TEST_FOR_EXCEPTION(A_.is_null(), std::runtime_error, methodName << ": "
991  "The input matrix A is null. Please call setMatrix() with a nonnull "
992  "input matrix, then call initialize(), before calling this method.");
993 
994  // We don't count initialization in compute() time.
995  if (!isInitialized()) {
996  initialize();
997  }
998 
999  if (hasBlockCrsMatrix_) {
1000  computeBlockCrs();
1001  return;
1002  }
1003 
1006  double startTime = timer->wallTime();
1007 
1008  { // Timing of compute starts here.
1009  Teuchos::TimeMonitor timeMon(*timer);
1010 
1011  // Precompute some quantities for "fixing" zero or tiny diagonal
1012  // entries. We'll only use them if this "fixing" is enabled.
1013  //
1014  // SmallTraits covers for the lack of eps() in
1015  // Teuchos::ScalarTraits when its template parameter is not a
1016  // floating-point type. (Ifpack2 sometimes gets instantiated for
1017  // integer Scalar types.)
1018  IST oneOverMinDiagVal = KAT::one() / static_cast<IST>(SmallTraits<scalar_type>::eps());
1019  if (MinDiagonalValue_ != zero)
1020  oneOverMinDiagVal = KAT::one() / static_cast<IST>(MinDiagonalValue_);
1021 
1022  // It's helpful not to have to recompute this magnitude each time.
1023  const magnitude_type minDiagValMag = STS::magnitude(MinDiagonalValue_);
1024 
1025  const LO numMyRows = static_cast<LO>(A_->getLocalNumRows());
1026 
1027  TEUCHOS_TEST_FOR_EXCEPTION(NumSweeps_ < 0, std::logic_error, methodName << ": NumSweeps_ = " << NumSweeps_ << " < 0. "
1028  "Please report this bug to the Ifpack2 developers.");
1029  IsComputed_ = false;
1030 
1031  if (Diagonal_.is_null()) {
1032  // A_->getLocalDiagCopy fills in all Vector entries, even if the
1033  // matrix has no stored entries in the corresponding rows.
1034  Diagonal_ = rcp(new vector_type(A_->getRowMap(), false));
1035  }
1036 
1037  if (checkDiagEntries_) {
1038  //
1039  // Check diagonal elements, replace zero elements with the minimum
1040  // diagonal value, and store their inverses. Count the number of
1041  // "small" and zero diagonal entries while we're at it.
1042  //
1043  size_t numSmallDiagEntries = 0; // "small" includes zero
1044  size_t numZeroDiagEntries = 0; // # zero diagonal entries
1045  size_t numNegDiagEntries = 0; // # negative (real parts of) diagonal entries
1046  magnitude_type minMagDiagEntryMag = STM::zero();
1047  magnitude_type maxMagDiagEntryMag = STM::zero();
1048 
1049  // FIXME: We are extracting the diagonal more than once. That
1050  // isn't very efficient, but we shouldn't be checking
1051  // diagonal entries at scale anyway.
1052  A_->getLocalDiagCopy(*Diagonal_); // slow path for row matrix
1053  std::unique_ptr<vector_type> origDiag;
1054  origDiag = std::unique_ptr<vector_type>(new vector_type(*Diagonal_, Teuchos::Copy));
1055  auto diag2d = Diagonal_->getLocalViewHost(Tpetra::Access::ReadWrite);
1056  auto diag = Kokkos::subview(diag2d, Kokkos::ALL(), 0);
1057  // As we go, keep track of the diagonal entries with the
1058  // least and greatest magnitude. We could use the trick of
1059  // starting min with +Inf and max with -Inf, but that
1060  // doesn't work if scalar_type is a built-in integer type.
1061  // Thus, we have to start by reading the first diagonal
1062  // entry redundantly.
1063  if (numMyRows != 0) {
1064  const magnitude_type d_0_mag = KAT::abs(diag(0));
1065  minMagDiagEntryMag = d_0_mag;
1066  maxMagDiagEntryMag = d_0_mag;
1067  }
1068 
1069  // Go through all the diagonal entries. Compute counts of
1070  // small-magnitude, zero, and negative-real-part entries.
1071  // Invert the diagonal entries that aren't too small. For
1072  // those too small in magnitude, replace them with
1073  // 1/MinDiagonalValue_ (or 1/eps if MinDiagonalValue_
1074  // happens to be zero).
1075  for (LO i = 0; i < numMyRows; ++i) {
1076  const IST d_i = diag(i);
1077  const magnitude_type d_i_mag = KAT::abs(d_i);
1078  // Work-around for GitHub Issue #5269.
1079  // const magnitude_type d_i_real = KAT::real (d_i);
1080  const auto d_i_real = getRealValue(d_i);
1081 
1082  // We can't compare complex numbers, but we can compare their
1083  // real parts.
1084  if (d_i_real < STM::zero()) {
1085  ++numNegDiagEntries;
1086  }
1087  if (d_i_mag < minMagDiagEntryMag) {
1088  minMagDiagEntryMag = d_i_mag;
1089  }
1090  if (d_i_mag > maxMagDiagEntryMag) {
1091  maxMagDiagEntryMag = d_i_mag;
1092  }
1093 
1094  if (fixTinyDiagEntries_) {
1095  // <= not <, in case minDiagValMag is zero.
1096  if (d_i_mag <= minDiagValMag) {
1097  ++numSmallDiagEntries;
1098  if (d_i_mag == STM::zero()) {
1099  ++numZeroDiagEntries;
1100  }
1101  diag(i) = oneOverMinDiagVal;
1102  } else {
1103  diag(i) = KAT::one() / d_i;
1104  }
1105  } else { // Don't fix zero or tiny diagonal entries.
1106  // <= not <, in case minDiagValMag is zero.
1107  if (d_i_mag <= minDiagValMag) {
1108  ++numSmallDiagEntries;
1109  if (d_i_mag == STM::zero()) {
1110  ++numZeroDiagEntries;
1111  }
1112  }
1113  diag(i) = KAT::one() / d_i;
1114  }
1115  }
1116 
1117  // Collect global data about the diagonal entries. Only do this
1118  // (which involves O(1) all-reduces) if the user asked us to do
1119  // the extra work.
1120  //
1121  // FIXME (mfh 28 Mar 2013) This is wrong if some processes have
1122  // zero rows. Fixing this requires one of two options:
1123  //
1124  // 1. Use a custom reduction operation that ignores processes
1125  // with zero diagonal entries.
1126  // 2. Split the communicator, compute all-reduces using the
1127  // subcommunicator over processes that have a nonzero number
1128  // of diagonal entries, and then broadcast from one of those
1129  // processes (if there is one) to the processes in the other
1130  // subcommunicator.
1131  const Comm<int>& comm = *(A_->getRowMap()->getComm());
1132 
1133  // Compute global min and max magnitude of entries.
1134  Array<magnitude_type> localVals(2);
1135  localVals[0] = minMagDiagEntryMag;
1136  // (- (min (- x))) is the same as (max x).
1137  localVals[1] = -maxMagDiagEntryMag;
1138  Array<magnitude_type> globalVals(2);
1139  reduceAll<int, magnitude_type>(comm, REDUCE_MIN, 2,
1140  localVals.getRawPtr(),
1141  globalVals.getRawPtr());
1142  globalMinMagDiagEntryMag_ = globalVals[0];
1143  globalMaxMagDiagEntryMag_ = -globalVals[1];
1144 
1145  Array<size_t> localCounts(3);
1146  localCounts[0] = numSmallDiagEntries;
1147  localCounts[1] = numZeroDiagEntries;
1148  localCounts[2] = numNegDiagEntries;
1149  Array<size_t> globalCounts(3);
1150  reduceAll<int, size_t>(comm, REDUCE_SUM, 3,
1151  localCounts.getRawPtr(),
1152  globalCounts.getRawPtr());
1153  globalNumSmallDiagEntries_ = globalCounts[0];
1154  globalNumZeroDiagEntries_ = globalCounts[1];
1155  globalNumNegDiagEntries_ = globalCounts[2];
1156 
1157  // Compute and save the difference between the computed inverse
1158  // diagonal, and the original diagonal's inverse.
1159  vector_type diff(A_->getRowMap());
1160  diff.reciprocal(*origDiag);
1161  diff.update(-one, *Diagonal_, one);
1162  globalDiagNormDiff_ = diff.norm2();
1163  }
1164 
1165  // Extract the diagonal entries. The CrsMatrix graph version is
1166  // faster than the row matrix version for subsequent calls to
1167  // compute(), since it caches the diagonal offsets.
1168 
1169  bool debugAgainstSlowPath = false;
1170 
1171  auto crsMat = Details::getCrsMatrix(A_);
1172 
1173  if (crsMat.get() && crsMat->isFillComplete()) {
1174  // The invDiagKernel object computes diagonal offsets if
1175  // necessary. The "compute" call extracts diagonal enties,
1176  // optionally applies the L1 method and replacement of small
1177  // entries, and then inverts.
1178  if (invDiagKernel_.is_null())
1179  invDiagKernel_ = rcp(new Ifpack2::Details::InverseDiagonalKernel<op_type>(crsMat));
1180  else
1181  invDiagKernel_->setMatrix(crsMat);
1182  invDiagKernel_->compute(*Diagonal_,
1183  DoL1Method_ && IsParallel_, L1Eta_,
1184  fixTinyDiagEntries_, minDiagValMag);
1185  savedDiagOffsets_ = true;
1186 
1187  // mfh 27 May 2019: Later on, we should introduce an IFPACK2_DEBUG
1188  // environment variable to control this behavior at run time.
1189 #ifdef HAVE_IFPACK2_DEBUG
1190  debugAgainstSlowPath = true;
1191 #endif
1192  }
1193 
1194  if (crsMat.is_null() || !crsMat->isFillComplete() || debugAgainstSlowPath) {
1195  // We could not call the CrsMatrix version above, or want to
1196  // debug by comparing against the slow path.
1197 
1198  // FIXME: The L1 method in this code path runs on host, since we
1199  // don't have device access for row matrices.
1200 
1201  RCP<vector_type> Diagonal;
1202  if (debugAgainstSlowPath)
1203  Diagonal = rcp(new vector_type(A_->getRowMap()));
1204  else
1205  Diagonal = Diagonal_;
1206 
1207  A_->getLocalDiagCopy(*Diagonal); // slow path for row matrix
1208 
1209  // Setup for L1 Methods.
1210  // Here we add half the value of the off-processor entries in the row,
1211  // but only if diagonal isn't sufficiently large.
1212  //
1213  // This follows from Equation (6.5) in: Baker, Falgout, Kolev and
1214  // Yang. "Multigrid Smoothers for Ultraparallel Computing." SIAM
1215  // J. Sci. Comput., Vol. 33, No. 5. (2011), pp. 2864-2887.
1216  //
1217  if (DoL1Method_ && IsParallel_) {
1218  const row_matrix_type& A_row = *A_;
1219  auto diag = Diagonal->getLocalViewHost(Tpetra::Access::ReadWrite);
1220  const magnitude_type two = STM::one() + STM::one();
1221  const size_t maxLength = A_row.getLocalMaxNumRowEntries();
1222  nonconst_local_inds_host_view_type indices("indices", maxLength);
1223  nonconst_values_host_view_type values("values", maxLength);
1224  size_t numEntries;
1225 
1226  for (LO i = 0; i < numMyRows; ++i) {
1227  A_row.getLocalRowCopy(i, indices, values, numEntries);
1228  magnitude_type diagonal_boost = STM::zero();
1229  for (size_t k = 0; k < numEntries; ++k) {
1230  if (indices[k] >= numMyRows) {
1231  diagonal_boost += STS::magnitude(values[k] / two);
1232  }
1233  }
1234  if (KAT::magnitude(diag(i, 0)) < L1Eta_ * diagonal_boost) {
1235  diag(i, 0) += diagonal_boost;
1236  }
1237  }
1238  }
1239 
1240  //
1241  // Invert the matrix's diagonal entries (in Diagonal).
1242  //
1243  if (fixTinyDiagEntries_) {
1244  // Go through all the diagonal entries. Invert those that
1245  // aren't too small in magnitude. For those that are too
1246  // small in magnitude, replace them with oneOverMinDiagVal.
1247  auto localDiag = Diagonal->getLocalViewDevice(Tpetra::Access::ReadWrite);
1248  Kokkos::parallel_for(
1249  Kokkos::RangePolicy<MyExecSpace>(0, localDiag.extent(0)),
1250  KOKKOS_LAMBDA(local_ordinal_type i) {
1251  auto d_i = localDiag(i, 0);
1252  const magnitude_type d_i_mag = KAT::magnitude(d_i);
1253  // <= not <, in case minDiagValMag is zero.
1254  if (d_i_mag <= minDiagValMag) {
1255  d_i = oneOverMinDiagVal;
1256  } else {
1257  // For Stokhos types, operator/ returns an expression
1258  // type. Explicitly convert to IST before returning.
1259  d_i = IST(KAT::one() / d_i);
1260  }
1261  localDiag(i, 0) = d_i;
1262  });
1263  } else { // don't fix tiny or zero diagonal entries
1264  Diagonal->reciprocal(*Diagonal);
1265  }
1266 
1267  if (debugAgainstSlowPath) {
1268  // Validate the fast-path diagonal against the slow-path diagonal.
1269  Diagonal->update(STS::one(), *Diagonal_, -STS::one());
1270  const magnitude_type err = Diagonal->normInf();
1271  // The two diagonals should be exactly the same, so their
1272  // difference should be exactly zero.
1273  TEUCHOS_TEST_FOR_EXCEPTION(err > 100 * STM::eps(), std::logic_error, methodName << ": "
1274  << "\"fast-path\" diagonal computation failed. "
1275  "\\|D1 - D2\\|_inf = "
1276  << err << ".");
1277  }
1278  }
1279 
1280  // Count floating-point operations of computing the inverse diagonal.
1281  //
1282  // FIXME (mfh 30 Mar 2013) Shouldn't counts be global, not local?
1283  if (STS::isComplex) { // magnitude: at least 3 flops per diagonal entry
1284  ComputeFlops_ += 4.0 * numMyRows;
1285  } else {
1286  ComputeFlops_ += numMyRows;
1287  }
1288 
1289  if (PrecType_ == Ifpack2::Details::MTGS ||
1290  PrecType_ == Ifpack2::Details::MTSGS ||
1291  PrecType_ == Ifpack2::Details::GS2 ||
1292  PrecType_ == Ifpack2::Details::SGS2) {
1293  // KokkosKernels GaussSeidel Initialization.
1294  using scalar_view_t = typename local_matrix_device_type::values_type;
1295 
1296  TEUCHOS_TEST_FOR_EXCEPTION(crsMat.is_null(), std::logic_error, methodName << ": "
1297  "Multithreaded Gauss-Seidel methods currently only work "
1298  "when the input matrix is a Tpetra::CrsMatrix.");
1299  local_matrix_device_type kcsr = crsMat->getLocalMatrixDevice();
1300 
1301  // TODO BMK: This should be ReadOnly, and KokkosKernels should accept a
1302  // const-valued view for user-provided D^-1. OK for now, Diagonal_ is nonconst.
1303  auto diagView_2d = Diagonal_->getLocalViewDevice(Tpetra::Access::ReadWrite);
1304  scalar_view_t diagView_1d = Kokkos::subview(diagView_2d, Kokkos::ALL(), 0);
1305  KokkosSparse::gauss_seidel_numeric(
1306  mtKernelHandle_.getRawPtr(),
1307  A_->getLocalNumRows(),
1308  A_->getLocalNumCols(),
1309  kcsr.graph.row_map,
1310  kcsr.graph.entries,
1311  kcsr.values,
1312  diagView_1d,
1313  is_matrix_structurally_symmetric_);
1314  } else if (PrecType_ == Ifpack2::Details::GS || PrecType_ == Ifpack2::Details::SGS) {
1315  if (crsMat)
1316  serialGaussSeidel_ = rcp(new SerialGaussSeidel(crsMat, Diagonal_, localSmoothingIndices_, DampingFactor_));
1317  else
1318  serialGaussSeidel_ = rcp(new SerialGaussSeidel(A_, Diagonal_, localSmoothingIndices_, DampingFactor_));
1319  }
1320  } // end TimeMonitor scope
1321 
1322  ComputeTime_ += (timer->wallTime() - startTime);
1323  ++NumCompute_;
1324  IsComputed_ = true;
1325 }
1326 
1327 template <class MatrixType>
1329  ApplyInverseRichardson(const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
1330  Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y) const {
1331  using Teuchos::as;
1332  const double numGlobalRows = as<double>(A_->getGlobalNumRows());
1333  const double numVectors = as<double>(X.getNumVectors());
1334  if (ZeroStartingSolution_) {
1335  // For the first Richardson sweep, if we are allowed to assume that
1336  // the initial guess is zero, then Richardson is just alpha times the RHS
1337  // Compute it as Y(i,j) = DampingFactor_ * X(i,j).
1338  Y.scale(DampingFactor_, X);
1339 
1340  // Count (global) floating-point operations. Ifpack2 represents
1341  // this as a floating-point number rather than an integer, so that
1342  // overflow (for a very large number of calls, or a very large
1343  // problem) is approximate instead of catastrophic.
1344  double flopUpdate = 0.0;
1345  if (DampingFactor_ == STS::one()) {
1346  // Y(i,j) = X(i,j): one multiply for each entry of Y.
1347  flopUpdate = numGlobalRows * numVectors;
1348  } else {
1349  // Y(i,j) = DampingFactor_ * X(i,j):
1350  // One multiplies per entry of Y.
1351  flopUpdate = numGlobalRows * numVectors;
1352  }
1353  ApplyFlops_ += flopUpdate;
1354  if (NumSweeps_ == 1) {
1355  return;
1356  }
1357  }
1358  // If we were allowed to assume that the starting guess was zero,
1359  // then we have already done the first sweep above.
1360  const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1361 
1362  // Allocate a multivector if the cached one isn't perfect
1363  updateCachedMultiVector(Y.getMap(), as<size_t>(numVectors));
1364 
1365  for (int j = startSweep; j < NumSweeps_; ++j) {
1366  // Each iteration: Y = Y + \omega D^{-1} (X - A*Y)
1367  Tpetra::Details::residual(*A_, Y, X, *cachedMV_);
1368  Y.update(DampingFactor_, *cachedMV_, STS::one());
1369  }
1370 
1371  // For each column of output, for each pass over the matrix:
1372  //
1373  // - One + and one * for each matrix entry
1374  // - One / and one + for each row of the matrix
1375  // - If the damping factor is not one: one * for each row of the
1376  // matrix. (It's not fair to count this if the damping factor is
1377  // one, since the implementation could skip it. Whether it does
1378  // or not is the implementation's choice.)
1379 
1380  // Floating-point operations due to the damping factor, per matrix
1381  // row, per direction, per columm of output.
1382  const double numGlobalNonzeros = as<double>(A_->getGlobalNumEntries());
1383  const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1384  ApplyFlops_ += as<double>(NumSweeps_ - startSweep) * numVectors *
1385  (2.0 * numGlobalNonzeros + dampingFlops);
1386 }
1387 
1388 template <class MatrixType>
1389 void Relaxation<MatrixType>::
1390  ApplyInverseJacobi(const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
1391  Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y) const {
1392  using Teuchos::as;
1393  if (hasBlockCrsMatrix_) {
1394  ApplyInverseJacobi_BlockCrsMatrix(X, Y);
1395  return;
1396  }
1397 
1398  const double numGlobalRows = as<double>(A_->getGlobalNumRows());
1399  const double numVectors = as<double>(X.getNumVectors());
1400  if (ZeroStartingSolution_) {
1401  // For the first Jacobi sweep, if we are allowed to assume that
1402  // the initial guess is zero, then Jacobi is just diagonal
1403  // scaling. (A_ij * x_j = 0 for i != j, since x_j = 0.)
1404  // Compute it as Y(i,j) = DampingFactor_ * X(i,j) * D(i).
1405  Y.elementWiseMultiply(DampingFactor_, *Diagonal_, X, STS::zero());
1406 
1407  // Count (global) floating-point operations. Ifpack2 represents
1408  // this as a floating-point number rather than an integer, so that
1409  // overflow (for a very large number of calls, or a very large
1410  // problem) is approximate instead of catastrophic.
1411  double flopUpdate = 0.0;
1412  if (DampingFactor_ == STS::one()) {
1413  // Y(i,j) = X(i,j) * D(i): one multiply for each entry of Y.
1414  flopUpdate = numGlobalRows * numVectors;
1415  } else {
1416  // Y(i,j) = DampingFactor_ * X(i,j) * D(i):
1417  // Two multiplies per entry of Y.
1418  flopUpdate = 2.0 * numGlobalRows * numVectors;
1419  }
1420  ApplyFlops_ += flopUpdate;
1421  if (NumSweeps_ == 1) {
1422  return;
1423  }
1424  }
1425  // If we were allowed to assume that the starting guess was zero,
1426  // then we have already done the first sweep above.
1427  const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1428 
1429  // Allocate a multivector if the cached one isn't perfect
1430  updateCachedMultiVector(Y.getMap(), as<size_t>(numVectors));
1431 
1432  for (int j = startSweep; j < NumSweeps_; ++j) {
1433  // Each iteration: Y = Y + \omega D^{-1} (X - A*Y)
1434  Tpetra::Details::residual(*A_, Y, X, *cachedMV_);
1435  Y.elementWiseMultiply(DampingFactor_, *Diagonal_, *cachedMV_, STS::one());
1436  }
1437 
1438  // For each column of output, for each pass over the matrix:
1439  //
1440  // - One + and one * for each matrix entry
1441  // - One / and one + for each row of the matrix
1442  // - If the damping factor is not one: one * for each row of the
1443  // matrix. (It's not fair to count this if the damping factor is
1444  // one, since the implementation could skip it. Whether it does
1445  // or not is the implementation's choice.)
1446 
1447  // Floating-point operations due to the damping factor, per matrix
1448  // row, per direction, per columm of output.
1449  const double numGlobalNonzeros = as<double>(A_->getGlobalNumEntries());
1450  const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1451  ApplyFlops_ += as<double>(NumSweeps_ - startSweep) * numVectors *
1452  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1453 }
1454 
1455 template <class MatrixType>
1456 void Relaxation<MatrixType>::
1457  ApplyInverseJacobi_BlockCrsMatrix(const Tpetra::MultiVector<scalar_type,
1458  local_ordinal_type,
1459  global_ordinal_type,
1460  node_type>& X,
1461  Tpetra::MultiVector<scalar_type,
1462  local_ordinal_type,
1463  global_ordinal_type,
1464  node_type>& Y) const {
1465  using Tpetra::BlockMultiVector;
1466  using BMV = BlockMultiVector<scalar_type, local_ordinal_type,
1467  global_ordinal_type, node_type>;
1468 
1469  const block_crs_matrix_type* blockMatConst =
1470  dynamic_cast<const block_crs_matrix_type*>(A_.getRawPtr());
1471  TEUCHOS_TEST_FOR_EXCEPTION(blockMatConst == nullptr, std::logic_error,
1472  "This method should "
1473  "never be called if the matrix A_ is not a BlockCrsMatrix. "
1474  "Please report this bug to the Ifpack2 developers.");
1475  // mfh 23 Jan 2016: Unfortunately, the const cast is necessary.
1476  // This is because applyBlock() is nonconst (more accurate), while
1477  // apply() is const (required by Tpetra::Operator interface, but a
1478  // lie, because it possibly allocates temporary buffers).
1479  block_crs_matrix_type* blockMat =
1480  const_cast<block_crs_matrix_type*>(blockMatConst);
1481 
1482  auto meshRowMap = blockMat->getRowMap();
1483  auto meshColMap = blockMat->getColMap();
1484  const local_ordinal_type blockSize = blockMat->getBlockSize();
1485 
1486  BMV X_blk(X, *meshColMap, blockSize);
1487  BMV Y_blk(Y, *meshRowMap, blockSize);
1488 
1489  if (ZeroStartingSolution_) {
1490  // For the first sweep, if we are allowed to assume that the
1491  // initial guess is zero, then block Jacobi is just block diagonal
1492  // scaling. (A_ij * x_j = 0 for i != j, since x_j = 0.)
1493  Y_blk.blockWiseMultiply(DampingFactor_, blockDiag_, X_blk);
1494  if (NumSweeps_ == 1) {
1495  return;
1496  }
1497  }
1498 
1499  auto pointRowMap = Y.getMap();
1500  const size_t numVecs = X.getNumVectors();
1501 
1502  // We don't need to initialize the result MV, since the sparse
1503  // mat-vec will clobber its contents anyway.
1504  BMV A_times_Y(*meshRowMap, *pointRowMap, blockSize, numVecs);
1505 
1506  // If we were allowed to assume that the starting guess was zero,
1507  // then we have already done the first sweep above.
1508  const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1509 
1510  for (int j = startSweep; j < NumSweeps_; ++j) {
1511  blockMat->applyBlock(Y_blk, A_times_Y);
1512  // Y := Y + \omega D^{-1} (X - A*Y). Use A_times_Y as scratch.
1513  Y_blk.blockJacobiUpdate(DampingFactor_, blockDiag_,
1514  X_blk, A_times_Y, STS::one());
1515  }
1516 }
1517 
1518 template <class MatrixType>
1519 void Relaxation<MatrixType>::
1520  ApplyInverseSerialGS(const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
1521  Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
1522  Tpetra::ESweepDirection direction) const {
1523  using this_type = Relaxation<MatrixType>;
1524  // The CrsMatrix version is faster, because it can access the sparse
1525  // matrix data directly, rather than by copying out each row's data
1526  // in turn. Thus, we check whether the RowMatrix is really a
1527  // CrsMatrix.
1528  //
1529  // FIXME (mfh 07 Jul 2013) See note on crs_matrix_type typedef
1530  // declaration in Ifpack2_Relaxation_decl.hpp header file. The code
1531  // will still be correct if the cast fails, but it will use an
1532  // unoptimized kernel.
1533  auto blockCrsMat = Teuchos::rcp_dynamic_cast<const block_crs_matrix_type>(A_);
1534  auto crsMat = Details::getCrsMatrix(A_);
1535  if (blockCrsMat.get()) {
1536  const_cast<this_type&>(*this).ApplyInverseSerialGS_BlockCrsMatrix(*blockCrsMat, X, Y, direction);
1537  } else if (crsMat.get()) {
1538  ApplyInverseSerialGS_CrsMatrix(*crsMat, X, Y, direction);
1539  } else {
1540  ApplyInverseSerialGS_RowMatrix(X, Y, direction);
1541  }
1542 }
1543 
1544 template <class MatrixType>
1545 void Relaxation<MatrixType>::
1546  ApplyInverseSerialGS_RowMatrix(const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
1547  Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
1548  Tpetra::ESweepDirection direction) const {
1549  using Teuchos::Array;
1550  using Teuchos::ArrayRCP;
1551  using Teuchos::ArrayView;
1552  using Teuchos::as;
1553  using Teuchos::RCP;
1554  using Teuchos::rcp;
1555  using Teuchos::rcpFromRef;
1556  typedef Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type> MV;
1557 
1558  // Tpetra's GS implementation for CrsMatrix handles zeroing out the
1559  // starting multivector itself. The generic RowMatrix version here
1560  // does not, so we have to zero out Y here.
1561  if (ZeroStartingSolution_) {
1562  Y.putScalar(STS::zero());
1563  }
1564 
1565  size_t NumVectors = X.getNumVectors();
1566 
1567  RCP<MV> Y2;
1568  if (IsParallel_) {
1569  if (Importer_.is_null()) { // domain and column Maps are the same.
1570  updateCachedMultiVector(Y.getMap(), NumVectors);
1571  } else {
1572  updateCachedMultiVector(Importer_->getTargetMap(), NumVectors);
1573  }
1574  Y2 = cachedMV_;
1575  } else {
1576  Y2 = rcpFromRef(Y);
1577  }
1578 
1579  for (int j = 0; j < NumSweeps_; ++j) {
1580  // data exchange is here, once per sweep
1581  if (IsParallel_) {
1582  if (Importer_.is_null()) {
1583  // FIXME (mfh 27 May 2019) This doesn't deep copy -- not
1584  // clear if this is correct. Reevaluate at some point.
1585 
1586  *Y2 = Y; // just copy, since domain and column Maps are the same
1587  } else {
1588  Y2->doImport(Y, *Importer_, Tpetra::INSERT);
1589  }
1590  }
1591  serialGaussSeidel_->apply(*Y2, X, direction);
1592 
1593  // FIXME (mfh 02 Jan 2013) This is only correct if row Map == range Map.
1594  if (IsParallel_) {
1595  Tpetra::deep_copy(Y, *Y2);
1596  }
1597  }
1598 
1599  // See flop count discussion in implementation of ApplyInverseGS_CrsMatrix().
1600  const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1601  const double numVectors = as<double>(X.getNumVectors());
1602  const double numGlobalRows = as<double>(A_->getGlobalNumRows());
1603  const double numGlobalNonzeros = as<double>(A_->getGlobalNumEntries());
1604  ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
1605  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1606 }
1607 
1608 template <class MatrixType>
1609 void Relaxation<MatrixType>::
1610  ApplyInverseSerialGS_CrsMatrix(const crs_matrix_type& A,
1611  const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& B,
1612  Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
1613  Tpetra::ESweepDirection direction) const {
1614  using Teuchos::null;
1615  using Teuchos::RCP;
1616  using Teuchos::rcp;
1617  using Teuchos::rcp_const_cast;
1618  using Teuchos::rcpFromRef;
1619  typedef scalar_type Scalar;
1620  const char prefix[] = "Ifpack2::Relaxation::SerialGS: ";
1621  const scalar_type ZERO = Teuchos::ScalarTraits<Scalar>::zero();
1622 
1624  !A.isFillComplete(), std::runtime_error,
1625  prefix << "The matrix is not fill complete.");
1627  NumSweeps_ < 0, std::invalid_argument,
1628  prefix << "The number of sweeps must be nonnegative, "
1629  "but you provided numSweeps = "
1630  << NumSweeps_ << " < 0.");
1631 
1632  if (NumSweeps_ == 0) {
1633  return;
1634  }
1635 
1636  RCP<const import_type> importer = A.getGraph()->getImporter();
1637 
1638  RCP<const map_type> domainMap = A.getDomainMap();
1639  RCP<const map_type> rangeMap = A.getRangeMap();
1640  RCP<const map_type> rowMap = A.getGraph()->getRowMap();
1641  RCP<const map_type> colMap = A.getGraph()->getColMap();
1642 
1643 #ifdef HAVE_IFPACK2_DEBUG
1644  {
1645  // The relation 'isSameAs' is transitive. It's also a
1646  // collective, so we don't have to do a "shared" test for
1647  // exception (i.e., a global reduction on the test value).
1649  !X.getMap()->isSameAs(*domainMap), std::runtime_error,
1650  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
1651  "multivector X be in the domain Map of the matrix.");
1653  !B.getMap()->isSameAs(*rangeMap), std::runtime_error,
1654  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
1655  "B be in the range Map of the matrix.");
1657  !Diagonal_->getMap()->isSameAs(*rowMap), std::runtime_error,
1658  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
1659  "D be in the row Map of the matrix.");
1661  !rowMap->isSameAs(*rangeMap), std::runtime_error,
1662  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the row Map and the "
1663  "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
1665  !domainMap->isSameAs(*rangeMap), std::runtime_error,
1666  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the domain Map and "
1667  "the range Map of the matrix be the same.");
1668  }
1669 #endif
1670 
1671  // Fetch a (possibly cached) temporary column Map multivector
1672  // X_colMap, and a domain Map view X_domainMap of it. Both have
1673  // constant stride by construction. We know that the domain Map
1674  // must include the column Map, because our Gauss-Seidel kernel
1675  // requires that the row Map, domain Map, and range Map are all
1676  // the same, and that each process owns all of its own diagonal
1677  // entries of the matrix.
1678 
1679  RCP<multivector_type> X_colMap;
1680  RCP<multivector_type> X_domainMap;
1681  bool copyBackOutput = false;
1682  if (importer.is_null()) {
1683  X_colMap = Teuchos::rcpFromRef(X);
1684  X_domainMap = Teuchos::rcpFromRef(X);
1685  // Column Map and domain Map are the same, so there are no
1686  // remote entries. Thus, if we are not setting the initial
1687  // guess to zero, we don't have to worry about setting remote
1688  // entries to zero, even though we are not doing an Import in
1689  // this case.
1690  if (ZeroStartingSolution_) {
1691  X_colMap->putScalar(ZERO);
1692  }
1693  // No need to copy back to X at end.
1694  } else { // Column Map and domain Map are _not_ the same.
1695  updateCachedMultiVector(colMap, X.getNumVectors());
1696  X_colMap = cachedMV_;
1697  X_domainMap = X_colMap->offsetViewNonConst(domainMap, 0);
1698 
1699  if (ZeroStartingSolution_) {
1700  // No need for an Import, since we're filling with zeros.
1701  X_colMap->putScalar(ZERO);
1702  } else {
1703  // We could just copy X into X_domainMap. However, that
1704  // wastes a copy, because the Import also does a copy (plus
1705  // communication). Since the typical use case for
1706  // Gauss-Seidel is a small number of sweeps (2 is typical), we
1707  // don't want to waste that copy. Thus, we do the Import
1708  // here, and skip the first Import in the first sweep.
1709  // Importing directly from X effects the copy into X_domainMap
1710  // (which is a view of X_colMap).
1711  X_colMap->doImport(X, *importer, Tpetra::INSERT);
1712  }
1713  copyBackOutput = true; // Don't forget to copy back at end.
1714  } // if column and domain Maps are (not) the same
1715 
1716  for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
1717  if (!importer.is_null() && sweep > 0) {
1718  // We already did the first Import for the zeroth sweep above,
1719  // if it was necessary.
1720  X_colMap->doImport(*X_domainMap, *importer, Tpetra::INSERT);
1721  }
1722  // Do local Gauss-Seidel (forward, backward or symmetric)
1723  serialGaussSeidel_->apply(*X_colMap, B, direction);
1724  }
1725 
1726  if (copyBackOutput) {
1727  try {
1728  deep_copy(X, *X_domainMap); // Copy result back into X.
1729  } catch (std::exception& e) {
1731  true, std::runtime_error, prefix << "deep_copy(X, *X_domainMap) "
1732  "threw an exception: "
1733  << e.what());
1734  }
1735  }
1736 
1737  // For each column of output, for each sweep over the matrix:
1738  //
1739  // - One + and one * for each matrix entry
1740  // - One / and one + for each row of the matrix
1741  // - If the damping factor is not one: one * for each row of the
1742  // matrix. (It's not fair to count this if the damping factor is
1743  // one, since the implementation could skip it. Whether it does
1744  // or not is the implementation's choice.)
1745 
1746  // Floating-point operations due to the damping factor, per matrix
1747  // row, per direction, per columm of output.
1748  const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1749  const double numVectors = X.getNumVectors();
1750  const double numGlobalRows = A_->getGlobalNumRows();
1751  const double numGlobalNonzeros = A_->getGlobalNumEntries();
1752  ApplyFlops_ += NumSweeps_ * numVectors *
1753  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1754 }
1755 
1756 template <class MatrixType>
1757 void Relaxation<MatrixType>::
1758  ApplyInverseSerialGS_BlockCrsMatrix(const block_crs_matrix_type& A,
1759  const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
1760  Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
1761  Tpetra::ESweepDirection direction) {
1762  using Teuchos::RCP;
1763  using Teuchos::rcp;
1764  using Teuchos::rcpFromRef;
1765  using Tpetra::INSERT;
1766 
1767  // FIXME: (tcf) 8/21/2014 -- may be problematic for multiple right hand sides
1768  //
1769  // NOTE (mfh 12 Sep 2014) I don't think it should be a problem for
1770  // multiple right-hand sides, unless the input or output MultiVector
1771  // does not have constant stride. We should check for that case
1772  // here, in case it doesn't work in localGaussSeidel (which is
1773  // entirely possible).
1774  block_multivector_type yBlock(Y, *(A.getGraph()->getDomainMap()), A.getBlockSize());
1775  const block_multivector_type xBlock(X, *(A.getColMap()), A.getBlockSize());
1776 
1777  bool performImport = false;
1778  RCP<block_multivector_type> yBlockCol;
1779  if (Importer_.is_null()) {
1780  yBlockCol = rcpFromRef(yBlock);
1781  } else {
1782  if (yBlockColumnPointMap_.is_null() ||
1783  yBlockColumnPointMap_->getNumVectors() != yBlock.getNumVectors() ||
1784  yBlockColumnPointMap_->getBlockSize() != yBlock.getBlockSize()) {
1785  yBlockColumnPointMap_ =
1786  rcp(new block_multivector_type(*(A.getColMap()), A.getBlockSize(),
1787  static_cast<local_ordinal_type>(yBlock.getNumVectors())));
1788  }
1789  yBlockCol = yBlockColumnPointMap_;
1790  if (pointImporter_.is_null()) {
1791  auto srcMap = rcp(new map_type(yBlock.getPointMap()));
1792  auto tgtMap = rcp(new map_type(yBlockCol->getPointMap()));
1793  pointImporter_ = rcp(new import_type(srcMap, tgtMap));
1794  }
1795  performImport = true;
1796  }
1797 
1798  multivector_type yBlock_mv;
1799  multivector_type yBlockCol_mv;
1800  RCP<const multivector_type> yBlockColPointDomain;
1801  if (performImport) { // create views (shallow copies)
1802  yBlock_mv = yBlock.getMultiVectorView();
1803  yBlockCol_mv = yBlockCol->getMultiVectorView();
1804  yBlockColPointDomain =
1805  yBlockCol_mv.offsetView(A.getDomainMap(), 0);
1806  }
1807 
1808  if (ZeroStartingSolution_) {
1809  yBlockCol->putScalar(STS::zero());
1810  } else if (performImport) {
1811  yBlockCol_mv.doImport(yBlock_mv, *pointImporter_, Tpetra::INSERT);
1812  }
1813 
1814  for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
1815  if (performImport && sweep > 0) {
1816  yBlockCol_mv.doImport(yBlock_mv, *pointImporter_, Tpetra::INSERT);
1817  }
1818  serialGaussSeidel_->applyBlock(*yBlockCol, xBlock, direction);
1819  if (performImport) {
1820  Tpetra::deep_copy(Y, *yBlockColPointDomain);
1821  }
1822  }
1823 }
1824 
1825 template <class MatrixType>
1826 void Relaxation<MatrixType>::
1827  ApplyInverseMTGS_CrsMatrix(
1828  const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& B,
1829  Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
1830  Tpetra::ESweepDirection direction) const {
1831  using Teuchos::as;
1832  using Teuchos::null;
1833  using Teuchos::RCP;
1834  using Teuchos::rcp;
1835  using Teuchos::rcp_const_cast;
1836  using Teuchos::rcpFromRef;
1837 
1838  typedef scalar_type Scalar;
1839 
1840  const char prefix[] = "Ifpack2::Relaxation::(reordered)MTGaussSeidel: ";
1841  const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero();
1842 
1843  auto crsMat = Details::getCrsMatrix(A_);
1844  TEUCHOS_TEST_FOR_EXCEPTION(crsMat.is_null(), std::logic_error,
1845  "Ifpack2::Relaxation::apply: "
1846  "Multithreaded Gauss-Seidel methods currently only work when the "
1847  "input matrix is a Tpetra::CrsMatrix.");
1848 
1849  // Teuchos::ArrayView<local_ordinal_type> rowIndices; // unused, as of 04 Jan 2017
1850  TEUCHOS_TEST_FOR_EXCEPTION(!localSmoothingIndices_.is_null(), std::logic_error,
1851  "Ifpack2's implementation of Multithreaded Gauss-Seidel does not "
1852  "implement the case where the user supplies an iteration order. "
1853  "This error used to appear as \"MT GaussSeidel ignores the given "
1854  "order\". "
1855  "I tried to add more explanation, but I didn't implement \"MT "
1856  "GaussSeidel\" [sic]. "
1857  "You'll have to ask the person who did.");
1858 
1859  TEUCHOS_TEST_FOR_EXCEPTION(!crsMat->isFillComplete(), std::runtime_error, prefix << "The "
1860  "input CrsMatrix is not fill complete. Please call fillComplete "
1861  "on the matrix, then do setup again, before calling apply(). "
1862  "\"Do setup\" means that if only the matrix's values have changed "
1863  "since last setup, you need only call compute(). If the matrix's "
1864  "structure may also have changed, you must first call initialize(), "
1865  "then call compute(). If you have not set up this preconditioner "
1866  "for this matrix before, you must first call initialize(), then "
1867  "call compute().");
1868  TEUCHOS_TEST_FOR_EXCEPTION(NumSweeps_ < 0, std::logic_error, prefix << ": NumSweeps_ = " << NumSweeps_ << " < 0. Please report this bug to the Ifpack2 "
1869  "developers.");
1870  if (NumSweeps_ == 0) {
1871  return;
1872  }
1873 
1874  RCP<const import_type> importer = crsMat->getGraph()->getImporter();
1876  !crsMat->getGraph()->getExporter().is_null(), std::runtime_error,
1877  "This method's implementation currently requires that the matrix's row, "
1878  "domain, and range Maps be the same. This cannot be the case, because "
1879  "the matrix has a nontrivial Export object.");
1880 
1881  RCP<const map_type> domainMap = crsMat->getDomainMap();
1882  RCP<const map_type> rangeMap = crsMat->getRangeMap();
1883  RCP<const map_type> rowMap = crsMat->getGraph()->getRowMap();
1884  RCP<const map_type> colMap = crsMat->getGraph()->getColMap();
1885 
1886 #ifdef HAVE_IFPACK2_DEBUG
1887  {
1888  // The relation 'isSameAs' is transitive. It's also a
1889  // collective, so we don't have to do a "shared" test for
1890  // exception (i.e., a global reduction on the test value).
1892  !X.getMap()->isSameAs(*domainMap), std::runtime_error,
1893  "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
1894  "multivector X be in the domain Map of the matrix.");
1896  !B.getMap()->isSameAs(*rangeMap), std::runtime_error,
1897  "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
1898  "B be in the range Map of the matrix.");
1900  !rowMap->isSameAs(*rangeMap), std::runtime_error,
1901  "Ifpack2::Relaxation::MTGaussSeidel requires that the row Map and the "
1902  "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
1904  !domainMap->isSameAs(*rangeMap), std::runtime_error,
1905  "Ifpack2::Relaxation::MTGaussSeidel requires that the domain Map and "
1906  "the range Map of the matrix be the same.");
1907  }
1908 #else
1909  // Forestall any compiler warnings for unused variables.
1910  (void)rangeMap;
1911  (void)rowMap;
1912 #endif // HAVE_IFPACK2_DEBUG
1913 
1914  // Fetch a (possibly cached) temporary column Map multivector
1915  // X_colMap, and a domain Map view X_domainMap of it. Both have
1916  // constant stride by construction. We know that the domain Map
1917  // must include the column Map, because our Gauss-Seidel kernel
1918  // requires that the row Map, domain Map, and range Map are all
1919  // the same, and that each process owns all of its own diagonal
1920  // entries of the matrix.
1921 
1922  RCP<multivector_type> X_colMap;
1923  RCP<multivector_type> X_domainMap;
1924  bool copyBackOutput = false;
1925  if (importer.is_null()) {
1926  if (X.isConstantStride()) {
1927  X_colMap = rcpFromRef(X);
1928  X_domainMap = rcpFromRef(X);
1929 
1930  // Column Map and domain Map are the same, so there are no
1931  // remote entries. Thus, if we are not setting the initial
1932  // guess to zero, we don't have to worry about setting remote
1933  // entries to zero, even though we are not doing an Import in
1934  // this case.
1935  if (ZeroStartingSolution_) {
1936  X_colMap->putScalar(ZERO);
1937  }
1938  // No need to copy back to X at end.
1939  } else {
1940  // We must copy X into a constant stride multivector.
1941  // Just use the cached column Map multivector for that.
1942  // force=true means fill with zeros, so no need to fill
1943  // remote entries (not in domain Map) with zeros.
1944  updateCachedMultiVector(colMap, as<size_t>(X.getNumVectors()));
1945  X_colMap = cachedMV_;
1946  // X_domainMap is always a domain Map view of the column Map
1947  // multivector. In this case, the domain and column Maps are
1948  // the same, so X_domainMap _is_ X_colMap.
1949  X_domainMap = X_colMap;
1950  if (!ZeroStartingSolution_) { // Don't copy if zero initial guess
1951  try {
1952  deep_copy(*X_domainMap, X); // Copy X into constant stride MV
1953  } catch (std::exception& e) {
1954  std::ostringstream os;
1955  os << "Ifpack2::Relaxation::MTGaussSeidel: "
1956  "deep_copy(*X_domainMap, X) threw an exception: "
1957  << e.what() << ".";
1958  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what());
1959  }
1960  }
1961  copyBackOutput = true; // Don't forget to copy back at end.
1962  /*
1963  TPETRA_EFFICIENCY_WARNING(
1964  ! X.isConstantStride (),
1965  std::runtime_error,
1966  "MTGaussSeidel: The current implementation of the Gauss-Seidel "
1967  "kernel requires that X and B both have constant stride. Since X "
1968  "does not have constant stride, we had to make a copy. This is a "
1969  "limitation of the current implementation and not your fault, but we "
1970  "still report it as an efficiency warning for your information.");
1971  */
1972  }
1973  } else { // Column Map and domain Map are _not_ the same.
1974  updateCachedMultiVector(colMap, as<size_t>(X.getNumVectors()));
1975  X_colMap = cachedMV_;
1976 
1977  X_domainMap = X_colMap->offsetViewNonConst(domainMap, 0);
1978 
1979  if (ZeroStartingSolution_) {
1980  // No need for an Import, since we're filling with zeros.
1981  X_colMap->putScalar(ZERO);
1982  } else {
1983  // We could just copy X into X_domainMap. However, that
1984  // wastes a copy, because the Import also does a copy (plus
1985  // communication). Since the typical use case for
1986  // Gauss-Seidel is a small number of sweeps (2 is typical), we
1987  // don't want to waste that copy. Thus, we do the Import
1988  // here, and skip the first Import in the first sweep.
1989  // Importing directly from X effects the copy into X_domainMap
1990  // (which is a view of X_colMap).
1991  X_colMap->doImport(X, *importer, Tpetra::CombineMode::INSERT);
1992  }
1993  copyBackOutput = true; // Don't forget to copy back at end.
1994  } // if column and domain Maps are (not) the same
1995 
1996  // The Gauss-Seidel / SOR kernel expects multivectors of constant
1997  // stride. X_colMap is by construction, but B might not be. If
1998  // it's not, we have to make a copy.
1999  RCP<const multivector_type> B_in;
2000  if (B.isConstantStride()) {
2001  B_in = rcpFromRef(B);
2002  } else {
2003  // Range Map and row Map are the same in this case, so we can
2004  // use the cached row Map multivector to store a constant stride
2005  // copy of B.
2006  RCP<multivector_type> B_in_nonconst = rcp(new multivector_type(rowMap, B.getNumVectors()));
2007  try {
2008  deep_copy(*B_in_nonconst, B);
2009  } catch (std::exception& e) {
2010  std::ostringstream os;
2011  os << "Ifpack2::Relaxation::MTGaussSeidel: "
2012  "deep_copy(*B_in_nonconst, B) threw an exception: "
2013  << e.what() << ".";
2014  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what());
2015  }
2016  B_in = rcp_const_cast<const multivector_type>(B_in_nonconst);
2017 
2018  /*
2019  TPETRA_EFFICIENCY_WARNING(
2020  ! B.isConstantStride (),
2021  std::runtime_error,
2022  "MTGaussSeidel: The current implementation requires that B have "
2023  "constant stride. Since B does not have constant stride, we had to "
2024  "copy it into a separate constant-stride multivector. This is a "
2025  "limitation of the current implementation and not your fault, but we "
2026  "still report it as an efficiency warning for your information.");
2027  */
2028  }
2029 
2030  local_matrix_device_type kcsr = crsMat->getLocalMatrixDevice();
2031 
2032  bool update_y_vector = true;
2033  // false as it was done up already, and we dont want to zero it in each sweep.
2034  bool zero_x_vector = false;
2035 
2036  for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
2037  if (!importer.is_null() && sweep > 0) {
2038  // We already did the first Import for the zeroth sweep above,
2039  // if it was necessary.
2040  X_colMap->doImport(*X_domainMap, *importer, Tpetra::CombineMode::INSERT);
2041  }
2042 
2043  if (direction == Tpetra::Symmetric) {
2044  KokkosSparse::symmetric_gauss_seidel_apply(mtKernelHandle_.getRawPtr(), A_->getLocalNumRows(), A_->getLocalNumCols(),
2045  kcsr.graph.row_map, kcsr.graph.entries, kcsr.values,
2046  X_colMap->getLocalViewDevice(Tpetra::Access::ReadWrite),
2047  B_in->getLocalViewDevice(Tpetra::Access::ReadOnly),
2048  zero_x_vector, update_y_vector, DampingFactor_, 1);
2049  } else if (direction == Tpetra::Forward) {
2050  KokkosSparse::forward_sweep_gauss_seidel_apply(mtKernelHandle_.getRawPtr(), A_->getLocalNumRows(), A_->getLocalNumCols(),
2051  kcsr.graph.row_map, kcsr.graph.entries, kcsr.values,
2052  X_colMap->getLocalViewDevice(Tpetra::Access::ReadWrite),
2053  B_in->getLocalViewDevice(Tpetra::Access::ReadOnly),
2054  zero_x_vector, update_y_vector, DampingFactor_, 1);
2055  } else if (direction == Tpetra::Backward) {
2056  KokkosSparse::backward_sweep_gauss_seidel_apply(mtKernelHandle_.getRawPtr(), A_->getLocalNumRows(), A_->getLocalNumCols(),
2057  kcsr.graph.row_map, kcsr.graph.entries, kcsr.values,
2058  X_colMap->getLocalViewDevice(Tpetra::Access::ReadWrite),
2059  B_in->getLocalViewDevice(Tpetra::Access::ReadOnly),
2060  zero_x_vector, update_y_vector, DampingFactor_, 1);
2061  } else {
2063  true, std::invalid_argument,
2064  prefix << "The 'direction' enum does not have any of its valid "
2065  "values: Forward, Backward, or Symmetric.");
2066  }
2067  update_y_vector = false;
2068  }
2069 
2070  if (copyBackOutput) {
2071  try {
2072  deep_copy(X, *X_domainMap); // Copy result back into X.
2073  } catch (std::exception& e) {
2075  true, std::runtime_error, prefix << "deep_copy(X, *X_domainMap) "
2076  "threw an exception: "
2077  << e.what());
2078  }
2079  }
2080 
2081  const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
2082  const double numVectors = as<double>(X.getNumVectors());
2083  const double numGlobalRows = as<double>(A_->getGlobalNumRows());
2084  const double numGlobalNonzeros = as<double>(A_->getGlobalNumEntries());
2085  double ApplyFlops = NumSweeps_ * numVectors *
2086  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
2087  if (direction == Tpetra::Symmetric)
2088  ApplyFlops *= 2.0;
2089  ApplyFlops_ += ApplyFlops;
2090 }
2091 
2092 template <class MatrixType>
2094  std::ostringstream os;
2095 
2096  // Output is a valid YAML dictionary in flow style. If you don't
2097  // like everything on a single line, you should call describe()
2098  // instead.
2099  os << "\"Ifpack2::Relaxation\": {";
2100 
2101  os << "Initialized: " << (isInitialized() ? "true" : "false") << ", "
2102  << "Computed: " << (isComputed() ? "true" : "false") << ", ";
2103 
2104  // It's useful to print this instance's relaxation method (Jacobi,
2105  // Gauss-Seidel, or symmetric Gauss-Seidel). If you want more info
2106  // than that, call describe() instead.
2107  os << "Type: ";
2108  if (PrecType_ == Ifpack2::Details::JACOBI) {
2109  os << "Jacobi";
2110  } else if (PrecType_ == Ifpack2::Details::GS) {
2111  os << "Gauss-Seidel";
2112  } else if (PrecType_ == Ifpack2::Details::SGS) {
2113  os << "Symmetric Gauss-Seidel";
2114  } else if (PrecType_ == Ifpack2::Details::MTGS) {
2115  os << "MT Gauss-Seidel";
2116  } else if (PrecType_ == Ifpack2::Details::MTSGS) {
2117  os << "MT Symmetric Gauss-Seidel";
2118  } else if (PrecType_ == Ifpack2::Details::GS2) {
2119  os << "Two-stage Gauss-Seidel";
2120  } else if (PrecType_ == Ifpack2::Details::SGS2) {
2121  os << "Two-stage Symmetric Gauss-Seidel";
2122  } else {
2123  os << "INVALID";
2124  }
2125  if (hasBlockCrsMatrix_)
2126  os << ", BlockCrs";
2127 
2128  os << ", "
2129  << "sweeps: " << NumSweeps_ << ", "
2130  << "damping factor: " << DampingFactor_ << ", ";
2131 
2132  if (PrecType_ == Ifpack2::Details::MTGS || PrecType_ == Ifpack2::Details::MTSGS) {
2133  os << "\"relaxation: mtgs cluster size\": " << clusterSize_ << ", ";
2134  os << "\"relaxation: long row threshold\": " << longRowThreshold_ << ", ";
2135  os << "\"relaxation: symmetric matrix structure\": " << (is_matrix_structurally_symmetric_ ? "true" : "false") << ", ";
2136  os << "\"relaxation: relaxation: mtgs coloring algorithm\": ";
2137  switch (mtColoringAlgorithm_) {
2138  case KokkosGraph::COLORING_DEFAULT:
2139  os << "DEFAULT";
2140  break;
2141  case KokkosGraph::COLORING_SERIAL:
2142  os << "SERIAL";
2143  break;
2144  case KokkosGraph::COLORING_VB:
2145  os << "VB";
2146  break;
2147  case KokkosGraph::COLORING_VBBIT:
2148  os << "VBBIT";
2149  break;
2150  case KokkosGraph::COLORING_VBCS:
2151  os << "VBCS";
2152  break;
2153  case KokkosGraph::COLORING_VBD:
2154  os << "VBD";
2155  break;
2156  case KokkosGraph::COLORING_VBDBIT:
2157  os << "VBDBIT";
2158  break;
2159  case KokkosGraph::COLORING_EB:
2160  os << "EB";
2161  break;
2162  default:
2163  os << "*Invalid*";
2164  }
2165  os << ", ";
2166  }
2167 
2168  if (PrecType_ == Ifpack2::Details::GS2 ||
2169  PrecType_ == Ifpack2::Details::SGS2) {
2170  os << "outer sweeps: " << NumOuterSweeps_ << ", "
2171  << "inner sweeps: " << NumInnerSweeps_ << ", "
2172  << "inner damping factor: " << InnerDampingFactor_ << ", ";
2173  }
2174 
2175  if (DoL1Method_) {
2176  os << "use l1: " << DoL1Method_ << ", "
2177  << "l1 eta: " << L1Eta_ << ", ";
2178  }
2179 
2180  if (A_.is_null()) {
2181  os << "Matrix: null";
2182  } else {
2183  os << "Global matrix dimensions: ["
2184  << A_->getGlobalNumRows() << ", " << A_->getGlobalNumCols() << "]"
2185  << ", Global nnz: " << A_->getGlobalNumEntries();
2186  }
2187 
2188  os << "}";
2189  return os.str();
2190 }
2191 
2192 template <class MatrixType>
2195  const Teuchos::EVerbosityLevel verbLevel) const {
2196  using std::endl;
2197  using Teuchos::OSTab;
2199  using Teuchos::VERB_DEFAULT;
2200  using Teuchos::VERB_EXTREME;
2201  using Teuchos::VERB_HIGH;
2202  using Teuchos::VERB_LOW;
2203  using Teuchos::VERB_MEDIUM;
2204  using Teuchos::VERB_NONE;
2205 
2206  const Teuchos::EVerbosityLevel vl =
2207  (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
2208 
2209  const int myRank = this->getComm()->getRank();
2210 
2211  // none: print nothing
2212  // low: print O(1) info from Proc 0
2213  // medium:
2214  // high:
2215  // extreme:
2216 
2217  if (vl != VERB_NONE && myRank == 0) {
2218  // Describable interface asks each implementation to start with a tab.
2219  OSTab tab1(out);
2220  // Output is valid YAML; hence the quotes, to protect the colons.
2221  out << "\"Ifpack2::Relaxation\":" << endl;
2222  OSTab tab2(out);
2223  out << "MatrixType: \"" << TypeNameTraits<MatrixType>::name() << "\""
2224  << endl;
2225  if (this->getObjectLabel() != "") {
2226  out << "Label: " << this->getObjectLabel() << endl;
2227  }
2228  out << "Initialized: " << (isInitialized() ? "true" : "false") << endl
2229  << "Computed: " << (isComputed() ? "true" : "false") << endl
2230  << "Parameters: " << endl;
2231  {
2232  OSTab tab3(out);
2233  out << "\"relaxation: type\": ";
2234  if (PrecType_ == Ifpack2::Details::JACOBI) {
2235  out << "Jacobi";
2236  } else if (PrecType_ == Ifpack2::Details::GS) {
2237  out << "Gauss-Seidel";
2238  } else if (PrecType_ == Ifpack2::Details::SGS) {
2239  out << "Symmetric Gauss-Seidel";
2240  } else if (PrecType_ == Ifpack2::Details::MTGS) {
2241  out << "MT Gauss-Seidel";
2242  } else if (PrecType_ == Ifpack2::Details::MTSGS) {
2243  out << "MT Symmetric Gauss-Seidel";
2244  } else if (PrecType_ == Ifpack2::Details::GS2) {
2245  out << "Two-stage Gauss-Seidel";
2246  } else if (PrecType_ == Ifpack2::Details::SGS2) {
2247  out << "Two-stage Symmetric Gauss-Seidel";
2248  } else {
2249  out << "INVALID";
2250  }
2251  // We quote these parameter names because they contain colons.
2252  // YAML uses the colon to distinguish key from value.
2253  out << endl
2254  << "\"relaxation: sweeps\": " << NumSweeps_ << endl
2255  << "\"relaxation: damping factor\": " << DampingFactor_ << endl
2256  << "\"relaxation: min diagonal value\": " << MinDiagonalValue_ << endl
2257  << "\"relaxation: zero starting solution\": " << ZeroStartingSolution_ << endl
2258  << "\"relaxation: backward mode\": " << DoBackwardGS_ << endl
2259  << "\"relaxation: use l1\": " << DoL1Method_ << endl
2260  << "\"relaxation: l1 eta\": " << L1Eta_ << endl;
2261  if (PrecType_ == Ifpack2::Details::MTGS || PrecType_ == Ifpack2::Details::MTSGS) {
2262  out << "\"relaxation: mtgs cluster size\": " << clusterSize_ << endl;
2263  out << "\"relaxation: long row threshold\": " << longRowThreshold_ << endl;
2264  out << "\"relaxation: symmetric matrix structure\": " << (is_matrix_structurally_symmetric_ ? "true" : "false") << endl;
2265  out << "\"relaxation: relaxation: mtgs coloring algorithm\": ";
2266  switch (mtColoringAlgorithm_) {
2267  case KokkosGraph::COLORING_DEFAULT:
2268  out << "DEFAULT";
2269  break;
2270  case KokkosGraph::COLORING_SERIAL:
2271  out << "SERIAL";
2272  break;
2273  case KokkosGraph::COLORING_VB:
2274  out << "VB";
2275  break;
2276  case KokkosGraph::COLORING_VBBIT:
2277  out << "VBBIT";
2278  break;
2279  case KokkosGraph::COLORING_VBCS:
2280  out << "VBCS";
2281  break;
2282  case KokkosGraph::COLORING_VBD:
2283  out << "VBD";
2284  break;
2285  case KokkosGraph::COLORING_VBDBIT:
2286  out << "VBDBIT";
2287  break;
2288  case KokkosGraph::COLORING_EB:
2289  out << "EB";
2290  break;
2291  default:
2292  out << "*Invalid*";
2293  }
2294  out << endl;
2295  }
2296  if (PrecType_ == Ifpack2::Details::GS2 || PrecType_ == Ifpack2::Details::SGS2) {
2297  out << "\"relaxation: inner damping factor\": " << InnerDampingFactor_ << endl;
2298  out << "\"relaxation: outer sweeps\" : " << NumOuterSweeps_ << endl;
2299  out << "\"relaxation: inner sweeps\" : " << NumInnerSweeps_ << endl;
2300  }
2301  }
2302  out << "Computed quantities:" << endl;
2303  {
2304  OSTab tab3(out);
2305  out << "Global number of rows: " << A_->getGlobalNumRows() << endl
2306  << "Global number of columns: " << A_->getGlobalNumCols() << endl;
2307  }
2308  if (checkDiagEntries_ && isComputed()) {
2309  out << "Properties of input diagonal entries:" << endl;
2310  {
2311  OSTab tab3(out);
2312  out << "Magnitude of minimum-magnitude diagonal entry: "
2313  << globalMinMagDiagEntryMag_ << endl
2314  << "Magnitude of maximum-magnitude diagonal entry: "
2315  << globalMaxMagDiagEntryMag_ << endl
2316  << "Number of diagonal entries with small magnitude: "
2317  << globalNumSmallDiagEntries_ << endl
2318  << "Number of zero diagonal entries: "
2319  << globalNumZeroDiagEntries_ << endl
2320  << "Number of diagonal entries with negative real part: "
2321  << globalNumNegDiagEntries_ << endl
2322  << "Abs 2-norm diff between computed and actual inverse "
2323  << "diagonal: " << globalDiagNormDiff_ << endl;
2324  }
2325  }
2326  if (isComputed()) {
2327  out << "Saved diagonal offsets: "
2328  << (savedDiagOffsets_ ? "true" : "false") << endl;
2329  }
2330  out << "Call counts and total times (in seconds): " << endl;
2331  {
2332  OSTab tab3(out);
2333  out << "initialize: " << endl;
2334  {
2335  OSTab tab4(out);
2336  out << "Call count: " << NumInitialize_ << endl;
2337  out << "Total time: " << InitializeTime_ << endl;
2338  }
2339  out << "compute: " << endl;
2340  {
2341  OSTab tab4(out);
2342  out << "Call count: " << NumCompute_ << endl;
2343  out << "Total time: " << ComputeTime_ << endl;
2344  }
2345  out << "apply: " << endl;
2346  {
2347  OSTab tab4(out);
2348  out << "Call count: " << NumApply_ << endl;
2349  out << "Total time: " << ApplyTime_ << endl;
2350  }
2351  }
2352  }
2353 }
2354 
2355 } // namespace Ifpack2
2356 
2357 #define IFPACK2_RELAXATION_INSTANT(S, LO, GO, N) \
2358  template class Ifpack2::Relaxation<Tpetra::RowMatrix<S, LO, GO, N>>;
2359 
2360 #endif // IFPACK2_RELAXATION_DEF_HPP
bool hasTransposeApply() const
Whether apply() and applyMat() let you apply the transpose or conjugate transpose.
Definition: Ifpack2_Relaxation_def.hpp:488
double getComputeFlops() const
Total number of floating-point operations over all calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:523
double getApplyFlops() const
Total number of floating-point operations over all calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:528
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object&#39;s attributes to the given output stream.
Definition: Ifpack2_Relaxation_def.hpp:2194
bool is_null(const boost::shared_ptr< T > &p)
basic_OSTab< char > OSTab
static magnitudeType eps()
Relaxation(const Teuchos::RCP< const row_matrix_type > &A)
Constructor.
Definition: Ifpack2_Relaxation_def.hpp:186
T & get(const std::string &name, T def_value)
void compute()
Compute the preconditioner (&quot;numeric setup&quot;);.
Definition: Ifpack2_Relaxation_def.hpp:964
static RCP< Time > getNewCounter(const std::string &name)
static RCP< Time > lookupCounter(const std::string &name)
virtual void printDoc(std::string const &docString, std::ostream &out) const =0
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
size_t getNodeSmootherComplexity() const
Get a rough estimate of cost per iteration.
Definition: Ifpack2_Relaxation_def.hpp:533
virtual const std::string getXMLTypeName() const =0
Diagonal preconditioner.
Definition: Ifpack2_Diagonal_decl.hpp:38
static std::ostream & printLines(std::ostream &os, const std::string &linePrefix, const std::string &lines)
ParameterList & set(std::string const &name, T &&value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
The communicator over which the matrix and vectors are distributed.
Definition: Ifpack2_Relaxation_def.hpp:446
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Change the matrix to be preconditioned.
Definition: Ifpack2_Relaxation_def.hpp:166
int getNumApply() const
Total number of calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:503
T * getRawPtr() const
bool remove(std::string const &name, bool throwIfNotExists=true)
double getComputeTime() const
Total time in seconds spent in all calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:513
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
MatrixType::node_type node_type
The Node type used by the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:230
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getDomainMap() const
Returns the Tpetra::Map object associated with the domain of this operator.
Definition: Ifpack2_Relaxation_def.hpp:465
void setParameters(const Teuchos::ParameterList &params)
Set the relaxation / preconditioner parameters.
Definition: Ifpack2_Relaxation_def.hpp:438
File for utility functions.
std::string description() const
A simple one-line description of this object.
Definition: Ifpack2_Relaxation_def.hpp:2093
int getNumInitialize() const
Total number of calls to initialize().
Definition: Ifpack2_Relaxation_def.hpp:493
void validateParametersAndSetDefaults(ParameterList const &validParamList, int const depth=1000)
virtual ValidStringsList validStringValues() const =0
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getRangeMap() const
Returns the Tpetra::Map object associated with the range of this operator.
Definition: Ifpack2_Relaxation_def.hpp:478
MatrixType::global_ordinal_type global_ordinal_type
The type of global indices in the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:227
double getInitializeTime() const
Total time in seconds spent in all calls to initialize().
Definition: Ifpack2_Relaxation_def.hpp:508
any & getAny(bool activeQry=true)
Compute scaled damped residual for Chebyshev.
Definition: Ifpack2_Details_InverseDiagonalKernel_decl.hpp:45
void applyMat(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS) const
Apply the input matrix to X, returning the result in Y.
Definition: Ifpack2_Relaxation_def.hpp:657
Teuchos::RCP< const row_matrix_type > getMatrix() const
The matrix to be preconditioned.
Definition: Ifpack2_Relaxation_def.hpp:457
TypeTo as(const TypeFrom &t)
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:224
bool isType(const std::string &name) const
MatrixType::scalar_type scalar_type
The type of the entries of the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:221
std::string typeName() const
const std::type_info & type() const
static double wallTime()
int getNumCompute() const
Total number of calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:498
Relaxation preconditioners for Tpetra::RowMatrix and Tpetra::CrsMatrix sparse matrices.
Definition: Ifpack2_Relaxation_decl.hpp:206
double getApplyTime() const
Total time in seconds spent in all calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:518
void getValidParameters(Teuchos::ParameterList &params)
Fills a list which contains all the parameters possibly used by Ifpack2.
Definition: Ifpack2_Parameters.cpp:18
T * get() const
void initialize()
Initialize the preconditioner (&quot;symbolic setup&quot;).
Definition: Ifpack2_Relaxation_def.hpp:678
Tpetra::RowMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > row_matrix_type
Tpetra::RowMatrix specialization used by this class.
Definition: Ifpack2_Relaxation_decl.hpp:241
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the preconditioner to X, returning the result in Y.
Definition: Ifpack2_Relaxation_def.hpp:545
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const
Return a list of all the parameters that this class accepts.
Definition: Ifpack2_Relaxation_def.hpp:195
Teuchos::ScalarTraits< scalar_type >::magnitudeType magnitude_type
The type of the magnitude (absolute value) of a matrix entry.
Definition: Ifpack2_Relaxation_decl.hpp:236
virtual void validate(ParameterEntry const &entry, std::string const &paramName, std::string const &sublistName) const =0
bool is_null() const