Ifpack2 Templated Preconditioning Package  Version 1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Ifpack2_Relaxation_def.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4 //
5 // Copyright 2009 NTESS and the Ifpack2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef IFPACK2_RELAXATION_DEF_HPP
11 #define IFPACK2_RELAXATION_DEF_HPP
12 
13 #include "Teuchos_StandardParameterEntryValidators.hpp"
14 #include "Teuchos_TimeMonitor.hpp"
15 #include "Tpetra_CrsMatrix.hpp"
16 #include "Tpetra_BlockCrsMatrix.hpp"
17 #include "Tpetra_BlockView.hpp"
18 #include "Ifpack2_Utilities.hpp"
19 #include "Ifpack2_Details_getCrsMatrix.hpp"
20 #include "MatrixMarket_Tpetra.hpp"
21 #include "Tpetra_Details_residual.hpp"
22 #include <cstdlib>
23 #include <memory>
24 #include <sstream>
25 #include "KokkosSparse_gauss_seidel.hpp"
26 
27 namespace {
28  // Validate that a given int is nonnegative.
29  class NonnegativeIntValidator : public Teuchos::ParameterEntryValidator {
30  public:
31  // Constructor (does nothing).
32  NonnegativeIntValidator () {}
33 
34  // ParameterEntryValidator wants this method.
36  return Teuchos::null;
37  }
38 
39  // Actually validate the parameter's value.
40  void
41  validate (const Teuchos::ParameterEntry& entry,
42  const std::string& paramName,
43  const std::string& sublistName) const
44  {
45  using std::endl;
46  Teuchos::any anyVal = entry.getAny (true);
47  const std::string entryName = entry.getAny (false).typeName ();
48 
50  anyVal.type () != typeid (int),
52  "Parameter \"" << paramName << "\" in sublist \"" << sublistName
53  << "\" has the wrong type." << endl << "Parameter: " << paramName
54  << endl << "Type specified: " << entryName << endl
55  << "Type required: int" << endl);
56 
57  const int val = Teuchos::any_cast<int> (anyVal);
60  "Parameter \"" << paramName << "\" in sublist \"" << sublistName
61  << "\" is negative." << endl << "Parameter: " << paramName
62  << endl << "Value specified: " << val << endl
63  << "Required range: [0, INT_MAX]" << endl);
64  }
65 
66  // ParameterEntryValidator wants this method.
67  const std::string getXMLTypeName () const {
68  return "NonnegativeIntValidator";
69  }
70 
71  // ParameterEntryValidator wants this method.
72  void
73  printDoc (const std::string& docString,
74  std::ostream &out) const
75  {
76  Teuchos::StrUtils::printLines (out, "# ", docString);
77  out << "#\tValidator Used: " << std::endl;
78  out << "#\t\tNonnegativeIntValidator" << std::endl;
79  }
80  };
81 
82  // A way to get a small positive number (eps() for floating-point
83  // types, or 1 for integer types) when Teuchos::ScalarTraits doesn't
84  // define it (for example, for integer values).
85  template<class Scalar, const bool isOrdinal=Teuchos::ScalarTraits<Scalar>::isOrdinal>
86  class SmallTraits {
87  public:
88  // Return eps if Scalar is a floating-point type, else return 1.
89  static const Scalar eps ();
90  };
91 
92  // Partial specialization for when Scalar is not a floating-point type.
93  template<class Scalar>
94  class SmallTraits<Scalar, true> {
95  public:
96  static const Scalar eps () {
98  }
99  };
100 
101  // Partial specialization for when Scalar is a floating-point type.
102  template<class Scalar>
103  class SmallTraits<Scalar, false> {
104  public:
105  static const Scalar eps () {
107  }
108  };
109 
110  // Work-around for GitHub Issue #5269.
111  template<class Scalar,
112  const bool isComplex = Teuchos::ScalarTraits<Scalar>::isComplex>
113  struct RealTraits {};
114 
115  template<class Scalar>
116  struct RealTraits<Scalar, false> {
117  using val_type = Scalar;
118  using mag_type = Scalar;
119  static KOKKOS_INLINE_FUNCTION mag_type real (const val_type& z) {
120  return z;
121  }
122  };
123 
124  template<class Scalar>
125  struct RealTraits<Scalar, true> {
126  using val_type = Scalar;
127  using mag_type = typename Teuchos::ScalarTraits<Scalar>::magnitudeType;
128  static KOKKOS_INLINE_FUNCTION mag_type real (const val_type& z) {
129  return Kokkos::ArithTraits<val_type>::real (z);
130  }
131  };
132 
133  template<class Scalar>
134  KOKKOS_INLINE_FUNCTION typename RealTraits<Scalar>::mag_type
135  getRealValue (const Scalar& z) {
136  return RealTraits<Scalar>::real (z);
137  }
138 
139 } // namespace (anonymous)
140 
141 namespace Ifpack2 {
142 
143 template<class MatrixType>
144 void
145 Relaxation<MatrixType>::
146 updateCachedMultiVector (const Teuchos::RCP<const Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type>>& map,
147  size_t numVecs) const
148 {
149  // Allocate a multivector if the cached one isn't perfect. Checking
150  // for map pointer equality is much cheaper than Map::isSameAs.
151  if (cachedMV_.is_null () ||
152  map.get () != cachedMV_->getMap ().get () ||
153  cachedMV_->getNumVectors () != numVecs) {
154  using MV = Tpetra::MultiVector<scalar_type, local_ordinal_type,
155  global_ordinal_type, node_type>;
156  cachedMV_ = Teuchos::rcp (new MV (map, numVecs, false));
157  }
158 }
159 
160 template<class MatrixType>
163 {
164  if (A.getRawPtr() != A_.getRawPtr()) { // it's a different matrix
165  Importer_ = Teuchos::null;
166  pointImporter_ = Teuchos::null;
167  Diagonal_ = Teuchos::null; // ??? what if this comes from the user???
168  isInitialized_ = false;
169  IsComputed_ = false;
170  diagOffsets_ = Kokkos::View<size_t*, device_type>();
171  savedDiagOffsets_ = false;
172  hasBlockCrsMatrix_ = false;
173  serialGaussSeidel_ = Teuchos::null;
174  if (! A.is_null ()) {
175  IsParallel_ = (A->getRowMap ()->getComm ()->getSize () > 1);
176  }
177  A_ = A;
178  }
179 }
180 
181 template<class MatrixType>
184 : A_ (A),
185  IsParallel_ ((A.is_null () || A->getRowMap ().is_null () || A->getRowMap ()->getComm ().is_null ()) ?
186  false : // a reasonable default if there's no communicator
187  A->getRowMap ()->getComm ()->getSize () > 1)
188 {
189  this->setObjectLabel ("Ifpack2::Relaxation");
190 }
191 
192 
193 template<class MatrixType>
196 {
197  using Teuchos::Array;
199  using Teuchos::parameterList;
200  using Teuchos::RCP;
201  using Teuchos::rcp;
202  using Teuchos::rcp_const_cast;
203  using Teuchos::rcp_implicit_cast;
204  using Teuchos::setStringToIntegralParameter;
206 
207  if (validParams_.is_null ()) {
208  RCP<ParameterList> pl = parameterList ("Ifpack2::Relaxation");
209 
210  // Set a validator that automatically converts from the valid
211  // string options to their enum values.
212  Array<std::string> precTypes (8);
213  precTypes[0] = "Jacobi";
214  precTypes[1] = "Gauss-Seidel";
215  precTypes[2] = "Symmetric Gauss-Seidel";
216  precTypes[3] = "MT Gauss-Seidel";
217  precTypes[4] = "MT Symmetric Gauss-Seidel";
218  precTypes[5] = "Richardson";
219  precTypes[6] = "Two-stage Gauss-Seidel";
220  precTypes[7] = "Two-stage Symmetric Gauss-Seidel";
221  Array<Details::RelaxationType> precTypeEnums (8);
222  precTypeEnums[0] = Details::JACOBI;
223  precTypeEnums[1] = Details::GS;
224  precTypeEnums[2] = Details::SGS;
225  precTypeEnums[3] = Details::MTGS;
226  precTypeEnums[4] = Details::MTSGS;
227  precTypeEnums[5] = Details::RICHARDSON;
228  precTypeEnums[6] = Details::GS2;
229  precTypeEnums[7] = Details::SGS2;
230  const std::string defaultPrecType ("Jacobi");
231  setStringToIntegralParameter<Details::RelaxationType> ("relaxation: type",
232  defaultPrecType, "Relaxation method", precTypes (), precTypeEnums (),
233  pl.getRawPtr ());
234 
235  const int numSweeps = 1;
236  RCP<PEV> numSweepsValidator =
237  rcp_implicit_cast<PEV> (rcp (new NonnegativeIntValidator));
238  pl->set ("relaxation: sweeps", numSweeps, "Number of relaxation sweeps",
239  rcp_const_cast<const PEV> (numSweepsValidator));
240 
241  // number of 'local' outer sweeps for two-stage GS
242  const int numOuterSweeps = 1;
243  RCP<PEV> numOuterSweepsValidator =
244  rcp_implicit_cast<PEV> (rcp (new NonnegativeIntValidator));
245  pl->set ("relaxation: outer sweeps", numOuterSweeps, "Number of outer local relaxation sweeps for two-stage GS",
246  rcp_const_cast<const PEV> (numOuterSweepsValidator));
247  // number of 'local' inner sweeps for two-stage GS
248  const int numInnerSweeps = 1;
249  RCP<PEV> numInnerSweepsValidator =
250  rcp_implicit_cast<PEV> (rcp (new NonnegativeIntValidator));
251  pl->set ("relaxation: inner sweeps", numInnerSweeps, "Number of inner local relaxation sweeps for two-stage GS",
252  rcp_const_cast<const PEV> (numInnerSweepsValidator));
253  // specify damping factor for the inner sweeps of two-stage GS
254  const scalar_type innerDampingFactor = STS::one ();
255  pl->set ("relaxation: inner damping factor", innerDampingFactor, "Damping factor for the inner sweep of two-stage GS");
256  // specify whether to use sptrsv instead of inner-iterations for two-stage GS
257  const bool innerSpTrsv = false;
258  pl->set ("relaxation: inner sparse-triangular solve", innerSpTrsv, "Specify whether to use sptrsv instead of JR iterations for two-stage GS");
259  // specify whether to use compact form of recurrence for two-stage GS
260  const bool compactForm = false;
261  pl->set ("relaxation: compact form", compactForm, "Specify whether to use compact form of recurrence for two-stage GS");
262 
263  const scalar_type dampingFactor = STS::one ();
264  pl->set ("relaxation: damping factor", dampingFactor);
265 
266  const bool zeroStartingSolution = true;
267  pl->set ("relaxation: zero starting solution", zeroStartingSolution);
268 
269  const bool doBackwardGS = false;
270  pl->set ("relaxation: backward mode", doBackwardGS);
271 
272  const bool doL1Method = false;
273  pl->set ("relaxation: use l1", doL1Method);
274 
275  const magnitude_type l1eta = (STM::one() + STM::one() + STM::one()) /
276  (STM::one() + STM::one()); // 1.5
277  pl->set ("relaxation: l1 eta", l1eta);
278 
279  const scalar_type minDiagonalValue = STS::zero ();
280  pl->set ("relaxation: min diagonal value", minDiagonalValue);
281 
282  const bool fixTinyDiagEntries = false;
283  pl->set ("relaxation: fix tiny diagonal entries", fixTinyDiagEntries);
284 
285  const bool checkDiagEntries = false;
286  pl->set ("relaxation: check diagonal entries", checkDiagEntries);
287 
288  Teuchos::ArrayRCP<local_ordinal_type> localSmoothingIndices = Teuchos::null;
289  pl->set("relaxation: local smoothing indices", localSmoothingIndices);
290 
291  const bool is_matrix_structurally_symmetric = false;
292  pl->set("relaxation: symmetric matrix structure", is_matrix_structurally_symmetric);
293 
294  const bool ifpack2_dump_matrix = false;
295  pl->set("relaxation: ifpack2 dump matrix", ifpack2_dump_matrix);
296 
297  const int cluster_size = 1;
298  pl->set("relaxation: mtgs cluster size", cluster_size);
299 
300  pl->set("relaxation: mtgs coloring algorithm", "Default");
301 
302  const int long_row_threshold = 0;
303  pl->set("relaxation: long row threshold", long_row_threshold);
304 
305  const bool timer_for_apply = true;
306  pl->set("timer for apply", timer_for_apply);
307 
308  validParams_ = rcp_const_cast<const ParameterList> (pl);
309  }
310  return validParams_;
311 }
312 
313 
314 template<class MatrixType>
316 {
317  using Teuchos::getIntegralValue;
318  using Teuchos::getStringValue;
320  using Teuchos::RCP;
321  typedef scalar_type ST; // just to make code below shorter
322 
323  if (pl.isType<double>("relaxation: damping factor")) {
324  // Make sure that ST=complex can run with a damping factor that is
325  // a double.
326  ST df = pl.get<double>("relaxation: damping factor");
327  pl.remove("relaxation: damping factor");
328  pl.set("relaxation: damping factor",df);
329  }
330 
332 
333  const Details::RelaxationType precType =
334  getIntegralValue<Details::RelaxationType> (pl, "relaxation: type");
335  const std::string precTypeStr = getStringValue<Details::RelaxationType>(pl, "relaxation: type");
336  // We only access "relaxation: type" using strings in the rest of the code
337  pl.set<std::string>("relaxation: type", precTypeStr);
338  pl.get<std::string>("relaxation: type"); // We need to mark the parameter as "used"
339  const int numSweeps = pl.get<int> ("relaxation: sweeps");
340  const ST dampingFactor = pl.get<ST> ("relaxation: damping factor");
341  const bool zeroStartSol = pl.get<bool> ("relaxation: zero starting solution");
342  const bool doBackwardGS = pl.get<bool> ("relaxation: backward mode");
343  const bool doL1Method = pl.get<bool> ("relaxation: use l1");
344  const magnitude_type l1Eta = pl.get<magnitude_type> ("relaxation: l1 eta");
345  const ST minDiagonalValue = pl.get<ST> ("relaxation: min diagonal value");
346  const bool fixTinyDiagEntries = pl.get<bool> ("relaxation: fix tiny diagonal entries");
347  const bool checkDiagEntries = pl.get<bool> ("relaxation: check diagonal entries");
348  const bool is_matrix_structurally_symmetric = pl.get<bool> ("relaxation: symmetric matrix structure");
349  const bool ifpack2_dump_matrix = pl.get<bool> ("relaxation: ifpack2 dump matrix");
350  const bool timer_for_apply = pl.get<bool> ("timer for apply");
351  int cluster_size = 1;
352  if(pl.isParameter ("relaxation: mtgs cluster size")) //optional parameter
353  cluster_size = pl.get<int> ("relaxation: mtgs cluster size");
354  int long_row_threshold = 0;
355  if(pl.isParameter ("relaxation: long row threshold")) //optional parameter
356  long_row_threshold = pl.get<int> ("relaxation: long row threshold");
357  std::string color_algo_name = pl.get<std::string>("relaxation: mtgs coloring algorithm");
358  //convert to lowercase
359  for(char& c : color_algo_name)
360  c = tolower(c);
361  if(color_algo_name == "default")
362  this->mtColoringAlgorithm_ = KokkosGraph::COLORING_DEFAULT;
363  else if(color_algo_name == "serial")
364  this->mtColoringAlgorithm_ = KokkosGraph::COLORING_SERIAL;
365  else if(color_algo_name == "vb")
366  this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VB;
367  else if(color_algo_name == "vbbit")
368  this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VBBIT;
369  else if(color_algo_name == "vbcs")
370  this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VBCS;
371  else if(color_algo_name == "vbd")
372  this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VBD;
373  else if(color_algo_name == "vbdbit")
374  this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VBDBIT;
375  else if(color_algo_name == "eb")
376  this->mtColoringAlgorithm_ = KokkosGraph::COLORING_EB;
377  else
378  {
379  std::ostringstream msg;
380  msg << "Ifpack2::Relaxation: 'relaxation: mtgs coloring algorithm' = '" << color_algo_name << "' is not valid.\n";
381  msg << "Choices (not case sensitive) are: Default, Serial, VB, VBBIT, VBCS, VBD, VBDBIT, EB.";
383  true, std::invalid_argument, msg.str());
384  }
385 
386  Teuchos::ArrayRCP<local_ordinal_type> localSmoothingIndices = pl.get<Teuchos::ArrayRCP<local_ordinal_type> >("relaxation: local smoothing indices");
387 
388  // for Two-stage Gauss-Seidel
389  if (!std::is_same<double, ST>::value && pl.isType<double>("relaxation: inner damping factor")) {
390  // Make sure that ST=complex can run with a damping factor that is
391  // a double.
392  ST df = pl.get<double>("relaxation: inner damping factor");
393  pl.remove("relaxation: inner damping factor");
394  pl.set("relaxation: inner damping factor",df);
395  }
396  //If long row algorithm was requested, make sure non-cluster (point) multicolor Gauss-Seidel (aka MTGS/MTSGS) will be used.
397  if (long_row_threshold > 0) {
399  cluster_size != 1, std::invalid_argument, "Ifpack2::Relaxation: "
400  "Requested long row MTGS/MTSGS algorithm and cluster GS/SGS, but those are not compatible.");
402  precType != Details::RelaxationType::MTGS && precType != Details::RelaxationType::MTSGS,
403  std::invalid_argument, "Ifpack2::Relaxation: "
404  "Requested long row MTGS/MTSGS algorithm, but this is only compatible with preconditioner types "
405  "'MT Gauss-Seidel' and 'MT Symmetric Gauss-Seidel'.");
406  }
407 
408  const ST innerDampingFactor = pl.get<ST> ("relaxation: inner damping factor");
409  const int numInnerSweeps = pl.get<int> ("relaxation: inner sweeps");
410  const int numOuterSweeps = pl.get<int> ("relaxation: outer sweeps");
411  const bool innerSpTrsv = pl.get<bool> ("relaxation: inner sparse-triangular solve");
412  const bool compactForm = pl.get<bool> ("relaxation: compact form");
413 
414  // "Commit" the changes, now that we've validated everything.
415  PrecType_ = precType;
416  NumSweeps_ = numSweeps;
417  DampingFactor_ = dampingFactor;
418  ZeroStartingSolution_ = zeroStartSol;
419  DoBackwardGS_ = doBackwardGS;
420  DoL1Method_ = doL1Method;
421  L1Eta_ = l1Eta;
422  MinDiagonalValue_ = minDiagonalValue;
423  fixTinyDiagEntries_ = fixTinyDiagEntries;
424  checkDiagEntries_ = checkDiagEntries;
425  clusterSize_ = cluster_size;
426  longRowThreshold_ = long_row_threshold;
427  is_matrix_structurally_symmetric_ = is_matrix_structurally_symmetric;
428  ifpack2_dump_matrix_ = ifpack2_dump_matrix;
429  localSmoothingIndices_ = localSmoothingIndices;
430  // for Two-stage GS
431  NumInnerSweeps_ = numInnerSweeps;
432  NumOuterSweeps_ = numOuterSweeps;
433  InnerSpTrsv_ = innerSpTrsv;
434  InnerDampingFactor_ = innerDampingFactor;
435  CompactForm_ = compactForm;
436  TimerForApply_ = timer_for_apply;
437 }
438 
439 
440 template<class MatrixType>
442 {
443  // FIXME (aprokop 18 Oct 2013) Casting away const is bad here.
444  // but otherwise, we will get [unused] in pl
445  this->setParametersImpl(const_cast<Teuchos::ParameterList&>(pl));
446 }
447 
448 
449 template<class MatrixType>
453  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getComm: "
454  "The input matrix A is null. Please call setMatrix() with a nonnull "
455  "input matrix before calling this method.");
456  return A_->getRowMap ()->getComm ();
457 }
458 
459 
460 template<class MatrixType>
463  return A_;
464 }
465 
466 
467 template<class MatrixType>
468 Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
469  typename MatrixType::global_ordinal_type,
470  typename MatrixType::node_type> >
473  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getDomainMap: "
474  "The input matrix A is null. Please call setMatrix() with a nonnull "
475  "input matrix before calling this method.");
476  return A_->getDomainMap ();
477 }
478 
479 
480 template<class MatrixType>
481 Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
482  typename MatrixType::global_ordinal_type,
483  typename MatrixType::node_type> >
486  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getRangeMap: "
487  "The input matrix A is null. Please call setMatrix() with a nonnull "
488  "input matrix before calling this method.");
489  return A_->getRangeMap ();
490 }
491 
492 
493 template<class MatrixType>
495  return true;
496 }
497 
498 
499 template<class MatrixType>
501  return(NumInitialize_);
502 }
503 
504 
505 template<class MatrixType>
507  return(NumCompute_);
508 }
509 
510 
511 template<class MatrixType>
513  return(NumApply_);
514 }
515 
516 
517 template<class MatrixType>
519  return(InitializeTime_);
520 }
521 
522 
523 template<class MatrixType>
525  return(ComputeTime_);
526 }
527 
528 
529 template<class MatrixType>
531  return(ApplyTime_);
532 }
533 
534 
535 template<class MatrixType>
537  return(ComputeFlops_);
538 }
539 
540 
541 template<class MatrixType>
543  return(ApplyFlops_);
544 }
545 
546 
547 
548 template<class MatrixType>
551  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getNodeSmootherComplexity: "
552  "The input matrix A is null. Please call setMatrix() with a nonnull "
553  "input matrix, then call compute(), before calling this method.");
554  // Relaxation methods cost roughly one apply + one diagonal inverse per iteration
555  return A_->getLocalNumRows() + A_->getLocalNumEntries();
556 }
557 
558 
559 template<class MatrixType>
560 void
562 apply (const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
563  Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
564  Teuchos::ETransp /* mode */,
565  scalar_type alpha,
566  scalar_type beta) const
567 {
568  using Teuchos::as;
569  using Teuchos::RCP;
570  using Teuchos::rcp;
571  using Teuchos::rcpFromRef;
572  typedef Tpetra::MultiVector<scalar_type, local_ordinal_type,
575  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::apply: "
576  "The input matrix A is null. Please call setMatrix() with a nonnull "
577  "input matrix, then call compute(), before calling this method.");
579  ! isComputed (),
580  std::runtime_error,
581  "Ifpack2::Relaxation::apply: You must call compute() on this Ifpack2 "
582  "preconditioner instance before you may call apply(). You may call "
583  "isComputed() to find out if compute() has been called already.");
584  TEUCHOS_TEST_FOR_EXCEPTION(
585  X.getNumVectors() != Y.getNumVectors(),
586  std::runtime_error,
587  "Ifpack2::Relaxation::apply: X and Y have different numbers of columns. "
588  "X has " << X.getNumVectors() << " columns, but Y has "
589  << Y.getNumVectors() << " columns.");
590  TEUCHOS_TEST_FOR_EXCEPTION(
591  beta != STS::zero (), std::logic_error,
592  "Ifpack2::Relaxation::apply: beta = " << beta << " != 0 case not "
593  "implemented.");
594 
596  const std::string timerName ("Ifpack2::Relaxation::apply");
597  if (TimerForApply_) {
598  timer = Teuchos::TimeMonitor::lookupCounter (timerName);
599  if (timer.is_null ()) {
600  timer = Teuchos::TimeMonitor::getNewCounter (timerName);
601  }
602  }
603 
604  Teuchos::Time time = Teuchos::Time(timerName);
605  double startTime = time.wallTime();
606 
607  {
609  if (TimerForApply_)
610  timeMon = Teuchos::rcp(new Teuchos::TimeMonitor(*timer));
611 
612  // Special case: alpha == 0.
613  if (alpha == STS::zero ()) {
614  // No floating-point operations, so no need to update a count.
615  Y.putScalar (STS::zero ());
616  }
617  else {
618  // If X and Y alias one another, then we need to create an
619  // auxiliary vector, Xcopy (a deep copy of X).
620  RCP<const MV> Xcopy;
621  {
622  if (X.aliases(Y)) {
623  Xcopy = rcp (new MV (X, Teuchos::Copy));
624  } else {
625  Xcopy = rcpFromRef (X);
626  }
627  }
628 
629  // Each of the following methods updates the flop count itself.
630  // All implementations handle zeroing out the starting solution
631  // (if necessary) themselves.
632  switch (PrecType_) {
633  case Ifpack2::Details::JACOBI:
634  ApplyInverseJacobi(*Xcopy,Y);
635  break;
636  case Ifpack2::Details::GS:
637  ApplyInverseSerialGS(*Xcopy, Y, DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward);
638  break;
639  case Ifpack2::Details::SGS:
640  ApplyInverseSerialGS(*Xcopy, Y, Tpetra::Symmetric);
641  break;
642  case Ifpack2::Details::MTGS:
643  case Ifpack2::Details::GS2:
644  ApplyInverseMTGS_CrsMatrix(*Xcopy, Y, DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward);
645  break;
646  case Ifpack2::Details::MTSGS:
647  case Ifpack2::Details::SGS2:
648  ApplyInverseMTGS_CrsMatrix(*Xcopy, Y, Tpetra::Symmetric);
649  break;
650  case Ifpack2::Details::RICHARDSON:
651  ApplyInverseRichardson(*Xcopy,Y);
652  break;
653 
654  default:
655  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
656  "Ifpack2::Relaxation::apply: Invalid preconditioner type enum value "
657  << PrecType_ << ". Please report this bug to the Ifpack2 developers.");
658  }
659  if (alpha != STS::one ()) {
660  Y.scale (alpha);
661  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
662  const double numVectors = as<double> (Y.getNumVectors ());
663  ApplyFlops_ += numGlobalRows * numVectors;
664  }
665  }
666  }
667  ApplyTime_ += (time.wallTime() - startTime);
668  ++NumApply_;
669 }
670 
671 
672 template<class MatrixType>
673 void
675 applyMat (const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
676  Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
677  Teuchos::ETransp mode) const
678 {
680  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::applyMat: "
681  "The input matrix A is null. Please call setMatrix() with a nonnull "
682  "input matrix, then call compute(), before calling this method.");
684  ! isComputed (), std::runtime_error, "Ifpack2::Relaxation::applyMat: "
685  "isComputed() must be true before you may call applyMat(). "
686  "Please call compute() before calling this method.");
687  TEUCHOS_TEST_FOR_EXCEPTION(
688  X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
689  "Ifpack2::Relaxation::applyMat: X.getNumVectors() = " << X.getNumVectors ()
690  << " != Y.getNumVectors() = " << Y.getNumVectors () << ".");
691  A_->apply (X, Y, mode);
692 }
693 
694 
695 template<class MatrixType>
697 {
698  const char methodName[] = "Ifpack2::Relaxation::initialize";
699 
701  (A_.is_null (), std::runtime_error, methodName << ": The "
702  "input matrix A is null. Please call setMatrix() with "
703  "a nonnull input matrix before calling this method.");
704 
707 
708  double startTime = timer->wallTime();
709 
710  { // Timing of initialize starts here
711  Teuchos::TimeMonitor timeMon (*timer);
712  isInitialized_ = false;
713 
714  {
715  auto rowMap = A_->getRowMap ();
716  auto comm = rowMap.is_null () ? Teuchos::null : rowMap->getComm ();
717  IsParallel_ = ! comm.is_null () && comm->getSize () > 1;
718  }
719 
720  // mfh 21 Mar 2013, 07 May 2019: The Import object may be null,
721  // but in that case, the domain and column Maps are the same and
722  // we don't need to Import anyway.
723  //
724  // mfh 07 May 2019: A_->getGraph() might be an
725  // OverlappingRowGraph, which doesn't have an Import object.
726  // However, in that case, the comm should have size 1.
727  Importer_ = IsParallel_ ? A_->getGraph ()->getImporter () :
728  Teuchos::null;
729 
730  {
732  Teuchos::rcp_dynamic_cast<const block_crs_matrix_type> (A_);
733  hasBlockCrsMatrix_ = ! A_bcrs.is_null ();
734  }
735 
736  serialGaussSeidel_ = Teuchos::null;
737 
738  if (PrecType_ == Details::MTGS || PrecType_ == Details::MTSGS ||
739  PrecType_ == Details::GS2 || PrecType_ == Details::SGS2) {
740  auto crsMat = Details::getCrsMatrix(A_);
742  (crsMat.is_null(), std::logic_error, methodName << ": "
743  "Multithreaded Gauss-Seidel methods currently only work "
744  "when the input matrix is a Tpetra::CrsMatrix.");
745 
746  // FIXME (mfh 27 May 2019) Dumping the matrix belongs in
747  // compute, not initialize, since users may change the matrix's
748  // values at any time before calling compute.
749  if (ifpack2_dump_matrix_) {
750  static int sequence_number = 0;
751  const std::string file_name = "Ifpack2_MT_GS_" +
752  std::to_string (sequence_number++) + ".mtx";
753  using writer_type = Tpetra::MatrixMarket::Writer<crs_matrix_type>;
754  writer_type::writeSparseFile (file_name, crsMat);
755  }
756 
757  this->mtKernelHandle_ = Teuchos::rcp (new mt_kernel_handle_type ());
758  if (mtKernelHandle_->get_gs_handle () == nullptr) {
759  if (PrecType_ == Details::GS2 || PrecType_ == Details::SGS2)
760  mtKernelHandle_->create_gs_handle (KokkosSparse::GS_TWOSTAGE);
761  else if(this->clusterSize_ == 1) {
762  mtKernelHandle_->create_gs_handle (KokkosSparse::GS_DEFAULT, this->mtColoringAlgorithm_);
763  mtKernelHandle_->get_point_gs_handle()->set_long_row_threshold(longRowThreshold_);
764  }
765  else
766  mtKernelHandle_->create_gs_handle (KokkosSparse::CLUSTER_DEFAULT, this->clusterSize_, this->mtColoringAlgorithm_);
767  }
768  local_matrix_device_type kcsr = crsMat->getLocalMatrixDevice ();
769  if (PrecType_ == Details::GS2 || PrecType_ == Details::SGS2) {
770  // set parameters for two-stage GS
771  mtKernelHandle_->set_gs_set_num_inner_sweeps (NumInnerSweeps_);
772  mtKernelHandle_->set_gs_set_num_outer_sweeps (NumOuterSweeps_);
773  mtKernelHandle_->set_gs_set_inner_damp_factor (InnerDampingFactor_);
774  mtKernelHandle_->set_gs_twostage (!InnerSpTrsv_, A_->getLocalNumRows ());
775  mtKernelHandle_->set_gs_twostage_compact_form (CompactForm_);
776  }
777 
778  KokkosSparse::Experimental::gauss_seidel_symbolic(
779  mtKernelHandle_.getRawPtr (),
780  A_->getLocalNumRows (),
781  A_->getLocalNumCols (),
782  kcsr.graph.row_map,
783  kcsr.graph.entries,
784  is_matrix_structurally_symmetric_);
785  }
786  } // timing of initialize stops here
787 
788  InitializeTime_ += (timer->wallTime() - startTime);
789  ++NumInitialize_;
790  isInitialized_ = true;
791 }
792 
793 namespace Impl {
794 template <typename BlockDiagView>
795 struct InvertDiagBlocks {
796  typedef typename BlockDiagView::size_type Size;
797 
798 private:
799  typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
800  template <typename View>
801  using UnmanagedView = Kokkos::View<typename View::data_type, typename View::array_layout,
802  typename View::device_type, Unmanaged>;
803 
804  typedef typename BlockDiagView::non_const_value_type Scalar;
805  typedef typename BlockDiagView::device_type Device;
806  typedef Kokkos::View<Scalar**, Kokkos::LayoutRight, Device> RWrk;
807  typedef Kokkos::View<int**, Kokkos::LayoutRight, Device> IWrk;
808 
809  UnmanagedView<BlockDiagView> block_diag_;
810  // TODO Use thread team and scratch memory space. In this first
811  // pass, provide workspace for each block.
812  RWrk rwrk_buf_;
813  UnmanagedView<RWrk> rwrk_;
814  IWrk iwrk_buf_;
815  UnmanagedView<IWrk> iwrk_;
816 
817 public:
818  InvertDiagBlocks (BlockDiagView& block_diag)
819  : block_diag_(block_diag)
820  {
821  const auto blksz = block_diag.extent(1);
822  Kokkos::resize(rwrk_buf_, block_diag_.extent(0), blksz);
823  rwrk_ = rwrk_buf_;
824  Kokkos::resize(iwrk_buf_, block_diag_.extent(0), blksz);
825  iwrk_ = iwrk_buf_;
826  }
827 
828  KOKKOS_INLINE_FUNCTION
829  void operator() (const Size i, int& jinfo) const {
830  auto D_cur = Kokkos::subview(block_diag_, i, Kokkos::ALL(), Kokkos::ALL());
831  auto ipiv = Kokkos::subview(iwrk_, i, Kokkos::ALL());
832  auto work = Kokkos::subview(rwrk_, i, Kokkos::ALL());
833  int info = 0;
834  Tpetra::GETF2(D_cur, ipiv, info);
835  if (info) {
836  ++jinfo;
837  return;
838  }
839  Tpetra::GETRI(D_cur, ipiv, work, info);
840  if (info) ++jinfo;
841  }
842 };
843 }
844 
845 template<class MatrixType>
846 void Relaxation<MatrixType>::computeBlockCrs ()
847 {
848  using Kokkos::ALL;
849  using Teuchos::Array;
850  using Teuchos::ArrayRCP;
851  using Teuchos::ArrayView;
852  using Teuchos::as;
853  using Teuchos::Comm;
854  using Teuchos::RCP;
855  using Teuchos::rcp;
856  using Teuchos::REDUCE_MAX;
857  using Teuchos::REDUCE_MIN;
858  using Teuchos::REDUCE_SUM;
859  using Teuchos::rcp_dynamic_cast;
860  using Teuchos::reduceAll;
861  typedef local_ordinal_type LO;
862 
863  const std::string timerName ("Ifpack2::Relaxation::computeBlockCrs");
865  if (timer.is_null ()) {
866  timer = Teuchos::TimeMonitor::getNewCounter (timerName);
867  }
868  double startTime = timer->wallTime();
869  {
870  Teuchos::TimeMonitor timeMon (*timer);
871 
873  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::"
874  "computeBlockCrs: The input matrix A is null. Please call setMatrix() "
875  "with a nonnull input matrix, then call initialize(), before calling "
876  "this method.");
877  auto blockCrsA = rcp_dynamic_cast<const block_crs_matrix_type> (A_);
879  blockCrsA.is_null(), std::logic_error, "Ifpack2::Relaxation::"
880  "computeBlockCrs: A_ is not a BlockCrsMatrix, but it should be if we "
881  "got this far. Please report this bug to the Ifpack2 developers.");
882 
883  const scalar_type one = STS::one ();
884 
885  // Reset state.
886  IsComputed_ = false;
887 
888  const LO lclNumMeshRows =
889  blockCrsA->getCrsGraph ().getLocalNumRows ();
890  const LO blockSize = blockCrsA->getBlockSize ();
891 
892  if (! savedDiagOffsets_) {
893  blockDiag_ = block_diag_type (); // clear it before reallocating
894  blockDiag_ = block_diag_type ("Ifpack2::Relaxation::blockDiag_",
895  lclNumMeshRows, blockSize, blockSize);
896  if (Teuchos::as<LO>(diagOffsets_.extent (0) ) < lclNumMeshRows) {
897  // Clear diagOffsets_ first (by assigning an empty View to it)
898  // to save memory, before reallocating.
899  diagOffsets_ = Kokkos::View<size_t*, device_type> ();
900  diagOffsets_ = Kokkos::View<size_t*, device_type> ("offsets", lclNumMeshRows);
901  }
902  blockCrsA->getCrsGraph ().getLocalDiagOffsets (diagOffsets_);
904  (static_cast<size_t> (diagOffsets_.extent (0)) !=
905  static_cast<size_t> (blockDiag_.extent (0)),
906  std::logic_error, "diagOffsets_.extent(0) = " <<
907  diagOffsets_.extent (0) << " != blockDiag_.extent(0) = "
908  << blockDiag_.extent (0) <<
909  ". Please report this bug to the Ifpack2 developers.");
910  savedDiagOffsets_ = true;
911  }
912  blockCrsA->getLocalDiagCopy (blockDiag_, diagOffsets_);
913 
914  // Use an unmanaged View in this method, so that when we take
915  // subviews of it (to get each diagonal block), we don't have to
916  // touch the reference count. Reference count updates are a
917  // thread scalability bottleneck and have a performance cost even
918  // without using threads.
919  unmanaged_block_diag_type blockDiag = blockDiag_;
920 
921  if (DoL1Method_ && IsParallel_) {
922  const scalar_type two = one + one;
923  const size_t maxLength = A_->getLocalMaxNumRowEntries ();
924  nonconst_local_inds_host_view_type indices ("indices",maxLength);
925  nonconst_values_host_view_type values_ ("values",maxLength * blockSize * blockSize);
926  size_t numEntries = 0;
927 
928  for (LO i = 0; i < lclNumMeshRows; ++i) {
929  // FIXME (mfh 16 Dec 2015) Get views instead of copies.
930  blockCrsA->getLocalRowCopy (i, indices, values_, numEntries);
931  scalar_type * values = reinterpret_cast<scalar_type*>(values_.data());
932 
933  auto diagBlock = Kokkos::subview (blockDiag, i, ALL (), ALL ());
934  for (LO subRow = 0; subRow < blockSize; ++subRow) {
935  magnitude_type diagonal_boost = STM::zero ();
936  for (size_t k = 0 ; k < numEntries ; ++k) {
937  if (indices[k] >= lclNumMeshRows) {
938  const size_t offset = blockSize*blockSize*k + subRow*blockSize;
939  for (LO subCol = 0; subCol < blockSize; ++subCol) {
940  diagonal_boost += STS::magnitude (values[offset+subCol] / two);
941  }
942  }
943  }
944  if (STS::magnitude (diagBlock(subRow, subRow)) < L1Eta_ * diagonal_boost) {
945  diagBlock(subRow, subRow) += diagonal_boost;
946  }
947  }
948  }
949  }
950 
951  int info = 0;
952  {
953  Impl::InvertDiagBlocks<unmanaged_block_diag_type> idb(blockDiag);
954  typedef typename unmanaged_block_diag_type::execution_space exec_space;
955  typedef Kokkos::RangePolicy<exec_space, LO> range_type;
956 
957  Kokkos::parallel_reduce (range_type (0, lclNumMeshRows), idb, info);
958  // FIXME (mfh 19 May 2017) Different processes may not
959  // necessarily have this error all at the same time, so it would
960  // be better just to preserve a local error state and let users
961  // check.
963  (info > 0, std::runtime_error, "GETF2 or GETRI failed on = " << info
964  << " diagonal blocks.");
965  }
966 
967  // In a debug build, do an extra test to make sure that all the
968  // factorizations were computed correctly.
969 #ifdef HAVE_IFPACK2_DEBUG
970  const int numResults = 2;
971  // Use "max = -min" trick to get min and max in a single all-reduce.
972  int lclResults[2], gblResults[2];
973  lclResults[0] = info;
974  lclResults[1] = -info;
975  gblResults[0] = 0;
976  gblResults[1] = 0;
977  reduceAll<int, int> (* (A_->getGraph ()->getComm ()), REDUCE_MIN,
978  numResults, lclResults, gblResults);
980  (gblResults[0] != 0 || gblResults[1] != 0, std::runtime_error,
981  "Ifpack2::Relaxation::compute: When processing the input "
982  "Tpetra::BlockCrsMatrix, one or more diagonal block LU factorizations "
983  "failed on one or more (MPI) processes.");
984 #endif // HAVE_IFPACK2_DEBUG
985  serialGaussSeidel_ = rcp(new SerialGaussSeidel(blockCrsA, blockDiag_, localSmoothingIndices_, DampingFactor_));
986  } // end TimeMonitor scope
987 
988  ComputeTime_ += (timer->wallTime() - startTime);
989  ++NumCompute_;
990  IsComputed_ = true;
991 }
992 
993 template<class MatrixType>
995 {
996  using Teuchos::Array;
997  using Teuchos::ArrayRCP;
998  using Teuchos::ArrayView;
999  using Teuchos::as;
1000  using Teuchos::Comm;
1001  using Teuchos::RCP;
1002  using Teuchos::rcp;
1003  using Teuchos::REDUCE_MAX;
1004  using Teuchos::REDUCE_MIN;
1005  using Teuchos::REDUCE_SUM;
1006  using Teuchos::reduceAll;
1007  using LO = local_ordinal_type;
1008  using vector_type = Tpetra::Vector<scalar_type, local_ordinal_type,
1010  using IST = typename vector_type::impl_scalar_type;
1011  using KAT = Kokkos::ArithTraits<IST>;
1012 
1013  const char methodName[] = "Ifpack2::Relaxation::compute";
1014  const scalar_type zero = STS::zero ();
1015  const scalar_type one = STS::one ();
1016 
1018  (A_.is_null (), std::runtime_error, methodName << ": "
1019  "The input matrix A is null. Please call setMatrix() with a nonnull "
1020  "input matrix, then call initialize(), before calling this method.");
1021 
1022  // We don't count initialization in compute() time.
1023  if (! isInitialized ()) {
1024  initialize ();
1025  }
1026 
1027  if (hasBlockCrsMatrix_) {
1028  computeBlockCrs ();
1029  return;
1030  }
1031 
1034  double startTime = timer->wallTime();
1035 
1036  { // Timing of compute starts here.
1037  Teuchos::TimeMonitor timeMon (*timer);
1038 
1039  // Precompute some quantities for "fixing" zero or tiny diagonal
1040  // entries. We'll only use them if this "fixing" is enabled.
1041  //
1042  // SmallTraits covers for the lack of eps() in
1043  // Teuchos::ScalarTraits when its template parameter is not a
1044  // floating-point type. (Ifpack2 sometimes gets instantiated for
1045  // integer Scalar types.)
1046  IST oneOverMinDiagVal = KAT::one () / static_cast<IST> (SmallTraits<scalar_type>::eps ());
1047  if ( MinDiagonalValue_ != zero)
1048  oneOverMinDiagVal = KAT::one () / static_cast<IST> (MinDiagonalValue_);
1049 
1050  // It's helpful not to have to recompute this magnitude each time.
1051  const magnitude_type minDiagValMag = STS::magnitude (MinDiagonalValue_);
1052 
1053  const LO numMyRows = static_cast<LO> (A_->getLocalNumRows ());
1054 
1056  (NumSweeps_ < 0, std::logic_error, methodName
1057  << ": NumSweeps_ = " << NumSweeps_ << " < 0. "
1058  "Please report this bug to the Ifpack2 developers.");
1059  IsComputed_ = false;
1060 
1061  if (Diagonal_.is_null()) {
1062  // A_->getLocalDiagCopy fills in all Vector entries, even if the
1063  // matrix has no stored entries in the corresponding rows.
1064  Diagonal_ = rcp (new vector_type (A_->getRowMap (), false));
1065  }
1066 
1067  if (checkDiagEntries_) {
1068  //
1069  // Check diagonal elements, replace zero elements with the minimum
1070  // diagonal value, and store their inverses. Count the number of
1071  // "small" and zero diagonal entries while we're at it.
1072  //
1073  size_t numSmallDiagEntries = 0; // "small" includes zero
1074  size_t numZeroDiagEntries = 0; // # zero diagonal entries
1075  size_t numNegDiagEntries = 0; // # negative (real parts of) diagonal entries
1076  magnitude_type minMagDiagEntryMag = STM::zero ();
1077  magnitude_type maxMagDiagEntryMag = STM::zero ();
1078 
1079  // FIXME: We are extracting the diagonal more than once. That
1080  // isn't very efficient, but we shouldn't be checking
1081  // diagonal entries at scale anyway.
1082  A_->getLocalDiagCopy (*Diagonal_); // slow path for row matrix
1083  std::unique_ptr<vector_type> origDiag;
1084  origDiag = std::unique_ptr<vector_type> (new vector_type (*Diagonal_, Teuchos::Copy));
1085  auto diag2d = Diagonal_->getLocalViewHost(Tpetra::Access::ReadWrite);
1086  auto diag = Kokkos::subview(diag2d, Kokkos::ALL(), 0);
1087  // As we go, keep track of the diagonal entries with the
1088  // least and greatest magnitude. We could use the trick of
1089  // starting min with +Inf and max with -Inf, but that
1090  // doesn't work if scalar_type is a built-in integer type.
1091  // Thus, we have to start by reading the first diagonal
1092  // entry redundantly.
1093  if (numMyRows != 0) {
1094  const magnitude_type d_0_mag = KAT::abs (diag(0));
1095  minMagDiagEntryMag = d_0_mag;
1096  maxMagDiagEntryMag = d_0_mag;
1097  }
1098 
1099  // Go through all the diagonal entries. Compute counts of
1100  // small-magnitude, zero, and negative-real-part entries.
1101  // Invert the diagonal entries that aren't too small. For
1102  // those too small in magnitude, replace them with
1103  // 1/MinDiagonalValue_ (or 1/eps if MinDiagonalValue_
1104  // happens to be zero).
1105  for (LO i = 0; i < numMyRows; ++i) {
1106  const IST d_i = diag(i);
1107  const magnitude_type d_i_mag = KAT::abs (d_i);
1108  // Work-around for GitHub Issue #5269.
1109  //const magnitude_type d_i_real = KAT::real (d_i);
1110  const auto d_i_real = getRealValue (d_i);
1111 
1112  // We can't compare complex numbers, but we can compare their
1113  // real parts.
1114  if (d_i_real < STM::zero ()) {
1115  ++numNegDiagEntries;
1116  }
1117  if (d_i_mag < minMagDiagEntryMag) {
1118  minMagDiagEntryMag = d_i_mag;
1119  }
1120  if (d_i_mag > maxMagDiagEntryMag) {
1121  maxMagDiagEntryMag = d_i_mag;
1122  }
1123 
1124  if (fixTinyDiagEntries_) {
1125  // <= not <, in case minDiagValMag is zero.
1126  if (d_i_mag <= minDiagValMag) {
1127  ++numSmallDiagEntries;
1128  if (d_i_mag == STM::zero ()) {
1129  ++numZeroDiagEntries;
1130  }
1131  diag(i) = oneOverMinDiagVal;
1132  }
1133  else {
1134  diag(i) = KAT::one () / d_i;
1135  }
1136  }
1137  else { // Don't fix zero or tiny diagonal entries.
1138  // <= not <, in case minDiagValMag is zero.
1139  if (d_i_mag <= minDiagValMag) {
1140  ++numSmallDiagEntries;
1141  if (d_i_mag == STM::zero ()) {
1142  ++numZeroDiagEntries;
1143  }
1144  }
1145  diag(i) = KAT::one () / d_i;
1146  }
1147  }
1148 
1149  // Collect global data about the diagonal entries. Only do this
1150  // (which involves O(1) all-reduces) if the user asked us to do
1151  // the extra work.
1152  //
1153  // FIXME (mfh 28 Mar 2013) This is wrong if some processes have
1154  // zero rows. Fixing this requires one of two options:
1155  //
1156  // 1. Use a custom reduction operation that ignores processes
1157  // with zero diagonal entries.
1158  // 2. Split the communicator, compute all-reduces using the
1159  // subcommunicator over processes that have a nonzero number
1160  // of diagonal entries, and then broadcast from one of those
1161  // processes (if there is one) to the processes in the other
1162  // subcommunicator.
1163  const Comm<int>& comm = * (A_->getRowMap ()->getComm ());
1164 
1165  // Compute global min and max magnitude of entries.
1166  Array<magnitude_type> localVals (2);
1167  localVals[0] = minMagDiagEntryMag;
1168  // (- (min (- x))) is the same as (max x).
1169  localVals[1] = -maxMagDiagEntryMag;
1170  Array<magnitude_type> globalVals (2);
1171  reduceAll<int, magnitude_type> (comm, REDUCE_MIN, 2,
1172  localVals.getRawPtr (),
1173  globalVals.getRawPtr ());
1174  globalMinMagDiagEntryMag_ = globalVals[0];
1175  globalMaxMagDiagEntryMag_ = -globalVals[1];
1176 
1177  Array<size_t> localCounts (3);
1178  localCounts[0] = numSmallDiagEntries;
1179  localCounts[1] = numZeroDiagEntries;
1180  localCounts[2] = numNegDiagEntries;
1181  Array<size_t> globalCounts (3);
1182  reduceAll<int, size_t> (comm, REDUCE_SUM, 3,
1183  localCounts.getRawPtr (),
1184  globalCounts.getRawPtr ());
1185  globalNumSmallDiagEntries_ = globalCounts[0];
1186  globalNumZeroDiagEntries_ = globalCounts[1];
1187  globalNumNegDiagEntries_ = globalCounts[2];
1188 
1189  // Compute and save the difference between the computed inverse
1190  // diagonal, and the original diagonal's inverse.
1191  vector_type diff (A_->getRowMap ());
1192  diff.reciprocal (*origDiag);
1193  diff.update (-one, *Diagonal_, one);
1194  globalDiagNormDiff_ = diff.norm2 ();
1195  }
1196 
1197  // Extract the diagonal entries. The CrsMatrix graph version is
1198  // faster than the row matrix version for subsequent calls to
1199  // compute(), since it caches the diagonal offsets.
1200 
1201  bool debugAgainstSlowPath = false;
1202 
1203  auto crsMat = Details::getCrsMatrix(A_);
1204 
1205  if (crsMat.get() && crsMat->isFillComplete ()) {
1206  // The invDiagKernel object computes diagonal offsets if
1207  // necessary. The "compute" call extracts diagonal enties,
1208  // optionally applies the L1 method and replacement of small
1209  // entries, and then inverts.
1210  if (invDiagKernel_.is_null())
1211  invDiagKernel_ = rcp(new Ifpack2::Details::InverseDiagonalKernel<op_type>(crsMat));
1212  else
1213  invDiagKernel_->setMatrix(crsMat);
1214  invDiagKernel_->compute(*Diagonal_,
1215  DoL1Method_ && IsParallel_, L1Eta_,
1216  fixTinyDiagEntries_, minDiagValMag);
1217  savedDiagOffsets_ = true;
1218 
1219  // mfh 27 May 2019: Later on, we should introduce an IFPACK2_DEBUG
1220  // environment variable to control this behavior at run time.
1221 #ifdef HAVE_IFPACK2_DEBUG
1222  debugAgainstSlowPath = true;
1223 #endif
1224  }
1225 
1226  if (crsMat.is_null() || ! crsMat->isFillComplete () || debugAgainstSlowPath) {
1227  // We could not call the CrsMatrix version above, or want to
1228  // debug by comparing against the slow path.
1229 
1230  // FIXME: The L1 method in this code path runs on host, since we
1231  // don't have device access for row matrices.
1232 
1233  RCP<vector_type> Diagonal;
1234  if (debugAgainstSlowPath)
1235  Diagonal = rcp(new vector_type(A_->getRowMap ()));
1236  else
1237  Diagonal = Diagonal_;
1238 
1239  A_->getLocalDiagCopy (*Diagonal); // slow path for row matrix
1240 
1241  // Setup for L1 Methods.
1242  // Here we add half the value of the off-processor entries in the row,
1243  // but only if diagonal isn't sufficiently large.
1244  //
1245  // This follows from Equation (6.5) in: Baker, Falgout, Kolev and
1246  // Yang. "Multigrid Smoothers for Ultraparallel Computing." SIAM
1247  // J. Sci. Comput., Vol. 33, No. 5. (2011), pp. 2864-2887.
1248  //
1249  if (DoL1Method_ && IsParallel_) {
1250  const row_matrix_type& A_row = *A_;
1251  auto diag = Diagonal->getLocalViewHost(Tpetra::Access::ReadWrite);
1252  const magnitude_type two = STM::one () + STM::one ();
1253  const size_t maxLength = A_row.getLocalMaxNumRowEntries ();
1254  nonconst_local_inds_host_view_type indices("indices",maxLength);
1255  nonconst_values_host_view_type values("values",maxLength);
1256  size_t numEntries;
1257 
1258  for (LO i = 0; i < numMyRows; ++i) {
1259  A_row.getLocalRowCopy (i, indices, values, numEntries);
1260  magnitude_type diagonal_boost = STM::zero ();
1261  for (size_t k = 0 ; k < numEntries; ++k) {
1262  if (indices[k] >= numMyRows) {
1263  diagonal_boost += STS::magnitude (values[k] / two);
1264  }
1265  }
1266  if (KAT::magnitude (diag(i, 0)) < L1Eta_ * diagonal_boost) {
1267  diag(i, 0) += diagonal_boost;
1268  }
1269  }
1270  }
1271 
1272  //
1273  // Invert the matrix's diagonal entries (in Diagonal).
1274  //
1275  if (fixTinyDiagEntries_) {
1276  // Go through all the diagonal entries. Invert those that
1277  // aren't too small in magnitude. For those that are too
1278  // small in magnitude, replace them with oneOverMinDiagVal.
1279  auto localDiag = Diagonal->getLocalViewDevice(Tpetra::Access::ReadWrite);
1280  Kokkos::parallel_for(Kokkos::RangePolicy<MyExecSpace>(0, localDiag.extent(0)),
1281  KOKKOS_LAMBDA (local_ordinal_type i) {
1282  auto d_i = localDiag(i, 0);
1283  const magnitude_type d_i_mag = KAT::magnitude (d_i);
1284  // <= not <, in case minDiagValMag is zero.
1285  if (d_i_mag <= minDiagValMag) {
1286  d_i = oneOverMinDiagVal;
1287  }
1288  else {
1289  // For Stokhos types, operator/ returns an expression
1290  // type. Explicitly convert to IST before returning.
1291  d_i = IST (KAT::one () / d_i);
1292  }
1293  localDiag(i, 0) = d_i;
1294  });
1295  }
1296  else { // don't fix tiny or zero diagonal entries
1297  Diagonal->reciprocal (*Diagonal);
1298  }
1299 
1300  if (debugAgainstSlowPath) {
1301  // Validate the fast-path diagonal against the slow-path diagonal.
1302  Diagonal->update (STS::one (), *Diagonal_, -STS::one ());
1303  const magnitude_type err = Diagonal->normInf ();
1304  // The two diagonals should be exactly the same, so their
1305  // difference should be exactly zero.
1307  (err > 100*STM::eps(), std::logic_error, methodName << ": "
1308  << "\"fast-path\" diagonal computation failed. "
1309  "\\|D1 - D2\\|_inf = " << err << ".");
1310  }
1311  }
1312 
1313  // Count floating-point operations of computing the inverse diagonal.
1314  //
1315  // FIXME (mfh 30 Mar 2013) Shouldn't counts be global, not local?
1316  if (STS::isComplex) { // magnitude: at least 3 flops per diagonal entry
1317  ComputeFlops_ += 4.0 * numMyRows;
1318  }
1319  else {
1320  ComputeFlops_ += numMyRows;
1321  }
1322 
1323  if (PrecType_ == Ifpack2::Details::MTGS ||
1324  PrecType_ == Ifpack2::Details::MTSGS ||
1325  PrecType_ == Ifpack2::Details::GS2 ||
1326  PrecType_ == Ifpack2::Details::SGS2) {
1327 
1328  //KokkosKernels GaussSeidel Initialization.
1329  using scalar_view_t = typename local_matrix_device_type::values_type;
1330 
1332  (crsMat.is_null(), std::logic_error, methodName << ": "
1333  "Multithreaded Gauss-Seidel methods currently only work "
1334  "when the input matrix is a Tpetra::CrsMatrix.");
1335  local_matrix_device_type kcsr = crsMat->getLocalMatrixDevice ();
1336 
1337  //TODO BMK: This should be ReadOnly, and KokkosKernels should accept a
1338  //const-valued view for user-provided D^-1. OK for now, Diagonal_ is nonconst.
1339  auto diagView_2d = Diagonal_->getLocalViewDevice (Tpetra::Access::ReadWrite);
1340  scalar_view_t diagView_1d = Kokkos::subview (diagView_2d, Kokkos::ALL (), 0);
1341  KokkosSparse::Experimental::gauss_seidel_numeric(
1342  mtKernelHandle_.getRawPtr (),
1343  A_->getLocalNumRows (),
1344  A_->getLocalNumCols (),
1345  kcsr.graph.row_map,
1346  kcsr.graph.entries,
1347  kcsr.values,
1348  diagView_1d,
1349  is_matrix_structurally_symmetric_);
1350  }
1351  else if(PrecType_ == Ifpack2::Details::GS || PrecType_ == Ifpack2::Details::SGS) {
1352  if(crsMat)
1353  serialGaussSeidel_ = rcp(new SerialGaussSeidel(crsMat, Diagonal_, localSmoothingIndices_, DampingFactor_));
1354  else
1355  serialGaussSeidel_ = rcp(new SerialGaussSeidel(A_, Diagonal_, localSmoothingIndices_, DampingFactor_));
1356  }
1357  } // end TimeMonitor scope
1358 
1359  ComputeTime_ += (timer->wallTime() - startTime);
1360  ++NumCompute_;
1361  IsComputed_ = true;
1362 }
1363 
1364 
1365 template<class MatrixType>
1366 void
1368 ApplyInverseRichardson (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1369  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
1370 {
1371  using Teuchos::as;
1372  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1373  const double numVectors = as<double> (X.getNumVectors ());
1374  if (ZeroStartingSolution_) {
1375  // For the first Richardson sweep, if we are allowed to assume that
1376  // the initial guess is zero, then Richardson is just alpha times the RHS
1377  // Compute it as Y(i,j) = DampingFactor_ * X(i,j).
1378  Y.scale(DampingFactor_,X);
1379 
1380  // Count (global) floating-point operations. Ifpack2 represents
1381  // this as a floating-point number rather than an integer, so that
1382  // overflow (for a very large number of calls, or a very large
1383  // problem) is approximate instead of catastrophic.
1384  double flopUpdate = 0.0;
1385  if (DampingFactor_ == STS::one ()) {
1386  // Y(i,j) = X(i,j): one multiply for each entry of Y.
1387  flopUpdate = numGlobalRows * numVectors;
1388  } else {
1389  // Y(i,j) = DampingFactor_ * X(i,j):
1390  // One multiplies per entry of Y.
1391  flopUpdate = numGlobalRows * numVectors;
1392  }
1393  ApplyFlops_ += flopUpdate;
1394  if (NumSweeps_ == 1) {
1395  return;
1396  }
1397  }
1398  // If we were allowed to assume that the starting guess was zero,
1399  // then we have already done the first sweep above.
1400  const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1401 
1402  // Allocate a multivector if the cached one isn't perfect
1403  updateCachedMultiVector(Y.getMap(),as<size_t>(numVectors));
1404 
1405  for (int j = startSweep; j < NumSweeps_; ++j) {
1406  // Each iteration: Y = Y + \omega D^{-1} (X - A*Y)
1407  Tpetra::Details::residual(*A_,Y,X,*cachedMV_);
1408  Y.update(DampingFactor_,*cachedMV_,STS::one());
1409  }
1410 
1411  // For each column of output, for each pass over the matrix:
1412  //
1413  // - One + and one * for each matrix entry
1414  // - One / and one + for each row of the matrix
1415  // - If the damping factor is not one: one * for each row of the
1416  // matrix. (It's not fair to count this if the damping factor is
1417  // one, since the implementation could skip it. Whether it does
1418  // or not is the implementation's choice.)
1419 
1420  // Floating-point operations due to the damping factor, per matrix
1421  // row, per direction, per columm of output.
1422  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1423  const double dampingFlops = (DampingFactor_ == STS::one ()) ? 0.0 : 1.0;
1424  ApplyFlops_ += as<double> (NumSweeps_ - startSweep) * numVectors *
1425  (2.0 * numGlobalNonzeros + dampingFlops);
1426 }
1427 
1428 
1429 template<class MatrixType>
1430 void
1431 Relaxation<MatrixType>::
1432 ApplyInverseJacobi (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1433  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
1434 {
1435  using Teuchos::as;
1436  if (hasBlockCrsMatrix_) {
1437  ApplyInverseJacobi_BlockCrsMatrix (X, Y);
1438  return;
1439  }
1440 
1441  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1442  const double numVectors = as<double> (X.getNumVectors ());
1443  if (ZeroStartingSolution_) {
1444  // For the first Jacobi sweep, if we are allowed to assume that
1445  // the initial guess is zero, then Jacobi is just diagonal
1446  // scaling. (A_ij * x_j = 0 for i != j, since x_j = 0.)
1447  // Compute it as Y(i,j) = DampingFactor_ * X(i,j) * D(i).
1448  Y.elementWiseMultiply (DampingFactor_, *Diagonal_, X, STS::zero ());
1449 
1450  // Count (global) floating-point operations. Ifpack2 represents
1451  // this as a floating-point number rather than an integer, so that
1452  // overflow (for a very large number of calls, or a very large
1453  // problem) is approximate instead of catastrophic.
1454  double flopUpdate = 0.0;
1455  if (DampingFactor_ == STS::one ()) {
1456  // Y(i,j) = X(i,j) * D(i): one multiply for each entry of Y.
1457  flopUpdate = numGlobalRows * numVectors;
1458  } else {
1459  // Y(i,j) = DampingFactor_ * X(i,j) * D(i):
1460  // Two multiplies per entry of Y.
1461  flopUpdate = 2.0 * numGlobalRows * numVectors;
1462  }
1463  ApplyFlops_ += flopUpdate;
1464  if (NumSweeps_ == 1) {
1465  return;
1466  }
1467  }
1468  // If we were allowed to assume that the starting guess was zero,
1469  // then we have already done the first sweep above.
1470  const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1471 
1472  // Allocate a multivector if the cached one isn't perfect
1473  updateCachedMultiVector(Y.getMap(),as<size_t>(numVectors));
1474 
1475  for (int j = startSweep; j < NumSweeps_; ++j) {
1476  // Each iteration: Y = Y + \omega D^{-1} (X - A*Y)
1477  Tpetra::Details::residual(*A_,Y,X,*cachedMV_);
1478  Y.elementWiseMultiply (DampingFactor_, *Diagonal_, *cachedMV_, STS::one ());
1479  }
1480 
1481  // For each column of output, for each pass over the matrix:
1482  //
1483  // - One + and one * for each matrix entry
1484  // - One / and one + for each row of the matrix
1485  // - If the damping factor is not one: one * for each row of the
1486  // matrix. (It's not fair to count this if the damping factor is
1487  // one, since the implementation could skip it. Whether it does
1488  // or not is the implementation's choice.)
1489 
1490  // Floating-point operations due to the damping factor, per matrix
1491  // row, per direction, per columm of output.
1492  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1493  const double dampingFlops = (DampingFactor_ == STS::one ()) ? 0.0 : 1.0;
1494  ApplyFlops_ += as<double> (NumSweeps_ - startSweep) * numVectors *
1495  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1496 }
1497 
1498 template<class MatrixType>
1499 void
1500 Relaxation<MatrixType>::
1501 ApplyInverseJacobi_BlockCrsMatrix (const Tpetra::MultiVector<scalar_type,
1502  local_ordinal_type,
1503  global_ordinal_type,
1504  node_type>& X,
1505  Tpetra::MultiVector<scalar_type,
1506  local_ordinal_type,
1507  global_ordinal_type,
1508  node_type>& Y) const
1509 {
1510  using Tpetra::BlockMultiVector;
1511  using BMV = BlockMultiVector<scalar_type, local_ordinal_type,
1512  global_ordinal_type, node_type>;
1513 
1514  const block_crs_matrix_type* blockMatConst =
1515  dynamic_cast<const block_crs_matrix_type*> (A_.getRawPtr ());
1517  (blockMatConst == nullptr, std::logic_error, "This method should "
1518  "never be called if the matrix A_ is not a BlockCrsMatrix. "
1519  "Please report this bug to the Ifpack2 developers.");
1520  // mfh 23 Jan 2016: Unfortunately, the const cast is necessary.
1521  // This is because applyBlock() is nonconst (more accurate), while
1522  // apply() is const (required by Tpetra::Operator interface, but a
1523  // lie, because it possibly allocates temporary buffers).
1524  block_crs_matrix_type* blockMat =
1525  const_cast<block_crs_matrix_type*> (blockMatConst);
1526 
1527  auto meshRowMap = blockMat->getRowMap ();
1528  auto meshColMap = blockMat->getColMap ();
1529  const local_ordinal_type blockSize = blockMat->getBlockSize ();
1530 
1531  BMV X_blk (X, *meshColMap, blockSize);
1532  BMV Y_blk (Y, *meshRowMap, blockSize);
1533 
1534  if (ZeroStartingSolution_) {
1535  // For the first sweep, if we are allowed to assume that the
1536  // initial guess is zero, then block Jacobi is just block diagonal
1537  // scaling. (A_ij * x_j = 0 for i != j, since x_j = 0.)
1538  Y_blk.blockWiseMultiply (DampingFactor_, blockDiag_, X_blk);
1539  if (NumSweeps_ == 1) {
1540  return;
1541  }
1542  }
1543 
1544  auto pointRowMap = Y.getMap ();
1545  const size_t numVecs = X.getNumVectors ();
1546 
1547  // We don't need to initialize the result MV, since the sparse
1548  // mat-vec will clobber its contents anyway.
1549  BMV A_times_Y (*meshRowMap, *pointRowMap, blockSize, numVecs);
1550 
1551  // If we were allowed to assume that the starting guess was zero,
1552  // then we have already done the first sweep above.
1553  const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1554 
1555  for (int j = startSweep; j < NumSweeps_; ++j) {
1556  blockMat->applyBlock (Y_blk, A_times_Y);
1557  // Y := Y + \omega D^{-1} (X - A*Y). Use A_times_Y as scratch.
1558  Y_blk.blockJacobiUpdate (DampingFactor_, blockDiag_,
1559  X_blk, A_times_Y, STS::one ());
1560  }
1561 }
1562 
1563 template<class MatrixType>
1564 void
1565 Relaxation<MatrixType>::
1566 ApplyInverseSerialGS (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1567  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
1568  Tpetra::ESweepDirection direction) const
1569 {
1570  using this_type = Relaxation<MatrixType>;
1571  // The CrsMatrix version is faster, because it can access the sparse
1572  // matrix data directly, rather than by copying out each row's data
1573  // in turn. Thus, we check whether the RowMatrix is really a
1574  // CrsMatrix.
1575  //
1576  // FIXME (mfh 07 Jul 2013) See note on crs_matrix_type typedef
1577  // declaration in Ifpack2_Relaxation_decl.hpp header file. The code
1578  // will still be correct if the cast fails, but it will use an
1579  // unoptimized kernel.
1580  auto blockCrsMat = Teuchos::rcp_dynamic_cast<const block_crs_matrix_type> (A_);
1581  auto crsMat = Details::getCrsMatrix(A_);
1582  if (blockCrsMat.get()) {
1583  const_cast<this_type&> (*this).ApplyInverseSerialGS_BlockCrsMatrix (*blockCrsMat, X, Y, direction);
1584  }
1585  else if (crsMat.get()) {
1586  ApplyInverseSerialGS_CrsMatrix (*crsMat, X, Y, direction);
1587  }
1588  else {
1589  ApplyInverseSerialGS_RowMatrix (X, Y, direction);
1590  }
1591 }
1592 
1593 
1594 template<class MatrixType>
1595 void
1596 Relaxation<MatrixType>::
1597 ApplyInverseSerialGS_RowMatrix (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1598  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
1599  Tpetra::ESweepDirection direction) const {
1600  using Teuchos::Array;
1601  using Teuchos::ArrayRCP;
1602  using Teuchos::ArrayView;
1603  using Teuchos::as;
1604  using Teuchos::RCP;
1605  using Teuchos::rcp;
1606  using Teuchos::rcpFromRef;
1607  typedef Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> MV;
1608 
1609  // Tpetra's GS implementation for CrsMatrix handles zeroing out the
1610  // starting multivector itself. The generic RowMatrix version here
1611  // does not, so we have to zero out Y here.
1612  if (ZeroStartingSolution_) {
1613  Y.putScalar (STS::zero ());
1614  }
1615 
1616  size_t NumVectors = X.getNumVectors();
1617 
1618  RCP<MV> Y2;
1619  if (IsParallel_) {
1620  if (Importer_.is_null ()) { // domain and column Maps are the same.
1621  updateCachedMultiVector (Y.getMap (), NumVectors);
1622  }
1623  else {
1624  updateCachedMultiVector (Importer_->getTargetMap (), NumVectors);
1625  }
1626  Y2 = cachedMV_;
1627  }
1628  else {
1629  Y2 = rcpFromRef (Y);
1630  }
1631 
1632  for (int j = 0; j < NumSweeps_; ++j) {
1633  // data exchange is here, once per sweep
1634  if (IsParallel_) {
1635  if (Importer_.is_null ()) {
1636  // FIXME (mfh 27 May 2019) This doesn't deep copy -- not
1637  // clear if this is correct. Reevaluate at some point.
1638 
1639  *Y2 = Y; // just copy, since domain and column Maps are the same
1640  } else {
1641  Y2->doImport (Y, *Importer_, Tpetra::INSERT);
1642  }
1643  }
1644  serialGaussSeidel_->apply(*Y2, X, direction);
1645 
1646  // FIXME (mfh 02 Jan 2013) This is only correct if row Map == range Map.
1647  if (IsParallel_) {
1648  Tpetra::deep_copy (Y, *Y2);
1649  }
1650  }
1651 
1652  // See flop count discussion in implementation of ApplyInverseGS_CrsMatrix().
1653  const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1654  const double numVectors = as<double> (X.getNumVectors ());
1655  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1656  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1657  ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
1658  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1659 }
1660 
1661 
1662 template<class MatrixType>
1663 void
1664 Relaxation<MatrixType>::
1665 ApplyInverseSerialGS_CrsMatrix(const crs_matrix_type& A,
1666  const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& B,
1667  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1668  Tpetra::ESweepDirection direction) const
1669 {
1670  using Teuchos::null;
1671  using Teuchos::RCP;
1672  using Teuchos::rcp;
1673  using Teuchos::rcpFromRef;
1674  using Teuchos::rcp_const_cast;
1675  typedef scalar_type Scalar;
1676  const char prefix[] = "Ifpack2::Relaxation::SerialGS: ";
1677  const scalar_type ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
1678 
1680  ! A.isFillComplete (), std::runtime_error,
1681  prefix << "The matrix is not fill complete.");
1683  NumSweeps_ < 0, std::invalid_argument,
1684  prefix << "The number of sweeps must be nonnegative, "
1685  "but you provided numSweeps = " << NumSweeps_ << " < 0.");
1686 
1687  if (NumSweeps_ == 0) {
1688  return;
1689  }
1690 
1691  RCP<const import_type> importer = A.getGraph ()->getImporter ();
1692 
1693  RCP<const map_type> domainMap = A.getDomainMap ();
1694  RCP<const map_type> rangeMap = A.getRangeMap ();
1695  RCP<const map_type> rowMap = A.getGraph ()->getRowMap ();
1696  RCP<const map_type> colMap = A.getGraph ()->getColMap ();
1697 
1698 #ifdef HAVE_IFPACK2_DEBUG
1699  {
1700  // The relation 'isSameAs' is transitive. It's also a
1701  // collective, so we don't have to do a "shared" test for
1702  // exception (i.e., a global reduction on the test value).
1704  ! X.getMap ()->isSameAs (*domainMap), std::runtime_error,
1705  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
1706  "multivector X be in the domain Map of the matrix.");
1708  ! B.getMap ()->isSameAs (*rangeMap), std::runtime_error,
1709  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
1710  "B be in the range Map of the matrix.");
1712  ! Diagonal_->getMap ()->isSameAs (*rowMap), std::runtime_error,
1713  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
1714  "D be in the row Map of the matrix.");
1716  ! rowMap->isSameAs (*rangeMap), std::runtime_error,
1717  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the row Map and the "
1718  "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
1720  ! domainMap->isSameAs (*rangeMap), std::runtime_error,
1721  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the domain Map and "
1722  "the range Map of the matrix be the same.");
1723  }
1724 #endif
1725 
1726  // Fetch a (possibly cached) temporary column Map multivector
1727  // X_colMap, and a domain Map view X_domainMap of it. Both have
1728  // constant stride by construction. We know that the domain Map
1729  // must include the column Map, because our Gauss-Seidel kernel
1730  // requires that the row Map, domain Map, and range Map are all
1731  // the same, and that each process owns all of its own diagonal
1732  // entries of the matrix.
1733 
1734  RCP<multivector_type> X_colMap;
1735  RCP<multivector_type> X_domainMap;
1736  bool copyBackOutput = false;
1737  if (importer.is_null ()) {
1738  X_colMap = Teuchos::rcpFromRef (X);
1739  X_domainMap = Teuchos::rcpFromRef (X);
1740  // Column Map and domain Map are the same, so there are no
1741  // remote entries. Thus, if we are not setting the initial
1742  // guess to zero, we don't have to worry about setting remote
1743  // entries to zero, even though we are not doing an Import in
1744  // this case.
1745  if (ZeroStartingSolution_) {
1746  X_colMap->putScalar (ZERO);
1747  }
1748  // No need to copy back to X at end.
1749  }
1750  else { // Column Map and domain Map are _not_ the same.
1751  updateCachedMultiVector(colMap, X.getNumVectors());
1752  X_colMap = cachedMV_;
1753  X_domainMap = X_colMap->offsetViewNonConst (domainMap, 0);
1754 
1755  if (ZeroStartingSolution_) {
1756  // No need for an Import, since we're filling with zeros.
1757  X_colMap->putScalar (ZERO);
1758  } else {
1759  // We could just copy X into X_domainMap. However, that
1760  // wastes a copy, because the Import also does a copy (plus
1761  // communication). Since the typical use case for
1762  // Gauss-Seidel is a small number of sweeps (2 is typical), we
1763  // don't want to waste that copy. Thus, we do the Import
1764  // here, and skip the first Import in the first sweep.
1765  // Importing directly from X effects the copy into X_domainMap
1766  // (which is a view of X_colMap).
1767  X_colMap->doImport (X, *importer, Tpetra::INSERT);
1768  }
1769  copyBackOutput = true; // Don't forget to copy back at end.
1770  } // if column and domain Maps are (not) the same
1771 
1772  for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
1773  if (! importer.is_null () && sweep > 0) {
1774  // We already did the first Import for the zeroth sweep above,
1775  // if it was necessary.
1776  X_colMap->doImport (*X_domainMap, *importer, Tpetra::INSERT);
1777  }
1778  // Do local Gauss-Seidel (forward, backward or symmetric)
1779  serialGaussSeidel_->apply(*X_colMap, B, direction);
1780  }
1781 
1782  if (copyBackOutput) {
1783  try {
1784  deep_copy (X , *X_domainMap); // Copy result back into X.
1785  } catch (std::exception& e) {
1787  true, std::runtime_error, prefix << "deep_copy(X, *X_domainMap) "
1788  "threw an exception: " << e.what ());
1789  }
1790  }
1791 
1792  // For each column of output, for each sweep over the matrix:
1793  //
1794  // - One + and one * for each matrix entry
1795  // - One / and one + for each row of the matrix
1796  // - If the damping factor is not one: one * for each row of the
1797  // matrix. (It's not fair to count this if the damping factor is
1798  // one, since the implementation could skip it. Whether it does
1799  // or not is the implementation's choice.)
1800 
1801  // Floating-point operations due to the damping factor, per matrix
1802  // row, per direction, per columm of output.
1803  const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1804  const double numVectors = X.getNumVectors ();
1805  const double numGlobalRows = A_->getGlobalNumRows ();
1806  const double numGlobalNonzeros = A_->getGlobalNumEntries ();
1807  ApplyFlops_ += NumSweeps_ * numVectors *
1808  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1809 }
1810 
1811 template<class MatrixType>
1812 void
1813 Relaxation<MatrixType>::
1814 ApplyInverseSerialGS_BlockCrsMatrix (const block_crs_matrix_type& A,
1815  const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1816  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
1817  Tpetra::ESweepDirection direction)
1818 {
1819  using Tpetra::INSERT;
1820  using Teuchos::RCP;
1821  using Teuchos::rcp;
1822  using Teuchos::rcpFromRef;
1823 
1824  //FIXME: (tcf) 8/21/2014 -- may be problematic for multiple right hand sides
1825  //
1826  // NOTE (mfh 12 Sep 2014) I don't think it should be a problem for
1827  // multiple right-hand sides, unless the input or output MultiVector
1828  // does not have constant stride. We should check for that case
1829  // here, in case it doesn't work in localGaussSeidel (which is
1830  // entirely possible).
1831  block_multivector_type yBlock(Y, *(A.getGraph ()->getDomainMap()), A.getBlockSize());
1832  const block_multivector_type xBlock(X, *(A.getColMap ()), A.getBlockSize());
1833 
1834  bool performImport = false;
1835  RCP<block_multivector_type> yBlockCol;
1836  if (Importer_.is_null()) {
1837  yBlockCol = rcpFromRef(yBlock);
1838  }
1839  else {
1840  if (yBlockColumnPointMap_.is_null() ||
1841  yBlockColumnPointMap_->getNumVectors() != yBlock.getNumVectors() ||
1842  yBlockColumnPointMap_->getBlockSize() != yBlock.getBlockSize()) {
1843  yBlockColumnPointMap_ =
1844  rcp(new block_multivector_type(*(A.getColMap()), A.getBlockSize(),
1845  static_cast<local_ordinal_type>(yBlock.getNumVectors())));
1846  }
1847  yBlockCol = yBlockColumnPointMap_;
1848  if (pointImporter_.is_null()) {
1849  auto srcMap = rcp(new map_type(yBlock.getPointMap()));
1850  auto tgtMap = rcp(new map_type(yBlockCol->getPointMap()));
1851  pointImporter_ = rcp(new import_type(srcMap, tgtMap));
1852  }
1853  performImport = true;
1854  }
1855 
1856  multivector_type yBlock_mv;
1857  multivector_type yBlockCol_mv;
1858  RCP<const multivector_type> yBlockColPointDomain;
1859  if (performImport) { // create views (shallow copies)
1860  yBlock_mv = yBlock.getMultiVectorView();
1861  yBlockCol_mv = yBlockCol->getMultiVectorView();
1862  yBlockColPointDomain =
1863  yBlockCol_mv.offsetView(A.getDomainMap(), 0);
1864  }
1865 
1866  if (ZeroStartingSolution_) {
1867  yBlockCol->putScalar(STS::zero ());
1868  }
1869  else if (performImport) {
1870  yBlockCol_mv.doImport(yBlock_mv, *pointImporter_, Tpetra::INSERT);
1871  }
1872 
1873  for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
1874  if (performImport && sweep > 0) {
1875  yBlockCol_mv.doImport(yBlock_mv, *pointImporter_, Tpetra::INSERT);
1876  }
1877  serialGaussSeidel_->applyBlock(*yBlockCol, xBlock, direction);
1878  if (performImport) {
1879  Tpetra::deep_copy(Y, *yBlockColPointDomain);
1880  }
1881  }
1882 }
1883 
1884 template<class MatrixType>
1885 void
1886 Relaxation<MatrixType>::
1887 ApplyInverseMTGS_CrsMatrix(
1888  const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& B,
1889  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1890  Tpetra::ESweepDirection direction) const
1891 {
1892  using Teuchos::null;
1893  using Teuchos::RCP;
1894  using Teuchos::rcp;
1895  using Teuchos::rcpFromRef;
1896  using Teuchos::rcp_const_cast;
1897  using Teuchos::as;
1898 
1899  typedef scalar_type Scalar;
1900 
1901  const char prefix[] = "Ifpack2::Relaxation::(reordered)MTGaussSeidel: ";
1902  const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
1903 
1904  auto crsMat = Details::getCrsMatrix(A_);
1906  (crsMat.is_null(), std::logic_error, "Ifpack2::Relaxation::apply: "
1907  "Multithreaded Gauss-Seidel methods currently only work when the "
1908  "input matrix is a Tpetra::CrsMatrix.");
1909 
1910  //Teuchos::ArrayView<local_ordinal_type> rowIndices; // unused, as of 04 Jan 2017
1912  (! localSmoothingIndices_.is_null (), std::logic_error,
1913  "Ifpack2's implementation of Multithreaded Gauss-Seidel does not "
1914  "implement the case where the user supplies an iteration order. "
1915  "This error used to appear as \"MT GaussSeidel ignores the given "
1916  "order\". "
1917  "I tried to add more explanation, but I didn't implement \"MT "
1918  "GaussSeidel\" [sic]. "
1919  "You'll have to ask the person who did.");
1920 
1922  (! crsMat->isFillComplete (), std::runtime_error, prefix << "The "
1923  "input CrsMatrix is not fill complete. Please call fillComplete "
1924  "on the matrix, then do setup again, before calling apply(). "
1925  "\"Do setup\" means that if only the matrix's values have changed "
1926  "since last setup, you need only call compute(). If the matrix's "
1927  "structure may also have changed, you must first call initialize(), "
1928  "then call compute(). If you have not set up this preconditioner "
1929  "for this matrix before, you must first call initialize(), then "
1930  "call compute().");
1932  (NumSweeps_ < 0, std::logic_error, prefix << ": NumSweeps_ = "
1933  << NumSweeps_ << " < 0. Please report this bug to the Ifpack2 "
1934  "developers.");
1935  if (NumSweeps_ == 0) {
1936  return;
1937  }
1938 
1939  RCP<const import_type> importer = crsMat->getGraph ()->getImporter ();
1941  ! crsMat->getGraph ()->getExporter ().is_null (), std::runtime_error,
1942  "This method's implementation currently requires that the matrix's row, "
1943  "domain, and range Maps be the same. This cannot be the case, because "
1944  "the matrix has a nontrivial Export object.");
1945 
1946  RCP<const map_type> domainMap = crsMat->getDomainMap ();
1947  RCP<const map_type> rangeMap = crsMat->getRangeMap ();
1948  RCP<const map_type> rowMap = crsMat->getGraph ()->getRowMap ();
1949  RCP<const map_type> colMap = crsMat->getGraph ()->getColMap ();
1950 
1951 #ifdef HAVE_IFPACK2_DEBUG
1952  {
1953  // The relation 'isSameAs' is transitive. It's also a
1954  // collective, so we don't have to do a "shared" test for
1955  // exception (i.e., a global reduction on the test value).
1957  ! X.getMap ()->isSameAs (*domainMap), std::runtime_error,
1958  "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
1959  "multivector X be in the domain Map of the matrix.");
1961  ! B.getMap ()->isSameAs (*rangeMap), std::runtime_error,
1962  "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
1963  "B be in the range Map of the matrix.");
1965  ! rowMap->isSameAs (*rangeMap), std::runtime_error,
1966  "Ifpack2::Relaxation::MTGaussSeidel requires that the row Map and the "
1967  "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
1969  ! domainMap->isSameAs (*rangeMap), std::runtime_error,
1970  "Ifpack2::Relaxation::MTGaussSeidel requires that the domain Map and "
1971  "the range Map of the matrix be the same.");
1972  }
1973 #else
1974  // Forestall any compiler warnings for unused variables.
1975  (void) rangeMap;
1976  (void) rowMap;
1977 #endif // HAVE_IFPACK2_DEBUG
1978 
1979  // Fetch a (possibly cached) temporary column Map multivector
1980  // X_colMap, and a domain Map view X_domainMap of it. Both have
1981  // constant stride by construction. We know that the domain Map
1982  // must include the column Map, because our Gauss-Seidel kernel
1983  // requires that the row Map, domain Map, and range Map are all
1984  // the same, and that each process owns all of its own diagonal
1985  // entries of the matrix.
1986 
1987  RCP<multivector_type> X_colMap;
1988  RCP<multivector_type> X_domainMap;
1989  bool copyBackOutput = false;
1990  if (importer.is_null ()) {
1991  if (X.isConstantStride ()) {
1992  X_colMap = rcpFromRef (X);
1993  X_domainMap = rcpFromRef (X);
1994 
1995  // Column Map and domain Map are the same, so there are no
1996  // remote entries. Thus, if we are not setting the initial
1997  // guess to zero, we don't have to worry about setting remote
1998  // entries to zero, even though we are not doing an Import in
1999  // this case.
2000  if (ZeroStartingSolution_) {
2001  X_colMap->putScalar (ZERO);
2002  }
2003  // No need to copy back to X at end.
2004  }
2005  else {
2006  // We must copy X into a constant stride multivector.
2007  // Just use the cached column Map multivector for that.
2008  // force=true means fill with zeros, so no need to fill
2009  // remote entries (not in domain Map) with zeros.
2010  updateCachedMultiVector(colMap,as<size_t>(X.getNumVectors()));
2011  X_colMap = cachedMV_;
2012  // X_domainMap is always a domain Map view of the column Map
2013  // multivector. In this case, the domain and column Maps are
2014  // the same, so X_domainMap _is_ X_colMap.
2015  X_domainMap = X_colMap;
2016  if (! ZeroStartingSolution_) { // Don't copy if zero initial guess
2017  try {
2018  deep_copy (*X_domainMap , X); // Copy X into constant stride MV
2019  } catch (std::exception& e) {
2020  std::ostringstream os;
2021  os << "Ifpack2::Relaxation::MTGaussSeidel: "
2022  "deep_copy(*X_domainMap, X) threw an exception: "
2023  << e.what () << ".";
2024  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what ());
2025  }
2026  }
2027  copyBackOutput = true; // Don't forget to copy back at end.
2028  /*
2029  TPETRA_EFFICIENCY_WARNING(
2030  ! X.isConstantStride (),
2031  std::runtime_error,
2032  "MTGaussSeidel: The current implementation of the Gauss-Seidel "
2033  "kernel requires that X and B both have constant stride. Since X "
2034  "does not have constant stride, we had to make a copy. This is a "
2035  "limitation of the current implementation and not your fault, but we "
2036  "still report it as an efficiency warning for your information.");
2037  */
2038  }
2039  }
2040  else { // Column Map and domain Map are _not_ the same.
2041  updateCachedMultiVector(colMap,as<size_t>(X.getNumVectors()));
2042  X_colMap = cachedMV_;
2043 
2044  X_domainMap = X_colMap->offsetViewNonConst (domainMap, 0);
2045 
2046  if (ZeroStartingSolution_) {
2047  // No need for an Import, since we're filling with zeros.
2048  X_colMap->putScalar (ZERO);
2049  } else {
2050  // We could just copy X into X_domainMap. However, that
2051  // wastes a copy, because the Import also does a copy (plus
2052  // communication). Since the typical use case for
2053  // Gauss-Seidel is a small number of sweeps (2 is typical), we
2054  // don't want to waste that copy. Thus, we do the Import
2055  // here, and skip the first Import in the first sweep.
2056  // Importing directly from X effects the copy into X_domainMap
2057  // (which is a view of X_colMap).
2058  X_colMap->doImport (X, *importer, Tpetra::CombineMode::INSERT);
2059  }
2060  copyBackOutput = true; // Don't forget to copy back at end.
2061  } // if column and domain Maps are (not) the same
2062 
2063  // The Gauss-Seidel / SOR kernel expects multivectors of constant
2064  // stride. X_colMap is by construction, but B might not be. If
2065  // it's not, we have to make a copy.
2066  RCP<const multivector_type> B_in;
2067  if (B.isConstantStride ()) {
2068  B_in = rcpFromRef (B);
2069  }
2070  else {
2071  // Range Map and row Map are the same in this case, so we can
2072  // use the cached row Map multivector to store a constant stride
2073  // copy of B.
2074  RCP<multivector_type> B_in_nonconst = rcp (new multivector_type(rowMap, B.getNumVectors()));
2075  try {
2076  deep_copy (*B_in_nonconst, B);
2077  } catch (std::exception& e) {
2078  std::ostringstream os;
2079  os << "Ifpack2::Relaxation::MTGaussSeidel: "
2080  "deep_copy(*B_in_nonconst, B) threw an exception: "
2081  << e.what () << ".";
2082  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what ());
2083  }
2084  B_in = rcp_const_cast<const multivector_type> (B_in_nonconst);
2085 
2086  /*
2087  TPETRA_EFFICIENCY_WARNING(
2088  ! B.isConstantStride (),
2089  std::runtime_error,
2090  "MTGaussSeidel: The current implementation requires that B have "
2091  "constant stride. Since B does not have constant stride, we had to "
2092  "copy it into a separate constant-stride multivector. This is a "
2093  "limitation of the current implementation and not your fault, but we "
2094  "still report it as an efficiency warning for your information.");
2095  */
2096  }
2097 
2098  local_matrix_device_type kcsr = crsMat->getLocalMatrixDevice ();
2099 
2100  bool update_y_vector = true;
2101  //false as it was done up already, and we dont want to zero it in each sweep.
2102  bool zero_x_vector = false;
2103 
2104  for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
2105  if (! importer.is_null () && sweep > 0) {
2106  // We already did the first Import for the zeroth sweep above,
2107  // if it was necessary.
2108  X_colMap->doImport (*X_domainMap, *importer, Tpetra::CombineMode::INSERT);
2109  }
2110 
2111  if (direction == Tpetra::Symmetric) {
2112  KokkosSparse::Experimental::symmetric_gauss_seidel_apply
2113  (mtKernelHandle_.getRawPtr(), A_->getLocalNumRows(), A_->getLocalNumCols(),
2114  kcsr.graph.row_map, kcsr.graph.entries, kcsr.values,
2115  X_colMap->getLocalViewDevice(Tpetra::Access::ReadWrite),
2116  B_in->getLocalViewDevice(Tpetra::Access::ReadOnly),
2117  zero_x_vector, update_y_vector, DampingFactor_, 1);
2118  }
2119  else if (direction == Tpetra::Forward) {
2120  KokkosSparse::Experimental::forward_sweep_gauss_seidel_apply
2121  (mtKernelHandle_.getRawPtr(), A_->getLocalNumRows(), A_->getLocalNumCols(),
2122  kcsr.graph.row_map,kcsr.graph.entries, kcsr.values,
2123  X_colMap->getLocalViewDevice(Tpetra::Access::ReadWrite),
2124  B_in->getLocalViewDevice(Tpetra::Access::ReadOnly),
2125  zero_x_vector, update_y_vector, DampingFactor_, 1);
2126  }
2127  else if (direction == Tpetra::Backward) {
2128  KokkosSparse::Experimental::backward_sweep_gauss_seidel_apply
2129  (mtKernelHandle_.getRawPtr(), A_->getLocalNumRows(), A_->getLocalNumCols(),
2130  kcsr.graph.row_map,kcsr.graph.entries, kcsr.values,
2131  X_colMap->getLocalViewDevice(Tpetra::Access::ReadWrite),
2132  B_in->getLocalViewDevice(Tpetra::Access::ReadOnly),
2133  zero_x_vector, update_y_vector, DampingFactor_, 1);
2134  }
2135  else {
2137  true, std::invalid_argument,
2138  prefix << "The 'direction' enum does not have any of its valid "
2139  "values: Forward, Backward, or Symmetric.");
2140  }
2141  update_y_vector = false;
2142  }
2143 
2144  if (copyBackOutput) {
2145  try {
2146  deep_copy (X , *X_domainMap); // Copy result back into X.
2147  } catch (std::exception& e) {
2149  true, std::runtime_error, prefix << "deep_copy(X, *X_domainMap) "
2150  "threw an exception: " << e.what ());
2151  }
2152  }
2153 
2154  const double dampingFlops = (DampingFactor_ == STS::one ()) ? 0.0 : 1.0;
2155  const double numVectors = as<double> (X.getNumVectors ());
2156  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
2157  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
2158  double ApplyFlops = NumSweeps_ * numVectors *
2159  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
2160  if (direction == Tpetra::Symmetric)
2161  ApplyFlops *= 2.0;
2162  ApplyFlops_ += ApplyFlops;
2163 }
2164 
2165 template<class MatrixType>
2167 {
2168  std::ostringstream os;
2169 
2170  // Output is a valid YAML dictionary in flow style. If you don't
2171  // like everything on a single line, you should call describe()
2172  // instead.
2173  os << "\"Ifpack2::Relaxation\": {";
2174 
2175  os << "Initialized: " << (isInitialized () ? "true" : "false") << ", "
2176  << "Computed: " << (isComputed () ? "true" : "false") << ", ";
2177 
2178  // It's useful to print this instance's relaxation method (Jacobi,
2179  // Gauss-Seidel, or symmetric Gauss-Seidel). If you want more info
2180  // than that, call describe() instead.
2181  os << "Type: ";
2182  if (PrecType_ == Ifpack2::Details::JACOBI) {
2183  os << "Jacobi";
2184  } else if (PrecType_ == Ifpack2::Details::GS) {
2185  os << "Gauss-Seidel";
2186  } else if (PrecType_ == Ifpack2::Details::SGS) {
2187  os << "Symmetric Gauss-Seidel";
2188  } else if (PrecType_ == Ifpack2::Details::MTGS) {
2189  os << "MT Gauss-Seidel";
2190  } else if (PrecType_ == Ifpack2::Details::MTSGS) {
2191  os << "MT Symmetric Gauss-Seidel";
2192  } else if (PrecType_ == Ifpack2::Details::GS2) {
2193  os << "Two-stage Gauss-Seidel";
2194  } else if (PrecType_ == Ifpack2::Details::SGS2) {
2195  os << "Two-stage Symmetric Gauss-Seidel";
2196  }
2197  else {
2198  os << "INVALID";
2199  }
2200  if(hasBlockCrsMatrix_)
2201  os<<", BlockCrs";
2202 
2203  os << ", " << "sweeps: " << NumSweeps_ << ", "
2204  << "damping factor: " << DampingFactor_ << ", ";
2205 
2206  if (PrecType_ == Ifpack2::Details::MTGS || PrecType_ == Ifpack2::Details::MTSGS) {
2207  os << "\"relaxation: mtgs cluster size\": " << clusterSize_ << ", ";
2208  os << "\"relaxation: long row threshold\": " << longRowThreshold_ << ", ";
2209  os << "\"relaxation: symmetric matrix structure\": " << (is_matrix_structurally_symmetric_ ? "true" : "false") << ", ";
2210  os << "\"relaxation: relaxation: mtgs coloring algorithm\": ";
2211  switch(mtColoringAlgorithm_)
2212  {
2213  case KokkosGraph::COLORING_DEFAULT:
2214  os << "DEFAULT"; break;
2215  case KokkosGraph::COLORING_SERIAL:
2216  os << "SERIAL"; break;
2217  case KokkosGraph::COLORING_VB:
2218  os << "VB"; break;
2219  case KokkosGraph::COLORING_VBBIT:
2220  os << "VBBIT"; break;
2221  case KokkosGraph::COLORING_VBCS:
2222  os << "VBCS"; break;
2223  case KokkosGraph::COLORING_VBD:
2224  os << "VBD"; break;
2225  case KokkosGraph::COLORING_VBDBIT:
2226  os << "VBDBIT"; break;
2227  case KokkosGraph::COLORING_EB:
2228  os << "EB"; break;
2229  default:
2230  os << "*Invalid*";
2231  }
2232  os << ", ";
2233  }
2234 
2235  if (PrecType_ == Ifpack2::Details::GS2 ||
2236  PrecType_ == Ifpack2::Details::SGS2) {
2237  os << "outer sweeps: " << NumOuterSweeps_ << ", "
2238  << "inner sweeps: " << NumInnerSweeps_ << ", "
2239  << "inner damping factor: " << InnerDampingFactor_ << ", ";
2240  }
2241 
2242  if (DoL1Method_) {
2243  os << "use l1: " << DoL1Method_ << ", "
2244  << "l1 eta: " << L1Eta_ << ", ";
2245  }
2246 
2247  if (A_.is_null ()) {
2248  os << "Matrix: null";
2249  }
2250  else {
2251  os << "Global matrix dimensions: ["
2252  << A_->getGlobalNumRows () << ", " << A_->getGlobalNumCols () << "]"
2253  << ", Global nnz: " << A_->getGlobalNumEntries();
2254  }
2255 
2256  os << "}";
2257  return os.str ();
2258 }
2259 
2260 
2261 template<class MatrixType>
2262 void
2265  const Teuchos::EVerbosityLevel verbLevel) const
2266 {
2267  using Teuchos::OSTab;
2269  using Teuchos::VERB_DEFAULT;
2270  using Teuchos::VERB_NONE;
2271  using Teuchos::VERB_LOW;
2272  using Teuchos::VERB_MEDIUM;
2273  using Teuchos::VERB_HIGH;
2274  using Teuchos::VERB_EXTREME;
2275  using std::endl;
2276 
2277  const Teuchos::EVerbosityLevel vl =
2278  (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
2279 
2280  const int myRank = this->getComm ()->getRank ();
2281 
2282  // none: print nothing
2283  // low: print O(1) info from Proc 0
2284  // medium:
2285  // high:
2286  // extreme:
2287 
2288  if (vl != VERB_NONE && myRank == 0) {
2289  // Describable interface asks each implementation to start with a tab.
2290  OSTab tab1 (out);
2291  // Output is valid YAML; hence the quotes, to protect the colons.
2292  out << "\"Ifpack2::Relaxation\":" << endl;
2293  OSTab tab2 (out);
2294  out << "MatrixType: \"" << TypeNameTraits<MatrixType>::name () << "\""
2295  << endl;
2296  if (this->getObjectLabel () != "") {
2297  out << "Label: " << this->getObjectLabel () << endl;
2298  }
2299  out << "Initialized: " << (isInitialized () ? "true" : "false") << endl
2300  << "Computed: " << (isComputed () ? "true" : "false") << endl
2301  << "Parameters: " << endl;
2302  {
2303  OSTab tab3 (out);
2304  out << "\"relaxation: type\": ";
2305  if (PrecType_ == Ifpack2::Details::JACOBI) {
2306  out << "Jacobi";
2307  } else if (PrecType_ == Ifpack2::Details::GS) {
2308  out << "Gauss-Seidel";
2309  } else if (PrecType_ == Ifpack2::Details::SGS) {
2310  out << "Symmetric Gauss-Seidel";
2311  } else if (PrecType_ == Ifpack2::Details::MTGS) {
2312  out << "MT Gauss-Seidel";
2313  } else if (PrecType_ == Ifpack2::Details::MTSGS) {
2314  out << "MT Symmetric Gauss-Seidel";
2315  } else if (PrecType_ == Ifpack2::Details::GS2) {
2316  out << "Two-stage Gauss-Seidel";
2317  } else if (PrecType_ == Ifpack2::Details::SGS2) {
2318  out << "Two-stage Symmetric Gauss-Seidel";
2319  } else {
2320  out << "INVALID";
2321  }
2322  // We quote these parameter names because they contain colons.
2323  // YAML uses the colon to distinguish key from value.
2324  out << endl
2325  << "\"relaxation: sweeps\": " << NumSweeps_ << endl
2326  << "\"relaxation: damping factor\": " << DampingFactor_ << endl
2327  << "\"relaxation: min diagonal value\": " << MinDiagonalValue_ << endl
2328  << "\"relaxation: zero starting solution\": " << ZeroStartingSolution_ << endl
2329  << "\"relaxation: backward mode\": " << DoBackwardGS_ << endl
2330  << "\"relaxation: use l1\": " << DoL1Method_ << endl
2331  << "\"relaxation: l1 eta\": " << L1Eta_ << endl;
2332  if (PrecType_ == Ifpack2::Details::MTGS || PrecType_ == Ifpack2::Details::MTSGS) {
2333  out << "\"relaxation: mtgs cluster size\": " << clusterSize_ << endl;
2334  out << "\"relaxation: long row threshold\": " << longRowThreshold_ << endl;
2335  out << "\"relaxation: symmetric matrix structure\": " << (is_matrix_structurally_symmetric_ ? "true" : "false") << endl;
2336  out << "\"relaxation: relaxation: mtgs coloring algorithm\": ";
2337  switch(mtColoringAlgorithm_)
2338  {
2339  case KokkosGraph::COLORING_DEFAULT:
2340  out << "DEFAULT"; break;
2341  case KokkosGraph::COLORING_SERIAL:
2342  out << "SERIAL"; break;
2343  case KokkosGraph::COLORING_VB:
2344  out << "VB"; break;
2345  case KokkosGraph::COLORING_VBBIT:
2346  out << "VBBIT"; break;
2347  case KokkosGraph::COLORING_VBCS:
2348  out << "VBCS"; break;
2349  case KokkosGraph::COLORING_VBD:
2350  out << "VBD"; break;
2351  case KokkosGraph::COLORING_VBDBIT:
2352  out << "VBDBIT"; break;
2353  case KokkosGraph::COLORING_EB:
2354  out << "EB"; break;
2355  default:
2356  out << "*Invalid*";
2357  }
2358  out << endl;
2359  }
2360  if (PrecType_ == Ifpack2::Details::GS2 || PrecType_ == Ifpack2::Details::SGS2) {
2361  out << "\"relaxation: inner damping factor\": " << InnerDampingFactor_ << endl;
2362  out << "\"relaxation: outer sweeps\" : " << NumOuterSweeps_ << endl;
2363  out << "\"relaxation: inner sweeps\" : " << NumInnerSweeps_ << endl;
2364  }
2365  }
2366  out << "Computed quantities:" << endl;
2367  {
2368  OSTab tab3 (out);
2369  out << "Global number of rows: " << A_->getGlobalNumRows () << endl
2370  << "Global number of columns: " << A_->getGlobalNumCols () << endl;
2371  }
2372  if (checkDiagEntries_ && isComputed ()) {
2373  out << "Properties of input diagonal entries:" << endl;
2374  {
2375  OSTab tab3 (out);
2376  out << "Magnitude of minimum-magnitude diagonal entry: "
2377  << globalMinMagDiagEntryMag_ << endl
2378  << "Magnitude of maximum-magnitude diagonal entry: "
2379  << globalMaxMagDiagEntryMag_ << endl
2380  << "Number of diagonal entries with small magnitude: "
2381  << globalNumSmallDiagEntries_ << endl
2382  << "Number of zero diagonal entries: "
2383  << globalNumZeroDiagEntries_ << endl
2384  << "Number of diagonal entries with negative real part: "
2385  << globalNumNegDiagEntries_ << endl
2386  << "Abs 2-norm diff between computed and actual inverse "
2387  << "diagonal: " << globalDiagNormDiff_ << endl;
2388  }
2389  }
2390  if (isComputed ()) {
2391  out << "Saved diagonal offsets: "
2392  << (savedDiagOffsets_ ? "true" : "false") << endl;
2393  }
2394  out << "Call counts and total times (in seconds): " << endl;
2395  {
2396  OSTab tab3 (out);
2397  out << "initialize: " << endl;
2398  {
2399  OSTab tab4 (out);
2400  out << "Call count: " << NumInitialize_ << endl;
2401  out << "Total time: " << InitializeTime_ << endl;
2402  }
2403  out << "compute: " << endl;
2404  {
2405  OSTab tab4 (out);
2406  out << "Call count: " << NumCompute_ << endl;
2407  out << "Total time: " << ComputeTime_ << endl;
2408  }
2409  out << "apply: " << endl;
2410  {
2411  OSTab tab4 (out);
2412  out << "Call count: " << NumApply_ << endl;
2413  out << "Total time: " << ApplyTime_ << endl;
2414  }
2415  }
2416  }
2417 }
2418 
2419 
2420 } // namespace Ifpack2
2421 
2422 #define IFPACK2_RELAXATION_INSTANT(S,LO,GO,N) \
2423  template class Ifpack2::Relaxation< Tpetra::RowMatrix<S, LO, GO, N> >;
2424 
2425 #endif // IFPACK2_RELAXATION_DEF_HPP
bool hasTransposeApply() const
Whether apply() and applyMat() let you apply the transpose or conjugate transpose.
Definition: Ifpack2_Relaxation_def.hpp:494
double getComputeFlops() const
Total number of floating-point operations over all calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:536
double getApplyFlops() const
Total number of floating-point operations over all calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:542
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object&#39;s attributes to the given output stream.
Definition: Ifpack2_Relaxation_def.hpp:2264
bool is_null(const boost::shared_ptr< T > &p)
basic_OSTab< char > OSTab
static magnitudeType eps()
Relaxation(const Teuchos::RCP< const row_matrix_type > &A)
Constructor.
Definition: Ifpack2_Relaxation_def.hpp:183
T & get(const std::string &name, T def_value)
void compute()
Compute the preconditioner (&quot;numeric setup&quot;);.
Definition: Ifpack2_Relaxation_def.hpp:994
static RCP< Time > getNewCounter(const std::string &name)
static RCP< Time > lookupCounter(const std::string &name)
virtual void printDoc(std::string const &docString, std::ostream &out) const =0
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
size_t getNodeSmootherComplexity() const
Get a rough estimate of cost per iteration.
Definition: Ifpack2_Relaxation_def.hpp:549
virtual const std::string getXMLTypeName() const =0
static std::ostream & printLines(std::ostream &os, const std::string &linePrefix, const std::string &lines)
ParameterList & set(std::string const &name, T &&value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
The communicator over which the matrix and vectors are distributed.
Definition: Ifpack2_Relaxation_def.hpp:451
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Change the matrix to be preconditioned.
Definition: Ifpack2_Relaxation_def.hpp:162
int getNumApply() const
Total number of calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:512
T * getRawPtr() const
bool remove(std::string const &name, bool throwIfNotExists=true)
double getComputeTime() const
Total time in seconds spent in all calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:524
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
MatrixType::node_type node_type
The Node type used by the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:232
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getDomainMap() const
Returns the Tpetra::Map object associated with the domain of this operator.
Definition: Ifpack2_Relaxation_def.hpp:471
void setParameters(const Teuchos::ParameterList &params)
Set the relaxation / preconditioner parameters.
Definition: Ifpack2_Relaxation_def.hpp:441
File for utility functions.
std::string description() const
A simple one-line description of this object.
Definition: Ifpack2_Relaxation_def.hpp:2166
int getNumInitialize() const
Total number of calls to initialize().
Definition: Ifpack2_Relaxation_def.hpp:500
void validateParametersAndSetDefaults(ParameterList const &validParamList, int const depth=1000)
virtual ValidStringsList validStringValues() const =0
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getRangeMap() const
Returns the Tpetra::Map object associated with the range of this operator.
Definition: Ifpack2_Relaxation_def.hpp:484
MatrixType::global_ordinal_type global_ordinal_type
The type of global indices in the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:229
double getInitializeTime() const
Total time in seconds spent in all calls to initialize().
Definition: Ifpack2_Relaxation_def.hpp:518
any & getAny(bool activeQry=true)
Compute scaled damped residual for Chebyshev.
Definition: Ifpack2_Details_InverseDiagonalKernel_decl.hpp:45
void applyMat(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS) const
Apply the input matrix to X, returning the result in Y.
Definition: Ifpack2_Relaxation_def.hpp:675
Teuchos::RCP< const row_matrix_type > getMatrix() const
The matrix to be preconditioned.
Definition: Ifpack2_Relaxation_def.hpp:462
TypeTo as(const TypeFrom &t)
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:226
bool isType(const std::string &name) const
MatrixType::scalar_type scalar_type
The type of the entries of the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:223
std::string typeName() const
const std::type_info & type() const
static double wallTime()
int getNumCompute() const
Total number of calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:506
Relaxation preconditioners for Tpetra::RowMatrix and Tpetra::CrsMatrix sparse matrices.
Definition: Ifpack2_Relaxation_decl.hpp:206
double getApplyTime() const
Total time in seconds spent in all calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:530
void getValidParameters(Teuchos::ParameterList &params)
Fills a list which contains all the parameters possibly used by Ifpack2.
Definition: Ifpack2_Parameters.cpp:18
T * get() const
void initialize()
Initialize the preconditioner (&quot;symbolic setup&quot;).
Definition: Ifpack2_Relaxation_def.hpp:696
Tpetra::RowMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > row_matrix_type
Tpetra::RowMatrix specialization used by this class.
Definition: Ifpack2_Relaxation_decl.hpp:242
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the preconditioner to X, returning the result in Y.
Definition: Ifpack2_Relaxation_def.hpp:562
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const
Return a list of all the parameters that this class accepts.
Definition: Ifpack2_Relaxation_def.hpp:195
Teuchos::ScalarTraits< scalar_type >::magnitudeType magnitude_type
The type of the magnitude (absolute value) of a matrix entry.
Definition: Ifpack2_Relaxation_decl.hpp:238
virtual void validate(ParameterEntry const &entry, std::string const &paramName, std::string const &sublistName) const =0
bool is_null() const