Ifpack2 Templated Preconditioning Package  Version 1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Ifpack2_Relaxation_def.hpp
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 //@HEADER
41 */
42 
43 #ifndef IFPACK2_RELAXATION_DEF_HPP
44 #define IFPACK2_RELAXATION_DEF_HPP
45 
46 #include "Teuchos_StandardParameterEntryValidators.hpp"
47 #include "Teuchos_TimeMonitor.hpp"
48 #include "Tpetra_CrsMatrix.hpp"
49 #include "Tpetra_BlockCrsMatrix.hpp"
50 #include "Tpetra_BlockView.hpp"
51 #include "Ifpack2_Utilities.hpp"
52 #include "MatrixMarket_Tpetra.hpp"
53 #include "Tpetra_transform_MultiVector.hpp"
54 #include "Tpetra_withLocalAccess_MultiVector.hpp"
55 #include <cstdlib>
56 #include <memory>
57 #include <sstream>
58 #include "KokkosSparse_gauss_seidel.hpp"
59 
60 namespace {
61  // Validate that a given int is nonnegative.
62  class NonnegativeIntValidator : public Teuchos::ParameterEntryValidator {
63  public:
64  // Constructor (does nothing).
65  NonnegativeIntValidator () {}
66 
67  // ParameterEntryValidator wants this method.
69  return Teuchos::null;
70  }
71 
72  // Actually validate the parameter's value.
73  void
74  validate (const Teuchos::ParameterEntry& entry,
75  const std::string& paramName,
76  const std::string& sublistName) const
77  {
78  using std::endl;
79  Teuchos::any anyVal = entry.getAny (true);
80  const std::string entryName = entry.getAny (false).typeName ();
81 
83  anyVal.type () != typeid (int),
85  "Parameter \"" << paramName << "\" in sublist \"" << sublistName
86  << "\" has the wrong type." << endl << "Parameter: " << paramName
87  << endl << "Type specified: " << entryName << endl
88  << "Type required: int" << endl);
89 
90  const int val = Teuchos::any_cast<int> (anyVal);
93  "Parameter \"" << paramName << "\" in sublist \"" << sublistName
94  << "\" is negative." << endl << "Parameter: " << paramName
95  << endl << "Value specified: " << val << endl
96  << "Required range: [0, INT_MAX]" << endl);
97  }
98 
99  // ParameterEntryValidator wants this method.
100  const std::string getXMLTypeName () const {
101  return "NonnegativeIntValidator";
102  }
103 
104  // ParameterEntryValidator wants this method.
105  void
106  printDoc (const std::string& docString,
107  std::ostream &out) const
108  {
109  Teuchos::StrUtils::printLines (out, "# ", docString);
110  out << "#\tValidator Used: " << std::endl;
111  out << "#\t\tNonnegativeIntValidator" << std::endl;
112  }
113  };
114 
115  // A way to get a small positive number (eps() for floating-point
116  // types, or 1 for integer types) when Teuchos::ScalarTraits doesn't
117  // define it (for example, for integer values).
118  template<class Scalar, const bool isOrdinal=Teuchos::ScalarTraits<Scalar>::isOrdinal>
119  class SmallTraits {
120  public:
121  // Return eps if Scalar is a floating-point type, else return 1.
122  static const Scalar eps ();
123  };
124 
125  // Partial specialization for when Scalar is not a floating-point type.
126  template<class Scalar>
127  class SmallTraits<Scalar, true> {
128  public:
129  static const Scalar eps () {
131  }
132  };
133 
134  // Partial specialization for when Scalar is a floating-point type.
135  template<class Scalar>
136  class SmallTraits<Scalar, false> {
137  public:
138  static const Scalar eps () {
140  }
141  };
142 
143  // Work-around for GitHub Issue #5269.
144  template<class Scalar,
145  const bool isComplex = Teuchos::ScalarTraits<Scalar>::isComplex>
146  struct RealTraits {};
147 
148  template<class Scalar>
149  struct RealTraits<Scalar, false> {
150  using val_type = Scalar;
151  using mag_type = Scalar;
152  static KOKKOS_INLINE_FUNCTION mag_type real (const val_type& z) {
153  return z;
154  }
155  };
156 
157  template<class Scalar>
158  struct RealTraits<Scalar, true> {
159  using val_type = Scalar;
160  using mag_type = typename Teuchos::ScalarTraits<Scalar>::magnitudeType;
161  static KOKKOS_INLINE_FUNCTION mag_type real (const val_type& z) {
162  return Kokkos::ArithTraits<val_type>::real (z);
163  }
164  };
165 
166  template<class Scalar>
167  KOKKOS_INLINE_FUNCTION typename RealTraits<Scalar>::mag_type
168  getRealValue (const Scalar& z) {
169  return RealTraits<Scalar>::real (z);
170  }
171 
172 } // namespace (anonymous)
173 
174 namespace Ifpack2 {
175 
176 template<class MatrixType>
177 void
178 Relaxation<MatrixType>::
179 updateCachedMultiVector (const Teuchos::RCP<const Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type>>& map,
180  size_t numVecs) const
181 {
182  // Allocate a multivector if the cached one isn't perfect. Checking
183  // for map pointer equality is much cheaper than Map::isSameAs.
184  if (cachedMV_.is_null () ||
185  map.get () != cachedMV_->getMap ().get () ||
186  cachedMV_->getNumVectors () != numVecs) {
187  using MV = Tpetra::MultiVector<scalar_type, local_ordinal_type,
188  global_ordinal_type, node_type>;
189  cachedMV_ = Teuchos::rcp (new MV (map, numVecs, false));
190  }
191 }
192 
193 
194 template<class MatrixType>
197 {
198  if (A.getRawPtr () != A_.getRawPtr ()) { // it's a different matrix
199  Importer_ = Teuchos::null;
200  Diagonal_ = Teuchos::null; // ??? what if this comes from the user???
201  isInitialized_ = false;
202  IsComputed_ = false;
203  diagOffsets_ = Kokkos::View<size_t*, typename node_type::device_type> ();
204  savedDiagOffsets_ = false;
205  hasBlockCrsMatrix_ = false;
206  if (! A.is_null ()) {
207  IsParallel_ = (A->getRowMap ()->getComm ()->getSize () > 1);
208  }
209  A_ = A;
210  }
211 }
212 
213 
214 template<class MatrixType>
217 : A_ (A),
218  NumSweeps_ (1),
219  PrecType_ (Ifpack2::Details::JACOBI),
220  DampingFactor_ (STS::one ()),
221  IsParallel_ ((A.is_null () || A->getRowMap ().is_null () || A->getRowMap ()->getComm ().is_null ()) ?
222  false : // a reasonable default if there's no communicator
223  A->getRowMap ()->getComm ()->getSize () > 1),
224  ZeroStartingSolution_ (true),
225  DoBackwardGS_ (false),
226  DoL1Method_ (false),
227  L1Eta_ (Teuchos::as<magnitude_type> (1.5)),
228  MinDiagonalValue_ (STS::zero ()),
229  fixTinyDiagEntries_ (false),
230  checkDiagEntries_ (false),
231  is_matrix_structurally_symmetric_ (false),
232  ifpack2_dump_matrix_(false),
233  isInitialized_ (false),
234  IsComputed_ (false),
235  NumInitialize_ (0),
236  NumCompute_ (0),
237  NumApply_ (0),
238  InitializeTime_ (0.0), // Times are double anyway, so no need for ScalarTraits.
239  ComputeTime_ (0.0),
240  ApplyTime_ (0.0),
241  ComputeFlops_ (0.0),
242  ApplyFlops_ (0.0),
243  globalMinMagDiagEntryMag_ (STM::zero ()),
244  globalMaxMagDiagEntryMag_ (STM::zero ()),
245  globalNumSmallDiagEntries_ (0),
246  globalNumZeroDiagEntries_ (0),
247  globalNumNegDiagEntries_ (0),
248  globalDiagNormDiff_(Teuchos::ScalarTraits<magnitude_type>::zero()),
249  savedDiagOffsets_ (false),
250  hasBlockCrsMatrix_ (false)
251 {
252  this->setObjectLabel ("Ifpack2::Relaxation");
253 }
254 
255 
256 template<class MatrixType>
259 {
260  using Teuchos::Array;
262  using Teuchos::parameterList;
263  using Teuchos::RCP;
264  using Teuchos::rcp;
265  using Teuchos::rcp_const_cast;
266  using Teuchos::rcp_implicit_cast;
267  using Teuchos::setStringToIntegralParameter;
269 
270  if (validParams_.is_null ()) {
271  RCP<ParameterList> pl = parameterList ("Ifpack2::Relaxation");
272 
273  // Set a validator that automatically converts from the valid
274  // string options to their enum values.
275  Array<std::string> precTypes (5);
276  precTypes[0] = "Jacobi";
277  precTypes[1] = "Gauss-Seidel";
278  precTypes[2] = "Symmetric Gauss-Seidel";
279  precTypes[3] = "MT Gauss-Seidel";
280  precTypes[4] = "MT Symmetric Gauss-Seidel";
281  Array<Details::RelaxationType> precTypeEnums (5);
282  precTypeEnums[0] = Details::JACOBI;
283  precTypeEnums[1] = Details::GS;
284  precTypeEnums[2] = Details::SGS;
285  precTypeEnums[3] = Details::MTGS;
286  precTypeEnums[4] = Details::MTSGS;
287  const std::string defaultPrecType ("Jacobi");
288  setStringToIntegralParameter<Details::RelaxationType> ("relaxation: type",
289  defaultPrecType, "Relaxation method", precTypes (), precTypeEnums (),
290  pl.getRawPtr ());
291 
292  const int numSweeps = 1;
293  RCP<PEV> numSweepsValidator =
294  rcp_implicit_cast<PEV> (rcp (new NonnegativeIntValidator));
295  pl->set ("relaxation: sweeps", numSweeps, "Number of relaxation sweeps",
296  rcp_const_cast<const PEV> (numSweepsValidator));
297 
298  const scalar_type dampingFactor = STS::one ();
299  pl->set ("relaxation: damping factor", dampingFactor);
300 
301  const bool zeroStartingSolution = true;
302  pl->set ("relaxation: zero starting solution", zeroStartingSolution);
303 
304  const bool doBackwardGS = false;
305  pl->set ("relaxation: backward mode", doBackwardGS);
306 
307  const bool doL1Method = false;
308  pl->set ("relaxation: use l1", doL1Method);
309 
310  const magnitude_type l1eta = (STM::one() + STM::one() + STM::one()) /
311  (STM::one() + STM::one()); // 1.5
312  pl->set ("relaxation: l1 eta", l1eta);
313 
314  const scalar_type minDiagonalValue = STS::zero ();
315  pl->set ("relaxation: min diagonal value", minDiagonalValue);
316 
317  const bool fixTinyDiagEntries = false;
318  pl->set ("relaxation: fix tiny diagonal entries", fixTinyDiagEntries);
319 
320  const bool checkDiagEntries = false;
321  pl->set ("relaxation: check diagonal entries", checkDiagEntries);
322 
323  Teuchos::ArrayRCP<local_ordinal_type> localSmoothingIndices = Teuchos::null;
324  pl->set("relaxation: local smoothing indices", localSmoothingIndices);
325 
326  const bool is_matrix_structurally_symmetric = false;
327  pl->set("relaxation: symmetric matrix structure", is_matrix_structurally_symmetric);
328 
329  const bool ifpack2_dump_matrix = false;
330  pl->set("relaxation: ifpack2 dump matrix", ifpack2_dump_matrix);
331 
332  validParams_ = rcp_const_cast<const ParameterList> (pl);
333  }
334  return validParams_;
335 }
336 
337 
338 template<class MatrixType>
340 {
341  using Teuchos::getIntegralValue;
343  using Teuchos::RCP;
344  typedef scalar_type ST; // just to make code below shorter
345 
346  if (pl.isType<double>("relaxation: damping factor")) {
347  // Make sure that ST=complex can run with a damping factor that is
348  // a double.
349  ST df = pl.get<double>("relaxation: damping factor");
350  pl.remove("relaxation: damping factor");
351  pl.set("relaxation: damping factor",df);
352  }
353 
355 
356  const Details::RelaxationType precType =
357  getIntegralValue<Details::RelaxationType> (pl, "relaxation: type");
358  const int numSweeps = pl.get<int> ("relaxation: sweeps");
359  const ST dampingFactor = pl.get<ST> ("relaxation: damping factor");
360  const bool zeroStartSol = pl.get<bool> ("relaxation: zero starting solution");
361  const bool doBackwardGS = pl.get<bool> ("relaxation: backward mode");
362  const bool doL1Method = pl.get<bool> ("relaxation: use l1");
363  const magnitude_type l1Eta = pl.get<magnitude_type> ("relaxation: l1 eta");
364  const ST minDiagonalValue = pl.get<ST> ("relaxation: min diagonal value");
365  const bool fixTinyDiagEntries = pl.get<bool> ("relaxation: fix tiny diagonal entries");
366  const bool checkDiagEntries = pl.get<bool> ("relaxation: check diagonal entries");
367  const bool is_matrix_structurally_symmetric = pl.get<bool> ("relaxation: symmetric matrix structure");
368  const bool ifpack2_dump_matrix = pl.get<bool> ("relaxation: ifpack2 dump matrix");
369 
370  Teuchos::ArrayRCP<local_ordinal_type> localSmoothingIndices = pl.get<Teuchos::ArrayRCP<local_ordinal_type> >("relaxation: local smoothing indices");
371 
372 
373  // "Commit" the changes, now that we've validated everything.
374  PrecType_ = precType;
375  NumSweeps_ = numSweeps;
376  DampingFactor_ = dampingFactor;
377  ZeroStartingSolution_ = zeroStartSol;
378  DoBackwardGS_ = doBackwardGS;
379  DoL1Method_ = doL1Method;
380  L1Eta_ = l1Eta;
381  MinDiagonalValue_ = minDiagonalValue;
382  fixTinyDiagEntries_ = fixTinyDiagEntries;
383  checkDiagEntries_ = checkDiagEntries;
384  is_matrix_structurally_symmetric_ = is_matrix_structurally_symmetric;
385  ifpack2_dump_matrix_ = ifpack2_dump_matrix;
386  localSmoothingIndices_ = localSmoothingIndices;
387 }
388 
389 
390 template<class MatrixType>
392 {
393  // FIXME (aprokop 18 Oct 2013) Casting away const is bad here.
394  // but otherwise, we will get [unused] in pl
395  this->setParametersImpl(const_cast<Teuchos::ParameterList&>(pl));
396 }
397 
398 
399 template<class MatrixType>
403  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getComm: "
404  "The input matrix A is null. Please call setMatrix() with a nonnull "
405  "input matrix before calling this method.");
406  return A_->getRowMap ()->getComm ();
407 }
408 
409 
410 template<class MatrixType>
413  return A_;
414 }
415 
416 
417 template<class MatrixType>
418 Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
419  typename MatrixType::global_ordinal_type,
420  typename MatrixType::node_type> >
423  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getDomainMap: "
424  "The input matrix A is null. Please call setMatrix() with a nonnull "
425  "input matrix before calling this method.");
426  return A_->getDomainMap ();
427 }
428 
429 
430 template<class MatrixType>
431 Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
432  typename MatrixType::global_ordinal_type,
433  typename MatrixType::node_type> >
436  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getRangeMap: "
437  "The input matrix A is null. Please call setMatrix() with a nonnull "
438  "input matrix before calling this method.");
439  return A_->getRangeMap ();
440 }
441 
442 
443 template<class MatrixType>
445  return true;
446 }
447 
448 
449 template<class MatrixType>
451  return(NumInitialize_);
452 }
453 
454 
455 template<class MatrixType>
457  return(NumCompute_);
458 }
459 
460 
461 template<class MatrixType>
463  return(NumApply_);
464 }
465 
466 
467 template<class MatrixType>
469  return(InitializeTime_);
470 }
471 
472 
473 template<class MatrixType>
475  return(ComputeTime_);
476 }
477 
478 
479 template<class MatrixType>
481  return(ApplyTime_);
482 }
483 
484 
485 template<class MatrixType>
487  return(ComputeFlops_);
488 }
489 
490 
491 template<class MatrixType>
493  return(ApplyFlops_);
494 }
495 
496 
497 
498 template<class MatrixType>
501  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getNodeSmootherComplexity: "
502  "The input matrix A is null. Please call setMatrix() with a nonnull "
503  "input matrix, then call compute(), before calling this method.");
504  // Relaxation methods cost roughly one apply + one diagonal inverse per iteration
505  return A_->getNodeNumRows() + A_->getNodeNumEntries();
506 }
507 
508 
509 template<class MatrixType>
510 void
512 apply (const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
513  Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
514  Teuchos::ETransp /* mode */,
515  scalar_type alpha,
516  scalar_type beta) const
517 {
518  using Teuchos::as;
519  using Teuchos::RCP;
520  using Teuchos::rcp;
521  using Teuchos::rcpFromRef;
522  typedef Tpetra::MultiVector<scalar_type, local_ordinal_type,
525  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::apply: "
526  "The input matrix A is null. Please call setMatrix() with a nonnull "
527  "input matrix, then call compute(), before calling this method.");
529  ! isComputed (),
530  std::runtime_error,
531  "Ifpack2::Relaxation::apply: You must call compute() on this Ifpack2 "
532  "preconditioner instance before you may call apply(). You may call "
533  "isComputed() to find out if compute() has been called already.");
534  TEUCHOS_TEST_FOR_EXCEPTION(
535  X.getNumVectors() != Y.getNumVectors(),
536  std::runtime_error,
537  "Ifpack2::Relaxation::apply: X and Y have different numbers of columns. "
538  "X has " << X.getNumVectors() << " columns, but Y has "
539  << Y.getNumVectors() << " columns.");
540  TEUCHOS_TEST_FOR_EXCEPTION(
541  beta != STS::zero (), std::logic_error,
542  "Ifpack2::Relaxation::apply: beta = " << beta << " != 0 case not "
543  "implemented.");
544 
545  const std::string timerName ("Ifpack2::Relaxation::apply");
547  if (timer.is_null ()) {
548  timer = Teuchos::TimeMonitor::getNewCounter (timerName);
549  }
550 
551  {
552  Teuchos::TimeMonitor timeMon (*timer);
553  // Special case: alpha == 0.
554  if (alpha == STS::zero ()) {
555  // No floating-point operations, so no need to update a count.
556  Y.putScalar (STS::zero ());
557  }
558  else {
559  // If X and Y alias one another, then we need to create an
560  // auxiliary vector, Xcopy (a deep copy of X).
561  RCP<const MV> Xcopy;
562  // FIXME (mfh 12 Sep 2014) This test for aliasing is incomplete.
563  {
564  auto X_lcl_host = X.getLocalViewHost ();
565  auto Y_lcl_host = Y.getLocalViewHost ();
566 
567  if (X_lcl_host.data () == Y_lcl_host.data ()) {
568  Xcopy = rcp (new MV (X, Teuchos::Copy));
569  } else {
570  Xcopy = rcpFromRef (X);
571  }
572  }
573 
574  // Each of the following methods updates the flop count itself.
575  // All implementations handle zeroing out the starting solution
576  // (if necessary) themselves.
577  switch (PrecType_) {
578  case Ifpack2::Details::JACOBI:
579  ApplyInverseJacobi(*Xcopy,Y);
580  break;
581  case Ifpack2::Details::GS:
582  ApplyInverseGS(*Xcopy,Y);
583  break;
584  case Ifpack2::Details::SGS:
585  ApplyInverseSGS(*Xcopy,Y);
586  break;
587  case Ifpack2::Details::MTSGS:
588  ApplyInverseMTSGS_CrsMatrix(*Xcopy,Y);
589  break;
590  case Ifpack2::Details::MTGS:
591  ApplyInverseMTGS_CrsMatrix(*Xcopy,Y);
592  break;
593 
594  default:
595  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
596  "Ifpack2::Relaxation::apply: Invalid preconditioner type enum value "
597  << PrecType_ << ". Please report this bug to the Ifpack2 developers.");
598  }
599  if (alpha != STS::one ()) {
600  Y.scale (alpha);
601  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
602  const double numVectors = as<double> (Y.getNumVectors ());
603  ApplyFlops_ += numGlobalRows * numVectors;
604  }
605  }
606  }
607  ApplyTime_ += timer->totalElapsedTime ();
608  ++NumApply_;
609 }
610 
611 
612 template<class MatrixType>
613 void
615 applyMat (const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
616  Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
617  Teuchos::ETransp mode) const
618 {
620  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::applyMat: "
621  "The input matrix A is null. Please call setMatrix() with a nonnull "
622  "input matrix, then call compute(), before calling this method.");
624  ! isComputed (), std::runtime_error, "Ifpack2::Relaxation::applyMat: "
625  "isComputed() must be true before you may call applyMat(). "
626  "Please call compute() before calling this method.");
627  TEUCHOS_TEST_FOR_EXCEPTION(
628  X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
629  "Ifpack2::Relaxation::applyMat: X.getNumVectors() = " << X.getNumVectors ()
630  << " != Y.getNumVectors() = " << Y.getNumVectors () << ".");
631  A_->apply (X, Y, mode);
632 }
633 
634 
635 template<class MatrixType>
637 {
638  const char methodName[] = "Ifpack2::Relaxation::initialize";
639 
641  (A_.is_null (), std::runtime_error, methodName << ": The "
642  "input matrix A is null. Please call setMatrix() with "
643  "a nonnull input matrix before calling this method.");
644 
647 
648  { // Timing of initialize starts here
649  Teuchos::TimeMonitor timeMon (*timer);
650  isInitialized_ = false;
651 
652  {
653  auto rowMap = A_->getRowMap ();
654  auto comm = rowMap.is_null () ? Teuchos::null : rowMap->getComm ();
655  IsParallel_ = ! comm.is_null () && comm->getSize () > 1;
656  }
657 
658  // mfh 21 Mar 2013, 07 May 2019: The Import object may be null,
659  // but in that case, the domain and column Maps are the same and
660  // we don't need to Import anyway.
661  //
662  // mfh 07 May 2019: A_->getGraph() might be an
663  // OverlappingRowGraph, which doesn't have an Import object.
664  // However, in that case, the comm should have size 1.
665  Importer_ = IsParallel_ ? A_->getGraph ()->getImporter () :
666  Teuchos::null;
667 
668  {
670  Teuchos::rcp_dynamic_cast<const block_crs_matrix_type> (A_);
671  hasBlockCrsMatrix_ = ! A_bcrs.is_null ();
672  }
673 
674  if (PrecType_ == Details::MTGS || PrecType_ == Details::MTSGS) {
675  const crs_matrix_type* crsMat =
676  dynamic_cast<const crs_matrix_type*> (A_.get());
678  (crsMat == nullptr, std::logic_error, methodName << ": "
679  "Multithreaded Gauss-Seidel methods currently only work "
680  "when the input matrix is a Tpetra::CrsMatrix.");
681 
682  // FIXME (mfh 27 May 2019) Dumping the matrix belongs in
683  // compute, not initialize, since users may change the matrix's
684  // values at any time before calling compute.
685  if (ifpack2_dump_matrix_) {
686  static int sequence_number = 0;
687  const std::string file_name = "Ifpack2_MT_GS_" +
688  std::to_string (sequence_number++) + ".mtx";
690  Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A_);
691  if (! rcp_crs_mat.is_null ()) {
692  using writer_type = Tpetra::MatrixMarket::Writer<crs_matrix_type>;
693  writer_type::writeSparseFile (file_name, rcp_crs_mat);
694  }
695  }
696 
697  this->mtKernelHandle_ = Teuchos::rcp (new mt_kernel_handle_type ());
698  if (mtKernelHandle_->get_gs_handle () == nullptr) {
699  mtKernelHandle_->create_gs_handle ();
700  }
701  local_matrix_type kcsr = crsMat->getLocalMatrix ();
702 
703  bool is_symmetric = (PrecType_ == Ifpack2::Details::MTSGS);
704  is_symmetric = is_symmetric || is_matrix_structurally_symmetric_;
705 
706  using KokkosSparse::Experimental::gauss_seidel_symbolic;
707  gauss_seidel_symbolic<mt_kernel_handle_type,
708  lno_row_view_t,
709  lno_nonzero_view_t> (mtKernelHandle_.getRawPtr (),
710  A_->getNodeNumRows (),
711  A_->getNodeNumCols (),
712  kcsr.graph.row_map,
713  kcsr.graph.entries,
714  is_symmetric);
715  }
716  } // timing of initialize stops here
717 
718  InitializeTime_ += timer->totalElapsedTime ();
719  ++NumInitialize_;
720  isInitialized_ = true;
721 }
722 
723 namespace Impl {
724 template <typename BlockDiagView>
725 struct InvertDiagBlocks {
726  typedef int value_type;
727  typedef typename BlockDiagView::size_type Size;
728 
729 private:
730  typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
731  template <typename View>
732  using UnmanagedView = Kokkos::View<typename View::data_type, typename View::array_layout,
733  typename View::device_type, Unmanaged>;
734 
735  typedef typename BlockDiagView::non_const_value_type Scalar;
736  typedef typename BlockDiagView::device_type Device;
737  typedef Kokkos::View<Scalar**, Kokkos::LayoutRight, Device> RWrk;
738  typedef Kokkos::View<int**, Kokkos::LayoutRight, Device> IWrk;
739 
740  UnmanagedView<BlockDiagView> block_diag_;
741  // TODO Use thread team and scratch memory space. In this first
742  // pass, provide workspace for each block.
743  RWrk rwrk_buf_;
744  UnmanagedView<RWrk> rwrk_;
745  IWrk iwrk_buf_;
746  UnmanagedView<IWrk> iwrk_;
747 
748 public:
749  InvertDiagBlocks (BlockDiagView& block_diag)
750  : block_diag_(block_diag)
751  {
752  const auto blksz = block_diag.extent(1);
753  Kokkos::resize(rwrk_buf_, block_diag_.extent(0), blksz);
754  rwrk_ = rwrk_buf_;
755  Kokkos::resize(iwrk_buf_, block_diag_.extent(0), blksz);
756  iwrk_ = iwrk_buf_;
757  }
758 
759  KOKKOS_INLINE_FUNCTION
760  void operator() (const Size i, int& jinfo) const {
761  auto D_cur = Kokkos::subview(block_diag_, i, Kokkos::ALL(), Kokkos::ALL());
762  auto ipiv = Kokkos::subview(iwrk_, i, Kokkos::ALL());
763  auto work = Kokkos::subview(rwrk_, i, Kokkos::ALL());
764  int info = 0;
765  Tpetra::GETF2(D_cur, ipiv, info);
766  if (info) {
767  ++jinfo;
768  return;
769  }
770  Tpetra::GETRI(D_cur, ipiv, work, info);
771  if (info) ++jinfo;
772  }
773 
774  // Report the number of blocks with errors.
775  KOKKOS_INLINE_FUNCTION
776  void join (volatile value_type& dst, volatile value_type const& src) const { dst += src; }
777 };
778 }
779 
780 template<class MatrixType>
781 void Relaxation<MatrixType>::computeBlockCrs ()
782 {
783  using Kokkos::ALL;
784  using Teuchos::Array;
785  using Teuchos::ArrayRCP;
786  using Teuchos::ArrayView;
787  using Teuchos::as;
788  using Teuchos::Comm;
789  using Teuchos::RCP;
790  using Teuchos::rcp;
791  using Teuchos::REDUCE_MAX;
792  using Teuchos::REDUCE_MIN;
793  using Teuchos::REDUCE_SUM;
794  using Teuchos::rcp_dynamic_cast;
795  using Teuchos::reduceAll;
796  typedef local_ordinal_type LO;
797  typedef typename node_type::device_type device_type;
798 
799  const std::string timerName ("Ifpack2::Relaxation::computeBlockCrs");
801  if (timer.is_null ()) {
802  timer = Teuchos::TimeMonitor::getNewCounter (timerName);
803  }
804  {
805  Teuchos::TimeMonitor timeMon (*timer);
806 
808  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::"
809  "computeBlockCrs: The input matrix A is null. Please call setMatrix() "
810  "with a nonnull input matrix, then call initialize(), before calling "
811  "this method.");
812  const block_crs_matrix_type* blockCrsA =
813  dynamic_cast<const block_crs_matrix_type*> (A_.getRawPtr ());
815  blockCrsA == nullptr, std::logic_error, "Ifpack2::Relaxation::"
816  "computeBlockCrs: A_ is not a BlockCrsMatrix, but it should be if we "
817  "got this far. Please report this bug to the Ifpack2 developers.");
818 
819  const scalar_type one = STS::one ();
820 
821  // Reset state.
822  IsComputed_ = false;
823 
824  const LO lclNumMeshRows =
825  blockCrsA->getCrsGraph ().getNodeNumRows ();
826  const LO blockSize = blockCrsA->getBlockSize ();
827 
828  if (! savedDiagOffsets_) {
829  blockDiag_ = block_diag_type (); // clear it before reallocating
830  blockDiag_ = block_diag_type ("Ifpack2::Relaxation::blockDiag_",
831  lclNumMeshRows, blockSize, blockSize);
832  if (Teuchos::as<LO>(diagOffsets_.extent (0) ) < lclNumMeshRows) {
833  // Clear diagOffsets_ first (by assigning an empty View to it)
834  // to save memory, before reallocating.
835  diagOffsets_ = Kokkos::View<size_t*, device_type> ();
836  diagOffsets_ = Kokkos::View<size_t*, device_type> ("offsets", lclNumMeshRows);
837  }
838  blockCrsA->getCrsGraph ().getLocalDiagOffsets (diagOffsets_);
840  (static_cast<size_t> (diagOffsets_.extent (0)) !=
841  static_cast<size_t> (blockDiag_.extent (0)),
842  std::logic_error, "diagOffsets_.extent(0) = " <<
843  diagOffsets_.extent (0) << " != blockDiag_.extent(0) = "
844  << blockDiag_.extent (0) <<
845  ". Please report this bug to the Ifpack2 developers.");
846  savedDiagOffsets_ = true;
847  }
848  blockCrsA->getLocalDiagCopy (blockDiag_, diagOffsets_);
849 
850  // Use an unmanaged View in this method, so that when we take
851  // subviews of it (to get each diagonal block), we don't have to
852  // touch the reference count. Reference count updates are a
853  // thread scalability bottleneck and have a performance cost even
854  // without using threads.
855  unmanaged_block_diag_type blockDiag = blockDiag_;
856 
857  if (DoL1Method_ && IsParallel_) {
858  const scalar_type two = one + one;
859  const size_t maxLength = A_->getNodeMaxNumRowEntries ();
860  Array<LO> indices (maxLength);
861  Array<scalar_type> values (maxLength * blockSize * blockSize);
862  size_t numEntries = 0;
863 
864  for (LO i = 0; i < lclNumMeshRows; ++i) {
865  // FIXME (mfh 16 Dec 2015) Get views instead of copies.
866  blockCrsA->getLocalRowCopy (i, indices (), values (), numEntries);
867 
868  auto diagBlock = Kokkos::subview (blockDiag, i, ALL (), ALL ());
869  for (LO subRow = 0; subRow < blockSize; ++subRow) {
870  magnitude_type diagonal_boost = STM::zero ();
871  for (size_t k = 0 ; k < numEntries ; ++k) {
872  if (indices[k] > lclNumMeshRows) {
873  const size_t offset = blockSize*blockSize*k + subRow*blockSize;
874  for (LO subCol = 0; subCol < blockSize; ++subCol) {
875  diagonal_boost += STS::magnitude (values[offset+subCol] / two);
876  }
877  }
878  }
879  if (STS::magnitude (diagBlock(subRow, subRow)) < L1Eta_ * diagonal_boost) {
880  diagBlock(subRow, subRow) += diagonal_boost;
881  }
882  }
883  }
884  }
885 
886  int info = 0;
887  {
888  Impl::InvertDiagBlocks<unmanaged_block_diag_type> idb(blockDiag);
889  typedef typename unmanaged_block_diag_type::execution_space exec_space;
890  typedef Kokkos::RangePolicy<exec_space, LO> range_type;
891 
892  Kokkos::parallel_reduce (range_type (0, lclNumMeshRows), idb, info);
893  // FIXME (mfh 19 May 2017) Different processes may not
894  // necessarily have this error all at the same time, so it would
895  // be better just to preserve a local error state and let users
896  // check.
898  (info > 0, std::runtime_error, "GETF2 or GETRI failed on = " << info
899  << " diagonal blocks.");
900  }
901 
902  // In a debug build, do an extra test to make sure that all the
903  // factorizations were computed correctly.
904 #ifdef HAVE_IFPACK2_DEBUG
905  const int numResults = 2;
906  // Use "max = -min" trick to get min and max in a single all-reduce.
907  int lclResults[2], gblResults[2];
908  lclResults[0] = info;
909  lclResults[1] = -info;
910  gblResults[0] = 0;
911  gblResults[1] = 0;
912  reduceAll<int, int> (* (A_->getGraph ()->getComm ()), REDUCE_MIN,
913  numResults, lclResults, gblResults);
915  (gblResults[0] != 0 || gblResults[1] != 0, std::runtime_error,
916  "Ifpack2::Relaxation::compute: When processing the input "
917  "Tpetra::BlockCrsMatrix, one or more diagonal block LU factorizations "
918  "failed on one or more (MPI) processes.");
919 #endif // HAVE_IFPACK2_DEBUG
920  } // end TimeMonitor scope
921 
922  ComputeTime_ += timer->totalElapsedTime ();
923  ++NumCompute_;
924  IsComputed_ = true;
925 }
926 
927 template<class MatrixType>
929 {
930  using Tpetra::readWrite;
931  using Teuchos::Array;
932  using Teuchos::ArrayRCP;
933  using Teuchos::ArrayView;
934  using Teuchos::as;
935  using Teuchos::Comm;
936  using Teuchos::RCP;
937  using Teuchos::rcp;
938  using Teuchos::REDUCE_MAX;
939  using Teuchos::REDUCE_MIN;
940  using Teuchos::REDUCE_SUM;
941  using Teuchos::rcp_dynamic_cast;
942  using Teuchos::reduceAll;
943  using LO = local_ordinal_type;
944  using vector_type = Tpetra::Vector<scalar_type, local_ordinal_type,
946  using device_type = typename vector_type::device_type;
947  using IST = typename vector_type::impl_scalar_type;
948  using KAT = Kokkos::ArithTraits<IST>;
949 
950  const char methodName[] = "Ifpack2::Relaxation::compute";
951  const scalar_type zero = STS::zero ();
952  const scalar_type one = STS::one ();
953 
954  // mfh 27 May 2019: Later on, we should introduce an IFPACK2_DEBUG
955  // environment variable to control this behavior at run time.
956 #ifdef HAVE_IFPACK2_DEBUG
957  constexpr bool debug = true;
958 #else
959  constexpr bool debug = false;
960 #endif // HAVE_IFPACK2_DEBUG
961 
963  (A_.is_null (), std::runtime_error, methodName << ": "
964  "The input matrix A is null. Please call setMatrix() with a nonnull "
965  "input matrix, then call initialize(), before calling this method.");
966 
967  // We don't count initialization in compute() time.
968  if (! isInitialized ()) {
969  initialize ();
970  }
971 
972  if (hasBlockCrsMatrix_) {
973  computeBlockCrs ();
974  return;
975  }
976 
979 
980  { // Timing of compute starts here.
981  Teuchos::TimeMonitor timeMon (*timer);
983  (NumSweeps_ < 0, std::logic_error, methodName
984  << ": NumSweeps_ = " << NumSweeps_ << " < 0. "
985  "Please report this bug to the Ifpack2 developers.");
986  IsComputed_ = false;
987 
988  Diagonal_ = Teuchos::null;
989  // A_->getLocalDiagCopy fills in all Vector entries, even if the
990  // matrix has no stored entries in the corresponding rows.
991  Diagonal_ = rcp (new vector_type (A_->getRowMap (), false));
992 
993  // Extract the diagonal entries. The CrsMatrix static graph
994  // version is faster for subsequent calls to compute(), since it
995  // caches the diagonal offsets.
996  //
997  // isStaticGraph() == true means that the matrix was created with
998  // a const graph. The only requirement is that the structure of
999  // the matrix never changes, so isStaticGraph() == true is a bit
1000  // more conservative than we need. However, Tpetra doesn't (as of
1001  // 05 Apr 2013) have a way to tell if the graph hasn't changed
1002  // since the last time we used it.
1003  {
1004  // NOTE (mfh 07 Jul 2013): We must cast here to CrsMatrix
1005  // instead of MatrixType, because isStaticGraph is a CrsMatrix
1006  // method (not inherited from RowMatrix's interface). It's
1007  // perfectly valid to do relaxation on a RowMatrix which is not
1008  // a CrsMatrix.
1009  const crs_matrix_type* crsMat =
1010  dynamic_cast<const crs_matrix_type*> (A_.getRawPtr ());
1011  if (crsMat == nullptr || ! crsMat->isStaticGraph ()) {
1012  A_->getLocalDiagCopy (*Diagonal_); // slow path
1013  }
1014  else {
1015  if (! savedDiagOffsets_) { // we haven't precomputed offsets
1016  const size_t lclNumRows = A_->getRowMap ()->getNodeNumElements ();
1017  if (diagOffsets_.extent (0) < lclNumRows) {
1018  using Kokkos::view_alloc;
1019  using Kokkos::WithoutInitializing;
1020  using offsets_view_type = Kokkos::View<size_t*, device_type>;
1021 
1022  diagOffsets_ = offsets_view_type (); // clear 1st to save mem
1023  auto howAlloc = view_alloc ("offsets", WithoutInitializing);
1024  diagOffsets_ = offsets_view_type (howAlloc, lclNumRows);
1025  }
1026  crsMat->getCrsGraph ()->getLocalDiagOffsets (diagOffsets_);
1027  savedDiagOffsets_ = true;
1028  }
1029  crsMat->getLocalDiagCopy (*Diagonal_, diagOffsets_);
1030 
1031  if (debug) {
1032  // Validate the fast-path diagonal against the slow-path diagonal.
1033  vector_type D_copy (A_->getRowMap ());
1034  A_->getLocalDiagCopy (D_copy);
1035  D_copy.update (STS::one (), *Diagonal_, -STS::one ());
1036  const magnitude_type err = D_copy.normInf ();
1037  // The two diagonals should be exactly the same, so their
1038  // difference should be exactly zero.
1040  (err != STM::zero(), std::logic_error, methodName << ": "
1041  << "\"fast-path\" diagonal computation failed. "
1042  "\\|D1 - D2\\|_inf = " << err << ".");
1043  }
1044  }
1045  }
1046 
1047  // If we're checking the computed inverse diagonal, then keep a
1048  // copy of the original diagonal entries for later comparison.
1049  std::unique_ptr<vector_type> origDiag;
1050  if (checkDiagEntries_) {
1051  origDiag = std::unique_ptr<vector_type>
1052  (new vector_type (*Diagonal_, Teuchos::Copy));
1053  }
1054 
1055  const LO numMyRows = static_cast<LO> (A_->getNodeNumRows ());
1056 
1057  // Setup for L1 Methods.
1058  // Here we add half the value of the off-processor entries in the row,
1059  // but only if diagonal isn't sufficiently large.
1060  //
1061  // This follows from Equation (6.5) in: Baker, Falgout, Kolev and
1062  // Yang. "Multigrid Smoothers for Ultraparallel Computing." SIAM
1063  // J. Sci. Comput., Vol. 33, No. 5. (2011), pp. 2864-2887.
1064  //
1065  // FIXME (mfh 27 May 2019) Add device-parallel version for when
1066  // *A_ is a CrsMatrix. Furthermore, in that case, we should
1067  // combine diagonal extraction above with L1 modification into a
1068  // single parallel loop.
1069  if (DoL1Method_ && IsParallel_) {
1070  vector_type& gblDiag = *Diagonal_;
1071  using rw_type =
1072  decltype (readWrite (gblDiag).on (Kokkos::HostSpace ()));
1073  // Once we have C++14, we can get rid of this alias and use
1074  // "auto" in the lambda below.
1075  using lcl_vec_type =
1076  Tpetra::with_local_access_function_argument_type<rw_type>;
1077  const row_matrix_type& A_row = *A_;
1078  const magnitude_type L1_eta = L1Eta_;
1079  Tpetra::withLocalAccess
1080  ([&A_row, L1_eta, numMyRows] (const lcl_vec_type& diag) {
1081  const magnitude_type two = STM::one () + STM::one ();
1082  const size_t maxLength = A_row.getNodeMaxNumRowEntries ();
1083  Array<local_ordinal_type> indices (maxLength);
1084  Array<scalar_type> values (maxLength);
1085  size_t numEntries;
1086 
1087  for (LO i = 0; i < numMyRows; ++i) {
1088  A_row.getLocalRowCopy (i, indices (), values (), numEntries);
1089  magnitude_type diagonal_boost = STM::zero ();
1090  for (size_t k = 0 ; k < numEntries; ++k) {
1091  if (indices[k] > numMyRows) {
1092  diagonal_boost += STS::magnitude (values[k] / two);
1093  }
1094  }
1095  if (KAT::magnitude (diag[i]) < L1_eta * diagonal_boost) {
1096  diag[i] += diagonal_boost;
1097  }
1098  }
1099  }, readWrite (gblDiag).on (Kokkos::HostSpace ()));
1100  }
1101 
1102  //
1103  // Invert the matrix's diagonal entries (in Diagonal_).
1104  //
1105 
1106  // Precompute some quantities for "fixing" zero or tiny diagonal
1107  // entries. We'll only use them if this "fixing" is enabled.
1108  //
1109  // SmallTraits covers for the lack of eps() in
1110  // Teuchos::ScalarTraits when its template parameter is not a
1111  // floating-point type. (Ifpack2 sometimes gets instantiated for
1112  // integer Scalar types.)
1113  const IST oneOverMinDiagVal = (MinDiagonalValue_ == zero) ?
1114  KAT::one () / static_cast<IST> (SmallTraits<scalar_type>::eps ()) :
1115  KAT::one () / static_cast<IST> (MinDiagonalValue_);
1116  // It's helpful not to have to recompute this magnitude each time.
1117  const magnitude_type minDiagValMag = STS::magnitude (MinDiagonalValue_);
1118 
1119  if (checkDiagEntries_) {
1120  //
1121  // Check diagonal elements, replace zero elements with the minimum
1122  // diagonal value, and store their inverses. Count the number of
1123  // "small" and zero diagonal entries while we're at it.
1124  //
1125  size_t numSmallDiagEntries = 0; // "small" includes zero
1126  size_t numZeroDiagEntries = 0; // # zero diagonal entries
1127  size_t numNegDiagEntries = 0; // # negative (real parts of) diagonal entries
1128  magnitude_type minMagDiagEntryMag = STM::zero ();
1129  magnitude_type maxMagDiagEntryMag = STM::zero ();
1130 
1131  vector_type& gblDiag = *Diagonal_;
1132  // Once we have C++14, we can get rid of these two aliases and
1133  // use "auto" in the lambda below.
1134  using rw_type =
1135  decltype (readWrite (gblDiag).on (Kokkos::HostSpace ()));
1136  using lcl_vec_type =
1137  Tpetra::with_local_access_function_argument_type<rw_type>;
1138  Tpetra::withLocalAccess
1139  ([&] (const lcl_vec_type& diag) {
1140  // As we go, keep track of the diagonal entries with the
1141  // least and greatest magnitude. We could use the trick of
1142  // starting min with +Inf and max with -Inf, but that
1143  // doesn't work if scalar_type is a built-in integer type.
1144  // Thus, we have to start by reading the first diagonal
1145  // entry redundantly.
1146  if (numMyRows != 0) {
1147  const magnitude_type d_0_mag = KAT::abs (diag[0]);
1148  minMagDiagEntryMag = d_0_mag;
1149  maxMagDiagEntryMag = d_0_mag;
1150  }
1151 
1152  // Go through all the diagonal entries. Compute counts of
1153  // small-magnitude, zero, and negative-real-part entries.
1154  // Invert the diagonal entries that aren't too small. For
1155  // those too small in magnitude, replace them with
1156  // 1/MinDiagonalValue_ (or 1/eps if MinDiagonalValue_
1157  // happens to be zero).
1158  for (LO i = 0; i < numMyRows; ++i) {
1159  const IST d_i = diag[i];
1160  const magnitude_type d_i_mag = KAT::abs (d_i);
1161  // Work-around for GitHub Issue #5269.
1162  //const magnitude_type d_i_real = KAT::real (d_i);
1163  const auto d_i_real = getRealValue (d_i);
1164 
1165  // We can't compare complex numbers, but we can compare their
1166  // real parts.
1167  if (d_i_real < STM::zero ()) {
1168  ++numNegDiagEntries;
1169  }
1170  if (d_i_mag < minMagDiagEntryMag) {
1171  minMagDiagEntryMag = d_i_mag;
1172  }
1173  if (d_i_mag > maxMagDiagEntryMag) {
1174  maxMagDiagEntryMag = d_i_mag;
1175  }
1176 
1177  if (fixTinyDiagEntries_) {
1178  // <= not <, in case minDiagValMag is zero.
1179  if (d_i_mag <= minDiagValMag) {
1180  ++numSmallDiagEntries;
1181  if (d_i_mag == STM::zero ()) {
1182  ++numZeroDiagEntries;
1183  }
1184  diag[i] = oneOverMinDiagVal;
1185  }
1186  else {
1187  diag[i] = KAT::one () / d_i;
1188  }
1189  }
1190  else { // Don't fix zero or tiny diagonal entries.
1191  // <= not <, in case minDiagValMag is zero.
1192  if (d_i_mag <= minDiagValMag) {
1193  ++numSmallDiagEntries;
1194  if (d_i_mag == STM::zero ()) {
1195  ++numZeroDiagEntries;
1196  }
1197  }
1198  diag[i] = KAT::one () / d_i;
1199  }
1200  }
1201  }, readWrite (gblDiag).on (Kokkos::HostSpace ()));
1202 
1203  // Count floating-point operations of computing the inverse diagonal.
1204  //
1205  // FIXME (mfh 30 Mar 2013) Shouldn't counts be global, not local?
1206  if (STS::isComplex) { // magnitude: at least 3 flops per diagonal entry
1207  ComputeFlops_ += 4.0 * numMyRows;
1208  }
1209  else {
1210  ComputeFlops_ += numMyRows;
1211  }
1212 
1213  // Collect global data about the diagonal entries. Only do this
1214  // (which involves O(1) all-reduces) if the user asked us to do
1215  // the extra work.
1216  //
1217  // FIXME (mfh 28 Mar 2013) This is wrong if some processes have
1218  // zero rows. Fixing this requires one of two options:
1219  //
1220  // 1. Use a custom reduction operation that ignores processes
1221  // with zero diagonal entries.
1222  // 2. Split the communicator, compute all-reduces using the
1223  // subcommunicator over processes that have a nonzero number
1224  // of diagonal entries, and then broadcast from one of those
1225  // processes (if there is one) to the processes in the other
1226  // subcommunicator.
1227  const Comm<int>& comm = * (A_->getRowMap ()->getComm ());
1228 
1229  // Compute global min and max magnitude of entries.
1230  Array<magnitude_type> localVals (2);
1231  localVals[0] = minMagDiagEntryMag;
1232  // (- (min (- x))) is the same as (max x).
1233  localVals[1] = -maxMagDiagEntryMag;
1234  Array<magnitude_type> globalVals (2);
1235  reduceAll<int, magnitude_type> (comm, REDUCE_MIN, 2,
1236  localVals.getRawPtr (),
1237  globalVals.getRawPtr ());
1238  globalMinMagDiagEntryMag_ = globalVals[0];
1239  globalMaxMagDiagEntryMag_ = -globalVals[1];
1240 
1241  Array<size_t> localCounts (3);
1242  localCounts[0] = numSmallDiagEntries;
1243  localCounts[1] = numZeroDiagEntries;
1244  localCounts[2] = numNegDiagEntries;
1245  Array<size_t> globalCounts (3);
1246  reduceAll<int, size_t> (comm, REDUCE_SUM, 3,
1247  localCounts.getRawPtr (),
1248  globalCounts.getRawPtr ());
1249  globalNumSmallDiagEntries_ = globalCounts[0];
1250  globalNumZeroDiagEntries_ = globalCounts[1];
1251  globalNumNegDiagEntries_ = globalCounts[2];
1252 
1253  // Compute and save the difference between the computed inverse
1254  // diagonal, and the original diagonal's inverse.
1255  vector_type diff (A_->getRowMap ());
1256  diff.reciprocal (*origDiag);
1257  if (Diagonal_->need_sync_device ()) {
1258  Diagonal_->sync_device ();
1259  }
1260  diff.update (-one, *Diagonal_, one);
1261  globalDiagNormDiff_ = diff.norm2 ();
1262  }
1263  else { // don't check diagonal elements
1264  if (fixTinyDiagEntries_) {
1265  // Go through all the diagonal entries. Invert those that
1266  // aren't too small in magnitude. For those that are too
1267  // small in magnitude, replace them with oneOverMinDiagVal.
1268  vector_type& gblDiag = *Diagonal_;
1269  Tpetra::transform
1270  ("Ifpack2::Relaxation::compute: Invert & fix diagonal",
1271  gblDiag, gblDiag,
1272  KOKKOS_LAMBDA (const IST& d_i) {
1273  const magnitude_type d_i_mag = KAT::magnitude (d_i);
1274 
1275  // <= not <, in case minDiagValMag is zero.
1276  if (d_i_mag <= minDiagValMag) {
1277  return oneOverMinDiagVal;
1278  }
1279  else {
1280  // For Stokhos types, operator/ returns an expression
1281  // type. Explicitly convert to IST before returning.
1282  return IST (KAT::one () / d_i);
1283  }
1284  });
1285  }
1286  else { // don't fix tiny or zero diagonal entries
1287  if (Diagonal_->need_sync_device ()) {
1288  Diagonal_->sync_device ();
1289  }
1290  Diagonal_->reciprocal (*Diagonal_);
1291  }
1292 
1293  if (STS::isComplex) { // magnitude: at least 3 flops per diagonal entry
1294  ComputeFlops_ += 4.0 * numMyRows;
1295  }
1296  else {
1297  ComputeFlops_ += numMyRows;
1298  }
1299  }
1300 
1301  if (Diagonal_->need_sync_device ()) {
1302  Diagonal_->sync_device ();
1303  }
1304 
1305  if (PrecType_ == Ifpack2::Details::MTGS ||
1306  PrecType_ == Ifpack2::Details::MTSGS) {
1307  //KokkosKernels GaussSeidel Initialization.
1308 
1309  const crs_matrix_type* crsMat =
1310  dynamic_cast<const crs_matrix_type*> (A_.get());
1312  (crsMat == nullptr, std::logic_error, methodName << ": "
1313  "Multithreaded Gauss-Seidel methods currently only work "
1314  "when the input matrix is a Tpetra::CrsMatrix.");
1315  local_matrix_type kcsr = crsMat->getLocalMatrix ();
1316 
1317  const bool is_symmetric = (PrecType_ == Ifpack2::Details::MTSGS);
1318  auto diagView_2d = Diagonal_->getLocalViewDevice ();
1319  auto diagView_1d = Kokkos::subview (diagView_2d, Kokkos::ALL (), 0);
1320  using KokkosSparse::Experimental::gauss_seidel_numeric;
1321  gauss_seidel_numeric<mt_kernel_handle_type,
1322  lno_row_view_t,
1323  lno_nonzero_view_t,
1324  scalar_nonzero_view_t> (mtKernelHandle_.getRawPtr (),
1325  A_->getNodeNumRows (),
1326  A_->getNodeNumCols (),
1327  kcsr.graph.row_map,
1328  kcsr.graph.entries,
1329  kcsr.values,
1330  diagView_1d,
1331  is_symmetric);
1332  }
1333  } // end TimeMonitor scope
1334 
1335  ComputeTime_ += timer->totalElapsedTime ();
1336  ++NumCompute_;
1337  IsComputed_ = true;
1338 }
1339 
1340 
1341 template<class MatrixType>
1342 void
1344 ApplyInverseJacobi (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1345  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
1346 {
1347  using Teuchos::as;
1348  if (hasBlockCrsMatrix_) {
1349  ApplyInverseJacobi_BlockCrsMatrix (X, Y);
1350  return;
1351  }
1352 
1353  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1354  const double numVectors = as<double> (X.getNumVectors ());
1355  if (ZeroStartingSolution_) {
1356  // For the first Jacobi sweep, if we are allowed to assume that
1357  // the initial guess is zero, then Jacobi is just diagonal
1358  // scaling. (A_ij * x_j = 0 for i != j, since x_j = 0.)
1359  // Compute it as Y(i,j) = DampingFactor_ * X(i,j) * D(i).
1360  Y.elementWiseMultiply (DampingFactor_, *Diagonal_, X, STS::zero ());
1361 
1362  // Count (global) floating-point operations. Ifpack2 represents
1363  // this as a floating-point number rather than an integer, so that
1364  // overflow (for a very large number of calls, or a very large
1365  // problem) is approximate instead of catastrophic.
1366  double flopUpdate = 0.0;
1367  if (DampingFactor_ == STS::one ()) {
1368  // Y(i,j) = X(i,j) * D(i): one multiply for each entry of Y.
1369  flopUpdate = numGlobalRows * numVectors;
1370  } else {
1371  // Y(i,j) = DampingFactor_ * X(i,j) * D(i):
1372  // Two multiplies per entry of Y.
1373  flopUpdate = 2.0 * numGlobalRows * numVectors;
1374  }
1375  ApplyFlops_ += flopUpdate;
1376  if (NumSweeps_ == 1) {
1377  return;
1378  }
1379  }
1380  // If we were allowed to assume that the starting guess was zero,
1381  // then we have already done the first sweep above.
1382  const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1383 
1384  // Allocate a multivector if the cached one isn't perfect
1385  updateCachedMultiVector(Y.getMap(),as<size_t>(numVectors));
1386 
1387  for (int j = startSweep; j < NumSweeps_; ++j) {
1388  // Each iteration: Y = Y + \omega D^{-1} (X - A*Y)
1389  applyMat (Y, *cachedMV_);
1390  cachedMV_->update (STS::one (), X, -STS::one ());
1391  Y.elementWiseMultiply (DampingFactor_, *Diagonal_, *cachedMV_, STS::one ());
1392  }
1393 
1394  // For each column of output, for each pass over the matrix:
1395  //
1396  // - One + and one * for each matrix entry
1397  // - One / and one + for each row of the matrix
1398  // - If the damping factor is not one: one * for each row of the
1399  // matrix. (It's not fair to count this if the damping factor is
1400  // one, since the implementation could skip it. Whether it does
1401  // or not is the implementation's choice.)
1402 
1403  // Floating-point operations due to the damping factor, per matrix
1404  // row, per direction, per columm of output.
1405  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1406  const double dampingFlops = (DampingFactor_ == STS::one ()) ? 0.0 : 1.0;
1407  ApplyFlops_ += as<double> (NumSweeps_ - startSweep) * numVectors *
1408  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1409 }
1410 
1411 template<class MatrixType>
1412 void
1413 Relaxation<MatrixType>::
1414 ApplyInverseJacobi_BlockCrsMatrix (const Tpetra::MultiVector<scalar_type,
1415  local_ordinal_type,
1416  global_ordinal_type,
1417  node_type>& X,
1418  Tpetra::MultiVector<scalar_type,
1419  local_ordinal_type,
1420  global_ordinal_type,
1421  node_type>& Y) const
1422 {
1423  using Tpetra::BlockMultiVector;
1424  using BMV = BlockMultiVector<scalar_type, local_ordinal_type,
1425  global_ordinal_type, node_type>;
1426 
1427  const block_crs_matrix_type* blockMatConst =
1428  dynamic_cast<const block_crs_matrix_type*> (A_.getRawPtr ());
1430  (blockMatConst == nullptr, std::logic_error, "This method should "
1431  "never be called if the matrix A_ is not a BlockCrsMatrix. "
1432  "Please report this bug to the Ifpack2 developers.");
1433  // mfh 23 Jan 2016: Unfortunately, the const cast is necessary.
1434  // This is because applyBlock() is nonconst (more accurate), while
1435  // apply() is const (required by Tpetra::Operator interface, but a
1436  // lie, because it possibly allocates temporary buffers).
1437  block_crs_matrix_type* blockMat =
1438  const_cast<block_crs_matrix_type*> (blockMatConst);
1439 
1440  auto meshRowMap = blockMat->getRowMap ();
1441  auto meshColMap = blockMat->getColMap ();
1442  const local_ordinal_type blockSize = blockMat->getBlockSize ();
1443 
1444  BMV X_blk (X, *meshColMap, blockSize);
1445  BMV Y_blk (Y, *meshRowMap, blockSize);
1446 
1447  if (ZeroStartingSolution_) {
1448  // For the first sweep, if we are allowed to assume that the
1449  // initial guess is zero, then block Jacobi is just block diagonal
1450  // scaling. (A_ij * x_j = 0 for i != j, since x_j = 0.)
1451  Y_blk.blockWiseMultiply (DampingFactor_, blockDiag_, X_blk);
1452  if (NumSweeps_ == 1) {
1453  return;
1454  }
1455  }
1456 
1457  auto pointRowMap = Y.getMap ();
1458  const size_t numVecs = X.getNumVectors ();
1459 
1460  // We don't need to initialize the result MV, since the sparse
1461  // mat-vec will clobber its contents anyway.
1462  BMV A_times_Y (*meshRowMap, *pointRowMap, blockSize, numVecs);
1463 
1464  // If we were allowed to assume that the starting guess was zero,
1465  // then we have already done the first sweep above.
1466  const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1467 
1468  for (int j = startSweep; j < NumSweeps_; ++j) {
1469  blockMat->applyBlock (Y_blk, A_times_Y);
1470  // Y := Y + \omega D^{-1} (X - A*Y). Use A_times_Y as scratch.
1471  Y_blk.blockJacobiUpdate (DampingFactor_, blockDiag_,
1472  X_blk, A_times_Y, STS::one ());
1473  }
1474 }
1475 
1476 template<class MatrixType>
1477 void
1478 Relaxation<MatrixType>::
1479 ApplyInverseGS (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1480  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
1481 {
1482  using this_type = Relaxation<MatrixType>;
1483  // The CrsMatrix version is faster, because it can access the sparse
1484  // matrix data directly, rather than by copying out each row's data
1485  // in turn. Thus, we check whether the RowMatrix is really a
1486  // CrsMatrix.
1487  //
1488  // FIXME (mfh 07 Jul 2013) See note on crs_matrix_type typedef
1489  // declaration in Ifpack2_Relaxation_decl.hpp header file. The code
1490  // will still be correct if the cast fails, but it will use an
1491  // unoptimized kernel.
1492  const block_crs_matrix_type* blockCrsMat =
1493  dynamic_cast<const block_crs_matrix_type*> (A_.getRawPtr ());
1494  const crs_matrix_type* crsMat =
1495  dynamic_cast<const crs_matrix_type*> (A_.getRawPtr ());
1496  if (blockCrsMat != nullptr) {
1497  const_cast<this_type*> (this)->ApplyInverseGS_BlockCrsMatrix (*blockCrsMat, X, Y);
1498  }
1499  else if (crsMat != nullptr) {
1500  ApplyInverseGS_CrsMatrix (*crsMat, X, Y);
1501  }
1502  else {
1503  ApplyInverseGS_RowMatrix (X, Y);
1504  }
1505 }
1506 
1507 
1508 template<class MatrixType>
1509 void
1510 Relaxation<MatrixType>::
1511 ApplyInverseGS_RowMatrix (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1512  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
1513 {
1514  using Teuchos::Array;
1515  using Teuchos::ArrayRCP;
1516  using Teuchos::ArrayView;
1517  using Teuchos::as;
1518  using Teuchos::RCP;
1519  using Teuchos::rcp;
1520  using Teuchos::rcpFromRef;
1521  typedef Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> MV;
1522 
1523  // Tpetra's GS implementation for CrsMatrix handles zeroing out the
1524  // starting multivector itself. The generic RowMatrix version here
1525  // does not, so we have to zero out Y here.
1526  if (ZeroStartingSolution_) {
1527  Y.putScalar (STS::zero ());
1528  }
1529 
1530  const size_t NumVectors = X.getNumVectors ();
1531  const size_t maxLength = A_->getNodeMaxNumRowEntries ();
1532  Array<local_ordinal_type> Indices (maxLength);
1533  Array<scalar_type> Values (maxLength);
1534 
1535  // Local smoothing stuff
1536  const size_t numMyRows = A_->getNodeNumRows();
1537  const local_ordinal_type* rowInd = 0;
1538  size_t numActive = numMyRows;
1539  bool do_local = ! localSmoothingIndices_.is_null ();
1540  if (do_local) {
1541  rowInd = localSmoothingIndices_.getRawPtr ();
1542  numActive = localSmoothingIndices_.size ();
1543  }
1544 
1545  RCP<MV> Y2;
1546  if (IsParallel_) {
1547  if (Importer_.is_null ()) { // domain and column Maps are the same.
1548  updateCachedMultiVector (Y.getMap (), NumVectors);
1549  }
1550  else {
1551  updateCachedMultiVector (Importer_->getTargetMap (), NumVectors);
1552  }
1553  Y2 = cachedMV_;
1554  }
1555  else {
1556  Y2 = rcpFromRef (Y);
1557  }
1558 
1559  // Diagonal
1560  ArrayRCP<const scalar_type> d_rcp = Diagonal_->get1dView ();
1561  ArrayView<const scalar_type> d_ptr = d_rcp();
1562 
1563  // Constant stride check
1564  bool constant_stride = X.isConstantStride() && Y2->isConstantStride();
1565 
1566  if (constant_stride) {
1567  // extract 1D RCPs
1568  size_t x_stride = X.getStride();
1569  size_t y2_stride = Y2->getStride();
1570  ArrayRCP<scalar_type> y2_rcp = Y2->get1dViewNonConst();
1571  ArrayRCP<const scalar_type> x_rcp = X.get1dView();
1572  ArrayView<scalar_type> y2_ptr = y2_rcp();
1573  ArrayView<const scalar_type> x_ptr = x_rcp();
1574  Array<scalar_type> dtemp(NumVectors,STS::zero());
1575 
1576  for (int j = 0; j < NumSweeps_; ++j) {
1577  // data exchange is here, once per sweep
1578  if (IsParallel_) {
1579  if (Importer_.is_null ()) {
1580  // FIXME (mfh 27 May 2019) This doesn't deep copy -- not
1581  // clear if this is correct. Reevaluate at some point.
1582 
1583  *Y2 = Y; // just copy, since domain and column Maps are the same
1584  } else {
1585  Y2->doImport (Y, *Importer_, Tpetra::INSERT);
1586  }
1587  }
1588 
1589  if (! DoBackwardGS_) { // Forward sweep
1590  for (size_t ii = 0; ii < numActive; ++ii) {
1591  local_ordinal_type i = as<local_ordinal_type> (do_local ? rowInd[ii] : ii);
1592  size_t NumEntries;
1593  A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
1594  dtemp.assign(NumVectors,STS::zero());
1595 
1596  for (size_t k = 0; k < NumEntries; ++k) {
1597  const local_ordinal_type col = Indices[k];
1598  for (size_t m = 0; m < NumVectors; ++m) {
1599  dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
1600  }
1601  }
1602 
1603  for (size_t m = 0; m < NumVectors; ++m) {
1604  y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
1605  }
1606  }
1607  }
1608  else { // Backward sweep
1609  // ptrdiff_t is the same size as size_t, but is signed. Being
1610  // signed is important so that i >= 0 is not trivially true.
1611  for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
1612  local_ordinal_type i = as<local_ordinal_type> (do_local ? rowInd[ii] : ii);
1613  size_t NumEntries;
1614  A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
1615  dtemp.assign (NumVectors, STS::zero ());
1616 
1617  for (size_t k = 0; k < NumEntries; ++k) {
1618  const local_ordinal_type col = Indices[k];
1619  for (size_t m = 0; m < NumVectors; ++m) {
1620  dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
1621  }
1622  }
1623 
1624  for (size_t m = 0; m < NumVectors; ++m) {
1625  y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
1626  }
1627  }
1628  }
1629  // FIXME (mfh 02 Jan 2013) This is only correct if row Map == range Map.
1630  if (IsParallel_) {
1631  Tpetra::deep_copy (Y, *Y2);
1632  }
1633  }
1634  }
1635  else {
1636  // extract 2D RCPS
1637  ArrayRCP<ArrayRCP<scalar_type> > y2_ptr = Y2->get2dViewNonConst ();
1638  ArrayRCP<ArrayRCP<const scalar_type> > x_ptr = X.get2dView ();
1639 
1640  for (int j = 0; j < NumSweeps_; ++j) {
1641  // data exchange is here, once per sweep
1642  if (IsParallel_) {
1643  if (Importer_.is_null ()) {
1644  *Y2 = Y; // just copy, since domain and column Maps are the same
1645  } else {
1646  Y2->doImport (Y, *Importer_, Tpetra::INSERT);
1647  }
1648  }
1649 
1650  if (! DoBackwardGS_) { // Forward sweep
1651  for (size_t ii = 0; ii < numActive; ++ii) {
1652  local_ordinal_type i = as<local_ordinal_type> (do_local ? rowInd[ii] : ii);
1653  size_t NumEntries;
1654  A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
1655 
1656  for (size_t m = 0; m < NumVectors; ++m) {
1657  scalar_type dtemp = STS::zero ();
1658  ArrayView<const scalar_type> x_local = (x_ptr())[m]();
1659  ArrayView<scalar_type> y2_local = (y2_ptr())[m]();
1660 
1661  for (size_t k = 0; k < NumEntries; ++k) {
1662  const local_ordinal_type col = Indices[k];
1663  dtemp += Values[k] * y2_local[col];
1664  }
1665  y2_local[i] += DampingFactor_ * d_ptr[i] * (x_local[i] - dtemp);
1666  }
1667  }
1668  }
1669  else { // Backward sweep
1670  // ptrdiff_t is the same size as size_t, but is signed. Being
1671  // signed is important so that i >= 0 is not trivially true.
1672  for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
1673  local_ordinal_type i = as<local_ordinal_type> (do_local ? rowInd[ii] : ii);
1674 
1675  size_t NumEntries;
1676  A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
1677 
1678  for (size_t m = 0; m < NumVectors; ++m) {
1679  scalar_type dtemp = STS::zero ();
1680  ArrayView<const scalar_type> x_local = (x_ptr())[m]();
1681  ArrayView<scalar_type> y2_local = (y2_ptr())[m]();
1682 
1683  for (size_t k = 0; k < NumEntries; ++k) {
1684  const local_ordinal_type col = Indices[k];
1685  dtemp += Values[k] * y2_local[col];
1686  }
1687  y2_local[i] += DampingFactor_ * d_ptr[i] * (x_local[i] - dtemp);
1688  }
1689  }
1690  }
1691 
1692  // FIXME (mfh 02 Jan 2013) This is only correct if row Map == range Map.
1693  if (IsParallel_) {
1694  Tpetra::deep_copy (Y, *Y2);
1695  }
1696  }
1697  }
1698 
1699 
1700  // See flop count discussion in implementation of ApplyInverseGS_CrsMatrix().
1701  const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1702  const double numVectors = as<double> (X.getNumVectors ());
1703  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1704  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1705  ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
1706  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1707 }
1708 
1709 
1710 template<class MatrixType>
1711 void
1712 Relaxation<MatrixType>::
1713 ApplyInverseGS_CrsMatrix (const crs_matrix_type& A,
1714  const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1715  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
1716 {
1717  using Teuchos::as;
1718  const Tpetra::ESweepDirection direction =
1719  DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward;
1720  if (localSmoothingIndices_.is_null ()) {
1721  A.gaussSeidelCopy (Y, X, *Diagonal_, DampingFactor_, direction,
1722  NumSweeps_, ZeroStartingSolution_);
1723  }
1724  else {
1725  A.reorderedGaussSeidelCopy (Y, X, *Diagonal_, localSmoothingIndices_ (),
1726  DampingFactor_, direction,
1727  NumSweeps_, ZeroStartingSolution_);
1728  }
1729 
1730  // For each column of output, for each sweep over the matrix:
1731  //
1732  // - One + and one * for each matrix entry
1733  // - One / and one + for each row of the matrix
1734  // - If the damping factor is not one: one * for each row of the
1735  // matrix. (It's not fair to count this if the damping factor is
1736  // one, since the implementation could skip it. Whether it does
1737  // or not is the implementation's choice.)
1738 
1739  // Floating-point operations due to the damping factor, per matrix
1740  // row, per direction, per columm of output.
1741  const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1742  const double numVectors = as<double> (X.getNumVectors ());
1743  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1744  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1745  ApplyFlops_ += NumSweeps_ * numVectors *
1746  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1747 }
1748 
1749 template<class MatrixType>
1750 void
1751 Relaxation<MatrixType>::
1752 ApplyInverseGS_BlockCrsMatrix (const block_crs_matrix_type& A,
1753  const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1754  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
1755 {
1756  using Teuchos::RCP;
1757  using Teuchos::rcp;
1758  using Teuchos::rcpFromRef;
1759  typedef Tpetra::BlockMultiVector<scalar_type,
1760  local_ordinal_type, global_ordinal_type, node_type> BMV;
1761  typedef Tpetra::MultiVector<scalar_type,
1762  local_ordinal_type, global_ordinal_type, node_type> MV;
1763 
1764  //FIXME: (tcf) 8/21/2014 -- may be problematic for multiple right hand sides
1765  //
1766  // NOTE (mfh 12 Sep 2014) I don't think it should be a problem for
1767  // multiple right-hand sides, unless the input or output MultiVector
1768  // does not have constant stride. We should check for that case
1769  // here, in case it doesn't work in localGaussSeidel (which is
1770  // entirely possible).
1771  BMV yBlock (Y, * (A.getGraph ()->getDomainMap ()), A.getBlockSize ());
1772  const BMV xBlock (X, * (A.getColMap ()), A.getBlockSize ());
1773 
1774  bool performImport = false;
1775  RCP<BMV> yBlockCol;
1776  if (Importer_.is_null ()) {
1777  yBlockCol = rcpFromRef (yBlock);
1778  }
1779  else {
1780  if (yBlockColumnPointMap_.is_null () ||
1781  yBlockColumnPointMap_->getNumVectors () != yBlock.getNumVectors () ||
1782  yBlockColumnPointMap_->getBlockSize () != yBlock.getBlockSize ()) {
1783  yBlockColumnPointMap_ =
1784  rcp (new BMV (* (A.getColMap ()), A.getBlockSize (),
1785  static_cast<local_ordinal_type> (yBlock.getNumVectors ())));
1786  }
1787  yBlockCol = yBlockColumnPointMap_;
1788  performImport = true;
1789  }
1790 
1791  if (ZeroStartingSolution_) {
1792  yBlockCol->putScalar (STS::zero ());
1793  }
1794  else if (performImport) {
1795  yBlockCol->doImport (yBlock, *Importer_, Tpetra::INSERT);
1796  }
1797 
1798  const Tpetra::ESweepDirection direction =
1799  DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward;
1800 
1801  for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
1802  if (performImport && sweep > 0) {
1803  yBlockCol->doImport(yBlock, *Importer_, Tpetra::INSERT);
1804  }
1805  A.localGaussSeidel (xBlock, *yBlockCol, blockDiag_,
1806  DampingFactor_, direction);
1807  if (performImport) {
1808  RCP<const MV> yBlockColPointDomain =
1809  yBlockCol->getMultiVectorView ().offsetView (A.getDomainMap (), 0);
1810  Tpetra::deep_copy (Y, *yBlockColPointDomain);
1811  }
1812  }
1813 }
1814 
1815 template<class MatrixType>
1816 void
1817 Relaxation<MatrixType>::
1818 MTGaussSeidel (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& B,
1819  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1820  const Tpetra::ESweepDirection direction) const
1821 {
1822  using Teuchos::null;
1823  using Teuchos::RCP;
1824  using Teuchos::rcp;
1825  using Teuchos::rcpFromRef;
1826  using Teuchos::rcp_const_cast;
1827  using Teuchos::as;
1828 
1829  typedef scalar_type Scalar;
1830  typedef local_ordinal_type LocalOrdinal;
1831  typedef global_ordinal_type GlobalOrdinal;
1832  typedef node_type Node;
1833 
1834  const char prefix[] = "Ifpack2::Relaxation::(reordered)MTGaussSeidel: ";
1835  const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
1836 
1837  const crs_matrix_type* crsMat = dynamic_cast<const crs_matrix_type*> (A_.get());
1839  (crsMat == nullptr, std::logic_error, "Ifpack2::Relaxation::apply: "
1840  "Multithreaded Gauss-Seidel methods currently only work when the "
1841  "input matrix is a Tpetra::CrsMatrix.");
1842 
1843  //Teuchos::ArrayView<local_ordinal_type> rowIndices; // unused, as of 04 Jan 2017
1845  (! localSmoothingIndices_.is_null (), std::logic_error,
1846  "Ifpack2's implementation of Multithreaded Gauss-Seidel does not "
1847  "implement the case where the user supplies an iteration order. "
1848  "This error used to appear as \"MT GaussSeidel ignores the given "
1849  "order\". "
1850  "I tried to add more explanation, but I didn't implement \"MT "
1851  "GaussSeidel\" [sic]. "
1852  "You'll have to ask the person who did.");
1853 
1855  (crsMat == nullptr, std::logic_error, prefix << "The matrix is null."
1856  " This should never happen. Please report this bug to the Ifpack2 "
1857  "developers.");
1859  (! crsMat->isFillComplete (), std::runtime_error, prefix << "The "
1860  "input CrsMatrix is not fill complete. Please call fillComplete "
1861  "on the matrix, then do setup again, before calling apply(). "
1862  "\"Do setup\" means that if only the matrix's values have changed "
1863  "since last setup, you need only call compute(). If the matrix's "
1864  "structure may also have changed, you must first call initialize(), "
1865  "then call compute(). If you have not set up this preconditioner "
1866  "for this matrix before, you must first call initialize(), then "
1867  "call compute().");
1869  (NumSweeps_ < 0, std::logic_error, prefix << ": NumSweeps_ = "
1870  << NumSweeps_ << " < 0. Please report this bug to the Ifpack2 "
1871  "developers.");
1872  if (NumSweeps_ == 0) {
1873  return;
1874  }
1875 
1876  typedef typename Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> MV;
1877  typedef typename crs_matrix_type::import_type import_type;
1878  typedef typename crs_matrix_type::export_type export_type;
1879  typedef typename crs_matrix_type::map_type map_type;
1880 
1881  RCP<const import_type> importer = crsMat->getGraph ()->getImporter ();
1882  RCP<const export_type> exporter = crsMat->getGraph ()->getExporter ();
1884  ! exporter.is_null (), std::runtime_error,
1885  "This method's implementation currently requires that the matrix's row, "
1886  "domain, and range Maps be the same. This cannot be the case, because "
1887  "the matrix has a nontrivial Export object.");
1888 
1889  RCP<const map_type> domainMap = crsMat->getDomainMap ();
1890  RCP<const map_type> rangeMap = crsMat->getRangeMap ();
1891  RCP<const map_type> rowMap = crsMat->getGraph ()->getRowMap ();
1892  RCP<const map_type> colMap = crsMat->getGraph ()->getColMap ();
1893 
1894 #ifdef HAVE_IFPACK2_DEBUG
1895  {
1896  // The relation 'isSameAs' is transitive. It's also a
1897  // collective, so we don't have to do a "shared" test for
1898  // exception (i.e., a global reduction on the test value).
1900  ! X.getMap ()->isSameAs (*domainMap), std::runtime_error,
1901  "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
1902  "multivector X be in the domain Map of the matrix.");
1904  ! B.getMap ()->isSameAs (*rangeMap), std::runtime_error,
1905  "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
1906  "B be in the range Map of the matrix.");
1908  ! D.getMap ()->isSameAs (*rowMap), std::runtime_error,
1909  "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
1910  "D be in the row Map of the matrix.");
1912  ! rowMap->isSameAs (*rangeMap), std::runtime_error,
1913  "Ifpack2::Relaxation::MTGaussSeidel requires that the row Map and the "
1914  "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
1916  ! domainMap->isSameAs (*rangeMap), std::runtime_error,
1917  "Ifpack2::Relaxation::MTGaussSeidel requires that the domain Map and "
1918  "the range Map of the matrix be the same.");
1919  }
1920 #else
1921  // Forestall any compiler warnings for unused variables.
1922  (void) rangeMap;
1923  (void) rowMap;
1924 #endif // HAVE_IFPACK2_DEBUG
1925 
1926  // Fetch a (possibly cached) temporary column Map multivector
1927  // X_colMap, and a domain Map view X_domainMap of it. Both have
1928  // constant stride by construction. We know that the domain Map
1929  // must include the column Map, because our Gauss-Seidel kernel
1930  // requires that the row Map, domain Map, and range Map are all
1931  // the same, and that each process owns all of its own diagonal
1932  // entries of the matrix.
1933 
1934  RCP<MV> X_colMap;
1935  RCP<MV> X_domainMap;
1936  bool copyBackOutput = false;
1937  if (importer.is_null ()) {
1938  if (X.isConstantStride ()) {
1939  X_colMap = rcpFromRef (X);
1940  X_domainMap = rcpFromRef (X);
1941 
1942  // Column Map and domain Map are the same, so there are no
1943  // remote entries. Thus, if we are not setting the initial
1944  // guess to zero, we don't have to worry about setting remote
1945  // entries to zero, even though we are not doing an Import in
1946  // this case.
1947  if (ZeroStartingSolution_) {
1948  X_colMap->putScalar (ZERO);
1949  }
1950  // No need to copy back to X at end.
1951  }
1952  else {
1953  // We must copy X into a constant stride multivector.
1954  // Just use the cached column Map multivector for that.
1955  // force=true means fill with zeros, so no need to fill
1956  // remote entries (not in domain Map) with zeros.
1957  updateCachedMultiVector(colMap,as<size_t>(X.getNumVectors()));
1958  X_colMap = cachedMV_;
1959  // X_domainMap is always a domain Map view of the column Map
1960  // multivector. In this case, the domain and column Maps are
1961  // the same, so X_domainMap _is_ X_colMap.
1962  X_domainMap = X_colMap;
1963  if (! ZeroStartingSolution_) { // Don't copy if zero initial guess
1964  try {
1965  deep_copy (*X_domainMap , X); // Copy X into constant stride MV
1966  } catch (std::exception& e) {
1967  std::ostringstream os;
1968  os << "Ifpack2::Relaxation::MTGaussSeidel: "
1969  "deep_copy(*X_domainMap, X) threw an exception: "
1970  << e.what () << ".";
1971  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what ());
1972  }
1973  }
1974  copyBackOutput = true; // Don't forget to copy back at end.
1975  /*
1976  TPETRA_EFFICIENCY_WARNING(
1977  ! X.isConstantStride (),
1978  std::runtime_error,
1979  "MTGaussSeidel: The current implementation of the Gauss-Seidel "
1980  "kernel requires that X and B both have constant stride. Since X "
1981  "does not have constant stride, we had to make a copy. This is a "
1982  "limitation of the current implementation and not your fault, but we "
1983  "still report it as an efficiency warning for your information.");
1984  */
1985  }
1986  }
1987  else { // Column Map and domain Map are _not_ the same.
1988  updateCachedMultiVector(colMap,as<size_t>(X.getNumVectors()));
1989  X_colMap = cachedMV_;
1990 
1991  X_domainMap = X_colMap->offsetViewNonConst (domainMap, 0);
1992 
1993 #ifdef HAVE_IFPACK2_DEBUG
1994  auto X_colMap_host_view = X_colMap->template getLocalView<Kokkos::HostSpace> ();
1995  auto X_domainMap_host_view = X_domainMap->template getLocalView<Kokkos::HostSpace> ();
1996 
1997  if (X_colMap->getLocalLength () != 0 && X_domainMap->getLocalLength ()) {
1999  X_colMap_host_view.data () != X_domainMap_host_view.data (),
2000  std::logic_error, "Ifpack2::Relaxation::MTGaussSeidel: "
2001  "Pointer to start of column Map view of X is not equal to pointer to "
2002  "start of (domain Map view of) X. This may mean that "
2003  "Tpetra::MultiVector::offsetViewNonConst is broken. "
2004  "Please report this bug to the Tpetra developers.");
2005  }
2006 
2008  X_colMap_host_view.extent (0) < X_domainMap_host_view.extent (0) ||
2009  X_colMap->getLocalLength () < X_domainMap->getLocalLength (),
2010  std::logic_error, "Ifpack2::Relaxation::MTGaussSeidel: "
2011  "X_colMap has fewer local rows than X_domainMap. "
2012  "X_colMap_host_view.extent(0) = " << X_colMap_host_view.extent (0)
2013  << ", X_domainMap_host_view.extent(0) = "
2014  << X_domainMap_host_view.extent (0)
2015  << ", X_colMap->getLocalLength() = " << X_colMap->getLocalLength ()
2016  << ", and X_domainMap->getLocalLength() = "
2017  << X_domainMap->getLocalLength ()
2018  << ". This means that Tpetra::MultiVector::offsetViewNonConst "
2019  "is broken. Please report this bug to the Tpetra developers.");
2020 
2022  X_colMap->getNumVectors () != X_domainMap->getNumVectors (),
2023  std::logic_error, "Ifpack2::Relaxation::MTGaussSeidel: "
2024  "X_colMap has a different number of columns than X_domainMap. "
2025  "X_colMap->getNumVectors() = " << X_colMap->getNumVectors ()
2026  << " != X_domainMap->getNumVectors() = "
2027  << X_domainMap->getNumVectors ()
2028  << ". This means that Tpetra::MultiVector::offsetViewNonConst "
2029  "is broken. Please report this bug to the Tpetra developers.");
2030 #endif // HAVE_IFPACK2_DEBUG
2031 
2032  if (ZeroStartingSolution_) {
2033  // No need for an Import, since we're filling with zeros.
2034  X_colMap->putScalar (ZERO);
2035  } else {
2036  // We could just copy X into X_domainMap. However, that
2037  // wastes a copy, because the Import also does a copy (plus
2038  // communication). Since the typical use case for
2039  // Gauss-Seidel is a small number of sweeps (2 is typical), we
2040  // don't want to waste that copy. Thus, we do the Import
2041  // here, and skip the first Import in the first sweep.
2042  // Importing directly from X effects the copy into X_domainMap
2043  // (which is a view of X_colMap).
2044  X_colMap->doImport (X, *importer, Tpetra::CombineMode::INSERT);
2045  }
2046  copyBackOutput = true; // Don't forget to copy back at end.
2047  } // if column and domain Maps are (not) the same
2048 
2049  // The Gauss-Seidel / SOR kernel expects multivectors of constant
2050  // stride. X_colMap is by construction, but B might not be. If
2051  // it's not, we have to make a copy.
2052  RCP<const MV> B_in;
2053  if (B.isConstantStride ()) {
2054  B_in = rcpFromRef (B);
2055  }
2056  else {
2057  // Range Map and row Map are the same in this case, so we can
2058  // use the cached row Map multivector to store a constant stride
2059  // copy of B.
2060  //RCP<MV> B_in_nonconst = crsMat->getRowMapMultiVector (B, true);
2061  RCP<MV> B_in_nonconst = rcp (new MV (rowMap, B.getNumVectors()));
2062  try {
2063  deep_copy (*B_in_nonconst, B);
2064  } catch (std::exception& e) {
2065  std::ostringstream os;
2066  os << "Ifpack2::Relaxation::MTGaussSeidel: "
2067  "deep_copy(*B_in_nonconst, B) threw an exception: "
2068  << e.what () << ".";
2069  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what ());
2070  }
2071  B_in = rcp_const_cast<const MV> (B_in_nonconst);
2072 
2073  /*
2074  TPETRA_EFFICIENCY_WARNING(
2075  ! B.isConstantStride (),
2076  std::runtime_error,
2077  "MTGaussSeidel: The current implementation requires that B have "
2078  "constant stride. Since B does not have constant stride, we had to "
2079  "copy it into a separate constant-stride multivector. This is a "
2080  "limitation of the current implementation and not your fault, but we "
2081  "still report it as an efficiency warning for your information.");
2082  */
2083  }
2084 
2085  local_matrix_type kcsr = crsMat->getLocalMatrix ();
2086  const size_t NumVectors = X.getNumVectors ();
2087 
2088  bool update_y_vector = true;
2089  //false as it was done up already, and we dont want to zero it in each sweep.
2090  bool zero_x_vector = false;
2091 
2092  for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
2093  if (! importer.is_null () && sweep > 0) {
2094  // We already did the first Import for the zeroth sweep above,
2095  // if it was necessary.
2096  X_colMap->doImport (*X_domainMap, *importer, Tpetra::CombineMode::INSERT);
2097  }
2098 
2099  for (size_t indVec = 0; indVec < NumVectors; ++indVec) {
2100  if (direction == Tpetra::Symmetric) {
2101  KokkosSparse::Experimental::symmetric_gauss_seidel_apply
2102  (mtKernelHandle_.getRawPtr(), A_->getNodeNumRows(), A_->getNodeNumCols(),
2103  kcsr.graph.row_map, kcsr.graph.entries, kcsr.values,
2104  Kokkos::subview(X_colMap->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec),
2105  Kokkos::subview(B_in->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec),
2106  zero_x_vector, update_y_vector, DampingFactor_);
2107  }
2108  else if (direction == Tpetra::Forward) {
2109  KokkosSparse::Experimental::forward_sweep_gauss_seidel_apply
2110  (mtKernelHandle_.getRawPtr(), A_->getNodeNumRows(), A_->getNodeNumCols(),
2111  kcsr.graph.row_map,kcsr.graph.entries, kcsr.values,
2112  Kokkos::subview(X_colMap->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec ),
2113  Kokkos::subview(B_in->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec),
2114  zero_x_vector, update_y_vector, DampingFactor_);
2115  }
2116  else if (direction == Tpetra::Backward) {
2117  KokkosSparse::Experimental::backward_sweep_gauss_seidel_apply
2118  (mtKernelHandle_.getRawPtr(), A_->getNodeNumRows(), A_->getNodeNumCols(),
2119  kcsr.graph.row_map,kcsr.graph.entries, kcsr.values,
2120  Kokkos::subview(X_colMap->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec ),
2121  Kokkos::subview(B_in->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec),
2122  zero_x_vector, update_y_vector, DampingFactor_);
2123  }
2124  else {
2126  true, std::invalid_argument,
2127  prefix << "The 'direction' enum does not have any of its valid "
2128  "values: Forward, Backward, or Symmetric.");
2129  }
2130  }
2131 
2132  if (NumVectors > 1){
2133  update_y_vector = true;
2134  }
2135  else {
2136  update_y_vector = false;
2137  }
2138  }
2139 
2140  if (copyBackOutput) {
2141  try {
2142  deep_copy (X , *X_domainMap); // Copy result back into X.
2143  } catch (std::exception& e) {
2145  true, std::runtime_error, prefix << "deep_copy(X, *X_domainMap) "
2146  "threw an exception: " << e.what ());
2147  }
2148  }
2149 
2150  const double dampingFlops = (DampingFactor_ == STS::one ()) ? 0.0 : 1.0;
2151  const double numVectors = as<double> (X.getNumVectors ());
2152  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
2153  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
2154  double ApplyFlops = NumSweeps_ * numVectors *
2155  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
2156  if (direction == Tpetra::Symmetric)
2157  ApplyFlops *= 2.0;
2158  ApplyFlops_ += ApplyFlops;
2159 
2160 }
2161 
2162 template<class MatrixType>
2163 void
2164 Relaxation<MatrixType>::
2165 ApplyInverseMTSGS_CrsMatrix (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& B,
2166  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X) const
2167 {
2168  const Tpetra::ESweepDirection direction = Tpetra::Symmetric;
2169  this->MTGaussSeidel (B, X, direction);
2170 }
2171 
2172 
2173 template<class MatrixType>
2174 void Relaxation<MatrixType>::ApplyInverseMTGS_CrsMatrix (
2175  const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& B,
2176  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X) const {
2177 
2178  const Tpetra::ESweepDirection direction =
2179  DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward;
2180  this->MTGaussSeidel (B, X, direction);
2181 }
2182 
2183 template<class MatrixType>
2184 void
2185 Relaxation<MatrixType>::
2186 ApplyInverseSGS (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
2187  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
2188 {
2189  using this_type = Relaxation<MatrixType>;
2190  // The CrsMatrix version is faster, because it can access the sparse
2191  // matrix data directly, rather than by copying out each row's data
2192  // in turn. Thus, we check whether the RowMatrix is really a
2193  // CrsMatrix.
2194  //
2195  // FIXME (mfh 07 Jul 2013) See note on crs_matrix_type typedef
2196  // declaration in Ifpack2_Relaxation_decl.hpp header file. The code
2197  // will still be correct if the cast fails, but it will use an
2198  // unoptimized kernel.
2199  const block_crs_matrix_type* blockCrsMat =
2200  dynamic_cast<const block_crs_matrix_type*> (A_.getRawPtr ());
2201  const crs_matrix_type* crsMat =
2202  dynamic_cast<const crs_matrix_type*> (A_.getRawPtr ());
2203  if (blockCrsMat != nullptr) {
2204  const_cast<this_type*> (this)->ApplyInverseSGS_BlockCrsMatrix(*blockCrsMat, X, Y);
2205  }
2206  else if (crsMat != nullptr) {
2207  ApplyInverseSGS_CrsMatrix (*crsMat, X, Y);
2208  }
2209  else {
2210  ApplyInverseSGS_RowMatrix (X, Y);
2211  }
2212 }
2213 
2214 
2215 template<class MatrixType>
2216 void
2217 Relaxation<MatrixType>::
2218 ApplyInverseSGS_RowMatrix (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
2219  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
2220 {
2221  using Teuchos::Array;
2222  using Teuchos::ArrayRCP;
2223  using Teuchos::ArrayView;
2224  using Teuchos::as;
2225  using Teuchos::RCP;
2226  using Teuchos::rcp;
2227  using Teuchos::rcpFromRef;
2228  using MV = Tpetra::MultiVector<scalar_type, local_ordinal_type,
2229  global_ordinal_type, node_type>;
2230 
2231  // Tpetra's GS implementation for CrsMatrix handles zeroing out the
2232  // starting multivector itself. The generic RowMatrix version here
2233  // does not, so we have to zero out Y here.
2234  if (ZeroStartingSolution_) {
2235  Y.putScalar (STS::zero ());
2236  }
2237 
2238  const size_t NumVectors = X.getNumVectors ();
2239  const size_t maxLength = A_->getNodeMaxNumRowEntries ();
2240  Array<local_ordinal_type> Indices (maxLength);
2241  Array<scalar_type> Values (maxLength);
2242 
2243  // Local smoothing stuff
2244  const size_t numMyRows = A_->getNodeNumRows();
2245  const local_ordinal_type * rowInd = 0;
2246  size_t numActive = numMyRows;
2247  bool do_local = !localSmoothingIndices_.is_null();
2248  if(do_local) {
2249  rowInd = localSmoothingIndices_.getRawPtr();
2250  numActive = localSmoothingIndices_.size();
2251  }
2252 
2253 
2254  RCP<MV> Y2;
2255  if (IsParallel_) {
2256  if (Importer_.is_null ()) { // domain and column Maps are the same.
2257  updateCachedMultiVector (Y.getMap (), NumVectors);
2258  }
2259  else {
2260  updateCachedMultiVector (Importer_->getTargetMap (), NumVectors);
2261  }
2262  Y2 = cachedMV_;
2263  }
2264  else {
2265  Y2 = rcpFromRef (Y);
2266  }
2267 
2268  // Diagonal
2269  ArrayRCP<const scalar_type> d_rcp = Diagonal_->get1dView();
2270  ArrayView<const scalar_type> d_ptr = d_rcp();
2271 
2272  // Constant stride check
2273  bool constant_stride = X.isConstantStride() && Y2->isConstantStride();
2274 
2275  if(constant_stride) {
2276  // extract 1D RCPs
2277  size_t x_stride = X.getStride();
2278  size_t y2_stride = Y2->getStride();
2279  ArrayRCP<scalar_type> y2_rcp = Y2->get1dViewNonConst();
2280  ArrayRCP<const scalar_type> x_rcp = X.get1dView();
2281  ArrayView<scalar_type> y2_ptr = y2_rcp();
2282  ArrayView<const scalar_type> x_ptr = x_rcp();
2283  Array<scalar_type> dtemp(NumVectors,STS::zero());
2284 
2285  for (int j = 0; j < NumSweeps_; j++) {
2286  // data exchange is here, once per sweep
2287  if (IsParallel_) {
2288  if (Importer_.is_null ()) {
2289  // just copy, since domain and column Maps are the same
2290  Tpetra::deep_copy (*Y2, Y);
2291  } else {
2292  Y2->doImport (Y, *Importer_, Tpetra::INSERT);
2293  }
2294  }
2295  for (size_t ii = 0; ii < numActive; ++ii) {
2296  local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
2297  size_t NumEntries;
2298  A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
2299  dtemp.assign(NumVectors,STS::zero());
2300 
2301  for (size_t k = 0; k < NumEntries; ++k) {
2302  const local_ordinal_type col = Indices[k];
2303  for (size_t m = 0; m < NumVectors; ++m) {
2304  dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
2305  }
2306  }
2307 
2308  for (size_t m = 0; m < NumVectors; ++m) {
2309  y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
2310  }
2311  }
2312 
2313  // ptrdiff_t is the same size as size_t, but is signed. Being
2314  // signed is important so that i >= 0 is not trivially true.
2315  for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
2316  local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
2317  size_t NumEntries;
2318  A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
2319  dtemp.assign(NumVectors,STS::zero());
2320 
2321  for (size_t k = 0; k < NumEntries; ++k) {
2322  const local_ordinal_type col = Indices[k];
2323  for (size_t m = 0; m < NumVectors; ++m) {
2324  dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
2325  }
2326  }
2327 
2328  for (size_t m = 0; m < NumVectors; ++m) {
2329  y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
2330  }
2331  }
2332 
2333  // FIXME (mfh 02 Jan 2013) This is only correct if row Map == range Map.
2334  if (IsParallel_) {
2335  Tpetra::deep_copy (Y, *Y2);
2336  }
2337  }
2338  }
2339  else {
2340  // extract 2D RCPs
2341  ArrayRCP<ArrayRCP<scalar_type> > y2_ptr = Y2->get2dViewNonConst ();
2342  ArrayRCP<ArrayRCP<const scalar_type> > x_ptr = X.get2dView ();
2343 
2344  for (int iter = 0; iter < NumSweeps_; ++iter) {
2345  // only one data exchange per sweep
2346  if (IsParallel_) {
2347  if (Importer_.is_null ()) {
2348  // just copy, since domain and column Maps are the same
2349  Tpetra::deep_copy (*Y2, Y);
2350  } else {
2351  Y2->doImport (Y, *Importer_, Tpetra::INSERT);
2352  }
2353  }
2354 
2355  for (size_t ii = 0; ii < numActive; ++ii) {
2356  local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
2357  const scalar_type diag = d_ptr[i];
2358  size_t NumEntries;
2359  A_->getLocalRowCopy (as<local_ordinal_type> (i), Indices (), Values (), NumEntries);
2360 
2361  for (size_t m = 0; m < NumVectors; ++m) {
2362  scalar_type dtemp = STS::zero ();
2363  ArrayView<const scalar_type> x_local = (x_ptr())[m]();
2364  ArrayView<scalar_type> y2_local = (y2_ptr())[m]();
2365 
2366  for (size_t k = 0; k < NumEntries; ++k) {
2367  const local_ordinal_type col = Indices[k];
2368  dtemp += Values[k] * y2_local[col];
2369  }
2370  y2_local[i] += DampingFactor_ * (x_local[i] - dtemp) * diag;
2371  }
2372  }
2373 
2374  // ptrdiff_t is the same size as size_t, but is signed. Being
2375  // signed is important so that i >= 0 is not trivially true.
2376  for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
2377  local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
2378  const scalar_type diag = d_ptr[i];
2379  size_t NumEntries;
2380  A_->getLocalRowCopy (as<local_ordinal_type> (i), Indices (), Values (), NumEntries);
2381 
2382  for (size_t m = 0; m < NumVectors; ++m) {
2383  scalar_type dtemp = STS::zero ();
2384  ArrayView<const scalar_type> x_local = (x_ptr())[m]();
2385  ArrayView<scalar_type> y2_local = (y2_ptr())[m]();
2386 
2387  for (size_t k = 0; k < NumEntries; ++k) {
2388  const local_ordinal_type col = Indices[k];
2389  dtemp += Values[k] * y2_local[col];
2390  }
2391  y2_local[i] += DampingFactor_ * (x_local[i] - dtemp) * diag;
2392  }
2393  }
2394 
2395  // FIXME (mfh 02 Jan 2013) This is only correct if row Map == range Map.
2396  if (IsParallel_) {
2397  Tpetra::deep_copy (Y, *Y2);
2398  }
2399  }
2400  }
2401 
2402  // See flop count discussion in implementation of ApplyInverseSGS_CrsMatrix().
2403  const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
2404  const double numVectors = as<double> (X.getNumVectors ());
2405  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
2406  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
2407  ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
2408  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
2409 }
2410 
2411 
2412 template<class MatrixType>
2413 void
2414 Relaxation<MatrixType>::
2415 ApplyInverseSGS_CrsMatrix (const crs_matrix_type& A,
2416  const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
2417  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
2418 {
2419  using Teuchos::as;
2420  const Tpetra::ESweepDirection direction = Tpetra::Symmetric;
2421  if (localSmoothingIndices_.is_null ()) {
2422  A.gaussSeidelCopy (Y, X, *Diagonal_, DampingFactor_, direction,
2423  NumSweeps_, ZeroStartingSolution_);
2424  }
2425  else {
2426  A.reorderedGaussSeidelCopy (Y, X, *Diagonal_, localSmoothingIndices_ (),
2427  DampingFactor_, direction,
2428  NumSweeps_, ZeroStartingSolution_);
2429  }
2430 
2431  // For each column of output, for each sweep over the matrix:
2432  //
2433  // - One + and one * for each matrix entry
2434  // - One / and one + for each row of the matrix
2435  // - If the damping factor is not one: one * for each row of the
2436  // matrix. (It's not fair to count this if the damping factor is
2437  // one, since the implementation could skip it. Whether it does
2438  // or not is the implementation's choice.)
2439  //
2440  // Each sweep of symmetric Gauss-Seidel / SOR counts as two sweeps,
2441  // one forward and one backward.
2442 
2443  // Floating-point operations due to the damping factor, per matrix
2444  // row, per direction, per columm of output.
2445  const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
2446  const double numVectors = as<double> (X.getNumVectors ());
2447  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
2448  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
2449  ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
2450  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
2451 }
2452 
2453 template<class MatrixType>
2454 void
2455 Relaxation<MatrixType>::
2456 ApplyInverseSGS_BlockCrsMatrix (const block_crs_matrix_type& A,
2457  const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
2458  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
2459 {
2460  using Teuchos::RCP;
2461  using Teuchos::rcp;
2462  using Teuchos::rcpFromRef;
2463  typedef Tpetra::BlockMultiVector<scalar_type,
2464  local_ordinal_type, global_ordinal_type, node_type> BMV;
2465  typedef Tpetra::MultiVector<scalar_type,
2466  local_ordinal_type, global_ordinal_type, node_type> MV;
2467 
2468  //FIXME: (tcf) 8/21/2014 -- may be problematic for multiple right hand sides
2469  //
2470  // NOTE (mfh 12 Sep 2014) I don't think it should be a problem for
2471  // multiple right-hand sides, unless the input or output MultiVector
2472  // does not have constant stride. We should check for that case
2473  // here, in case it doesn't work in localGaussSeidel (which is
2474  // entirely possible).
2475  BMV yBlock (Y, * (A.getGraph ()->getDomainMap ()), A.getBlockSize ());
2476  const BMV xBlock (X, * (A.getColMap ()), A.getBlockSize ());
2477 
2478  bool performImport = false;
2479  RCP<BMV> yBlockCol;
2480  if (Importer_.is_null ()) {
2481  yBlockCol = Teuchos::rcpFromRef (yBlock);
2482  }
2483  else {
2484  if (yBlockColumnPointMap_.is_null () ||
2485  yBlockColumnPointMap_->getNumVectors () != yBlock.getNumVectors () ||
2486  yBlockColumnPointMap_->getBlockSize () != yBlock.getBlockSize ()) {
2487  yBlockColumnPointMap_ =
2488  rcp (new BMV (* (A.getColMap ()), A.getBlockSize (),
2489  static_cast<local_ordinal_type> (yBlock.getNumVectors ())));
2490  }
2491  yBlockCol = yBlockColumnPointMap_;
2492  performImport = true;
2493  }
2494 
2495  if (ZeroStartingSolution_) {
2496  yBlockCol->putScalar (STS::zero ());
2497  }
2498  else if (performImport) {
2499  yBlockCol->doImport (yBlock, *Importer_, Tpetra::INSERT);
2500  }
2501 
2502  // FIXME (mfh 12 Sep 2014) Shouldn't this come from the user's parameter?
2503  const Tpetra::ESweepDirection direction = Tpetra::Symmetric;
2504 
2505  for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
2506  if (performImport && sweep > 0) {
2507  yBlockCol->doImport (yBlock, *Importer_, Tpetra::INSERT);
2508  }
2509  A.localGaussSeidel (xBlock, *yBlockCol, blockDiag_,
2510  DampingFactor_, direction);
2511  if (performImport) {
2512  RCP<const MV> yBlockColPointDomain =
2513  yBlockCol->getMultiVectorView ().offsetView (A.getDomainMap (), 0);
2514  MV yBlockView = yBlock.getMultiVectorView ();
2515  Tpetra::deep_copy (yBlockView, *yBlockColPointDomain);
2516  }
2517  }
2518 }
2519 
2520 
2521 template<class MatrixType>
2523 {
2524  std::ostringstream os;
2525 
2526  // Output is a valid YAML dictionary in flow style. If you don't
2527  // like everything on a single line, you should call describe()
2528  // instead.
2529  os << "\"Ifpack2::Relaxation\": {";
2530 
2531  os << "Initialized: " << (isInitialized () ? "true" : "false") << ", "
2532  << "Computed: " << (isComputed () ? "true" : "false") << ", ";
2533 
2534  // It's useful to print this instance's relaxation method (Jacobi,
2535  // Gauss-Seidel, or symmetric Gauss-Seidel). If you want more info
2536  // than that, call describe() instead.
2537  os << "Type: ";
2538  if (PrecType_ == Ifpack2::Details::JACOBI) {
2539  os << "Jacobi";
2540  } else if (PrecType_ == Ifpack2::Details::GS) {
2541  os << "Gauss-Seidel";
2542  } else if (PrecType_ == Ifpack2::Details::SGS) {
2543  os << "Symmetric Gauss-Seidel";
2544  } else if (PrecType_ == Ifpack2::Details::MTGS) {
2545  os << "MT Gauss-Seidel";
2546  } else if (PrecType_ == Ifpack2::Details::MTSGS) {
2547  os << "MT Symmetric Gauss-Seidel";
2548  }
2549  else {
2550  os << "INVALID";
2551  }
2552 
2553  os << ", " << "sweeps: " << NumSweeps_ << ", "
2554  << "damping factor: " << DampingFactor_ << ", ";
2555  if (DoL1Method_) {
2556  os << "use l1: " << DoL1Method_ << ", "
2557  << "l1 eta: " << L1Eta_ << ", ";
2558  }
2559 
2560  if (A_.is_null ()) {
2561  os << "Matrix: null";
2562  }
2563  else {
2564  os << "Global matrix dimensions: ["
2565  << A_->getGlobalNumRows () << ", " << A_->getGlobalNumCols () << "]"
2566  << ", Global nnz: " << A_->getGlobalNumEntries();
2567  }
2568 
2569  os << "}";
2570  return os.str ();
2571 }
2572 
2573 
2574 template<class MatrixType>
2575 void
2578  const Teuchos::EVerbosityLevel verbLevel) const
2579 {
2580  using Teuchos::OSTab;
2582  using Teuchos::VERB_DEFAULT;
2583  using Teuchos::VERB_NONE;
2584  using Teuchos::VERB_LOW;
2585  using Teuchos::VERB_MEDIUM;
2586  using Teuchos::VERB_HIGH;
2587  using Teuchos::VERB_EXTREME;
2588  using std::endl;
2589 
2590  const Teuchos::EVerbosityLevel vl =
2591  (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
2592 
2593  const int myRank = this->getComm ()->getRank ();
2594 
2595  // none: print nothing
2596  // low: print O(1) info from Proc 0
2597  // medium:
2598  // high:
2599  // extreme:
2600 
2601  if (vl != VERB_NONE && myRank == 0) {
2602  // Describable interface asks each implementation to start with a tab.
2603  OSTab tab1 (out);
2604  // Output is valid YAML; hence the quotes, to protect the colons.
2605  out << "\"Ifpack2::Relaxation\":" << endl;
2606  OSTab tab2 (out);
2607  out << "MatrixType: \"" << TypeNameTraits<MatrixType>::name () << "\""
2608  << endl;
2609  if (this->getObjectLabel () != "") {
2610  out << "Label: " << this->getObjectLabel () << endl;
2611  }
2612  out << "Initialized: " << (isInitialized () ? "true" : "false") << endl
2613  << "Computed: " << (isComputed () ? "true" : "false") << endl
2614  << "Parameters: " << endl;
2615  {
2616  OSTab tab3 (out);
2617  out << "\"relaxation: type\": ";
2618  if (PrecType_ == Ifpack2::Details::JACOBI) {
2619  out << "Jacobi";
2620  } else if (PrecType_ == Ifpack2::Details::GS) {
2621  out << "Gauss-Seidel";
2622  } else if (PrecType_ == Ifpack2::Details::SGS) {
2623  out << "Symmetric Gauss-Seidel";
2624  } else if (PrecType_ == Ifpack2::Details::MTGS) {
2625  out << "MT Gauss-Seidel";
2626  } else if (PrecType_ == Ifpack2::Details::MTSGS) {
2627  out << "MT Symmetric Gauss-Seidel";
2628  } else {
2629  out << "INVALID";
2630  }
2631  // We quote these parameter names because they contain colons.
2632  // YAML uses the colon to distinguish key from value.
2633  out << endl
2634  << "\"relaxation: sweeps\": " << NumSweeps_ << endl
2635  << "\"relaxation: damping factor\": " << DampingFactor_ << endl
2636  << "\"relaxation: min diagonal value\": " << MinDiagonalValue_ << endl
2637  << "\"relaxation: zero starting solution\": " << ZeroStartingSolution_ << endl
2638  << "\"relaxation: backward mode\": " << DoBackwardGS_ << endl
2639  << "\"relaxation: use l1\": " << DoL1Method_ << endl
2640  << "\"relaxation: l1 eta\": " << L1Eta_ << endl;
2641  }
2642  out << "Computed quantities:" << endl;
2643  {
2644  OSTab tab3 (out);
2645  out << "Global number of rows: " << A_->getGlobalNumRows () << endl
2646  << "Global number of columns: " << A_->getGlobalNumCols () << endl;
2647  }
2648  if (checkDiagEntries_ && isComputed ()) {
2649  out << "Properties of input diagonal entries:" << endl;
2650  {
2651  OSTab tab3 (out);
2652  out << "Magnitude of minimum-magnitude diagonal entry: "
2653  << globalMinMagDiagEntryMag_ << endl
2654  << "Magnitude of maximum-magnitude diagonal entry: "
2655  << globalMaxMagDiagEntryMag_ << endl
2656  << "Number of diagonal entries with small magnitude: "
2657  << globalNumSmallDiagEntries_ << endl
2658  << "Number of zero diagonal entries: "
2659  << globalNumZeroDiagEntries_ << endl
2660  << "Number of diagonal entries with negative real part: "
2661  << globalNumNegDiagEntries_ << endl
2662  << "Abs 2-norm diff between computed and actual inverse "
2663  << "diagonal: " << globalDiagNormDiff_ << endl;
2664  }
2665  }
2666  if (isComputed ()) {
2667  out << "Saved diagonal offsets: "
2668  << (savedDiagOffsets_ ? "true" : "false") << endl;
2669  }
2670  out << "Call counts and total times (in seconds): " << endl;
2671  {
2672  OSTab tab3 (out);
2673  out << "initialize: " << endl;
2674  {
2675  OSTab tab4 (out);
2676  out << "Call count: " << NumInitialize_ << endl;
2677  out << "Total time: " << InitializeTime_ << endl;
2678  }
2679  out << "compute: " << endl;
2680  {
2681  OSTab tab4 (out);
2682  out << "Call count: " << NumCompute_ << endl;
2683  out << "Total time: " << ComputeTime_ << endl;
2684  }
2685  out << "apply: " << endl;
2686  {
2687  OSTab tab4 (out);
2688  out << "Call count: " << NumApply_ << endl;
2689  out << "Total time: " << ApplyTime_ << endl;
2690  }
2691  }
2692  }
2693 }
2694 
2695 } // namespace Ifpack2
2696 
2697 #define IFPACK2_RELAXATION_INSTANT(S,LO,GO,N) \
2698  template class Ifpack2::Relaxation< Tpetra::RowMatrix<S, LO, GO, N> >;
2699 
2700 #endif // IFPACK2_RELAXATION_DEF_HPP
bool hasTransposeApply() const
Whether apply() and applyMat() let you apply the transpose or conjugate transpose.
Definition: Ifpack2_Relaxation_def.hpp:444
double getComputeFlops() const
Total number of floating-point operations over all calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:486
double getApplyFlops() const
Total number of floating-point operations over all calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:492
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object&#39;s attributes to the given output stream.
Definition: Ifpack2_Relaxation_def.hpp:2577
bool is_null(const boost::shared_ptr< T > &p)
basic_OSTab< char > OSTab
static magnitudeType eps()
Relaxation(const Teuchos::RCP< const row_matrix_type > &A)
Constructor.
Definition: Ifpack2_Relaxation_def.hpp:216
T & get(const std::string &name, T def_value)
void compute()
Compute the preconditioner (&quot;numeric setup&quot;);.
Definition: Ifpack2_Relaxation_def.hpp:928
ParameterList & set(std::string const &name, T const &value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
static RCP< Time > getNewCounter(const std::string &name)
static RCP< Time > lookupCounter(const std::string &name)
virtual void printDoc(std::string const &docString, std::ostream &out) const =0
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
size_t getNodeSmootherComplexity() const
Get a rough estimate of cost per iteration.
Definition: Ifpack2_Relaxation_def.hpp:499
virtual const std::string getXMLTypeName() const =0
static std::ostream & printLines(std::ostream &os, const std::string &linePrefix, const std::string &lines)
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
The communicator over which the matrix and vectors are distributed.
Definition: Ifpack2_Relaxation_def.hpp:401
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Change the matrix to be preconditioned.
Definition: Ifpack2_Relaxation_def.hpp:196
int getNumApply() const
Total number of calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:462
T * getRawPtr() const
bool remove(std::string const &name, bool throwIfNotExists=true)
double getComputeTime() const
Total time in seconds spent in all calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:474
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
MatrixType::node_type node_type
The Node type used by the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:258
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getDomainMap() const
Returns the Tpetra::Map object associated with the domain of this operator.
Definition: Ifpack2_Relaxation_def.hpp:421
void setParameters(const Teuchos::ParameterList &params)
Set the relaxation / preconditioner parameters.
Definition: Ifpack2_Relaxation_def.hpp:391
File for utility functions.
std::string description() const
A simple one-line description of this object.
Definition: Ifpack2_Relaxation_def.hpp:2522
int getNumInitialize() const
Total number of calls to initialize().
Definition: Ifpack2_Relaxation_def.hpp:450
void validateParametersAndSetDefaults(ParameterList const &validParamList, int const depth=1000)
TEUCHOS_DEPRECATED void reduceAll(const Comm< Ordinal > &comm, const EReductionType reductType, const Packet &send, Packet *globalReduct)
virtual ValidStringsList validStringValues() const =0
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getRangeMap() const
Returns the Tpetra::Map object associated with the range of this operator.
Definition: Ifpack2_Relaxation_def.hpp:434
MatrixType::global_ordinal_type global_ordinal_type
The type of global indices in the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:255
double getInitializeTime() const
Total time in seconds spent in all calls to initialize().
Definition: Ifpack2_Relaxation_def.hpp:468
any & getAny(bool activeQry=true)
void applyMat(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS) const
Apply the input matrix to X, returning the result in Y.
Definition: Ifpack2_Relaxation_def.hpp:615
Teuchos::RCP< const row_matrix_type > getMatrix() const
The matrix to be preconditioned.
Definition: Ifpack2_Relaxation_def.hpp:412
TypeTo as(const TypeFrom &t)
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:252
bool isType(const std::string &name) const
MatrixType::scalar_type scalar_type
The type of the entries of the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:249
double totalElapsedTime(bool readCurrentTime=false) const
std::string typeName() const
const std::type_info & type() const
int getNumCompute() const
Total number of calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:456
Relaxation preconditioners for Tpetra::RowMatrix and Tpetra::CrsMatrix sparse matrices.
Definition: Ifpack2_Relaxation_decl.hpp:232
double getApplyTime() const
Total time in seconds spent in all calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:480
void getValidParameters(Teuchos::ParameterList &params)
Fills a list which contains all the parameters possibly used by Ifpack2.
Definition: Ifpack2_Parameters.cpp:50
T * get() const
void initialize()
Initialize the preconditioner (&quot;symbolic setup&quot;).
Definition: Ifpack2_Relaxation_def.hpp:636
Tpetra::RowMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > row_matrix_type
Tpetra::RowMatrix specialization used by this class.
Definition: Ifpack2_Relaxation_decl.hpp:265
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the preconditioner to X, returning the result in Y.
Definition: Ifpack2_Relaxation_def.hpp:512
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const
Return a list of all the parameters that this class accepts.
Definition: Ifpack2_Relaxation_def.hpp:258
Teuchos::ScalarTraits< scalar_type >::magnitudeType magnitude_type
The type of the magnitude (absolute value) of a matrix entry.
Definition: Ifpack2_Relaxation_decl.hpp:261
virtual void validate(ParameterEntry const &entry, std::string const &paramName, std::string const &sublistName) const =0
bool is_null() const