Ifpack2 Templated Preconditioning Package  Version 1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Ifpack2_Relaxation_def.hpp
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 //@HEADER
41 */
42 
43 #ifndef IFPACK2_RELAXATION_DEF_HPP
44 #define IFPACK2_RELAXATION_DEF_HPP
45 
46 #include "Teuchos_StandardParameterEntryValidators.hpp"
47 #include "Teuchos_TimeMonitor.hpp"
48 #include "Tpetra_CrsMatrix.hpp"
49 #include "Tpetra_Experimental_BlockCrsMatrix.hpp"
50 #include "Tpetra_Experimental_BlockView.hpp"
51 #include "Ifpack2_Utilities.hpp"
52 #include "MatrixMarket_Tpetra.hpp"
53 #include <cstdlib>
54 #include <sstream>
55 #include "KokkosSparse_gauss_seidel.hpp"
56 
57 namespace {
58  // Validate that a given int is nonnegative.
59  class NonnegativeIntValidator : public Teuchos::ParameterEntryValidator {
60  public:
61  // Constructor (does nothing).
62  NonnegativeIntValidator () {}
63 
64  // ParameterEntryValidator wants this method.
66  return Teuchos::null;
67  }
68 
69  // Actually validate the parameter's value.
70  void
71  validate (const Teuchos::ParameterEntry& entry,
72  const std::string& paramName,
73  const std::string& sublistName) const
74  {
75  using std::endl;
76  Teuchos::any anyVal = entry.getAny (true);
77  const std::string entryName = entry.getAny (false).typeName ();
78 
80  anyVal.type () != typeid (int),
82  "Parameter \"" << paramName << "\" in sublist \"" << sublistName
83  << "\" has the wrong type." << endl << "Parameter: " << paramName
84  << endl << "Type specified: " << entryName << endl
85  << "Type required: int" << endl);
86 
87  const int val = Teuchos::any_cast<int> (anyVal);
90  "Parameter \"" << paramName << "\" in sublist \"" << sublistName
91  << "\" is negative." << endl << "Parameter: " << paramName
92  << endl << "Value specified: " << val << endl
93  << "Required range: [0, INT_MAX]" << endl);
94  }
95 
96  // ParameterEntryValidator wants this method.
97  const std::string getXMLTypeName () const {
98  return "NonnegativeIntValidator";
99  }
100 
101  // ParameterEntryValidator wants this method.
102  void
103  printDoc (const std::string& docString,
104  std::ostream &out) const
105  {
106  Teuchos::StrUtils::printLines (out, "# ", docString);
107  out << "#\tValidator Used: " << std::endl;
108  out << "#\t\tNonnegativeIntValidator" << std::endl;
109  }
110  };
111 
112  // A way to get a small positive number (eps() for floating-point
113  // types, or 1 for integer types) when Teuchos::ScalarTraits doesn't
114  // define it (for example, for integer values).
115  template<class Scalar, const bool isOrdinal=Teuchos::ScalarTraits<Scalar>::isOrdinal>
116  class SmallTraits {
117  public:
118  // Return eps if Scalar is a floating-point type, else return 1.
119  static const Scalar eps ();
120  };
121 
122  // Partial specialization for when Scalar is not a floating-point type.
123  template<class Scalar>
124  class SmallTraits<Scalar, true> {
125  public:
126  static const Scalar eps () {
128  }
129  };
130 
131  // Partial specialization for when Scalar is a floating-point type.
132  template<class Scalar>
133  class SmallTraits<Scalar, false> {
134  public:
135  static const Scalar eps () {
137  }
138  };
139 
140 
141 
142 } // namespace (anonymous)
143 
144 namespace Ifpack2 {
145 
146 template<class MatrixType>
147 void Relaxation<MatrixType>::updateCachedMultiVector(const Teuchos::RCP<const Tpetra::Map<local_ordinal_type,global_ordinal_type,node_type> > & map, size_t numVecs) const{
148  // Allocate a multivector if the cached one isn't perfect
149  // Note: We check for map pointer equality here since it is much cheaper than isSameAs()
150  if(cachedMV_.is_null() || &*map != &*cachedMV_->getMap() || cachedMV_->getNumVectors() !=numVecs)
151  cachedMV_ = Teuchos::rcp(new Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>(map, numVecs, false));
152 }
153 
154 
155 template<class MatrixType>
158 {
159  if (A.getRawPtr () != A_.getRawPtr ()) { // it's a different matrix
160  Importer_ = Teuchos::null;
161  Diagonal_ = Teuchos::null; // ??? what if this comes from the user???
162  isInitialized_ = false;
163  IsComputed_ = false;
164  diagOffsets_ = Kokkos::View<size_t*, typename node_type::device_type> ();
165  savedDiagOffsets_ = false;
166  hasBlockCrsMatrix_ = false;
167  if (! A.is_null ()) {
168  IsParallel_ = (A->getRowMap ()->getComm ()->getSize () > 1);
169  }
170  A_ = A;
171  }
172 }
173 
174 
175 template<class MatrixType>
178 : A_ (A),
179  NumSweeps_ (1),
180  PrecType_ (Ifpack2::Details::JACOBI),
181  DampingFactor_ (STS::one ()),
182  IsParallel_ ((A.is_null () || A->getRowMap ().is_null () || A->getRowMap ()->getComm ().is_null ()) ?
183  false : // a reasonable default if there's no communicator
184  A->getRowMap ()->getComm ()->getSize () > 1),
185  ZeroStartingSolution_ (true),
186  DoBackwardGS_ (false),
187  DoL1Method_ (false),
188  L1Eta_ (Teuchos::as<magnitude_type> (1.5)),
189  MinDiagonalValue_ (STS::zero ()),
190  fixTinyDiagEntries_ (false),
191  checkDiagEntries_ (false),
192  is_matrix_structurally_symmetric_ (false),
193  ifpack2_dump_matrix_(false),
194  isInitialized_ (false),
195  IsComputed_ (false),
196  NumInitialize_ (0),
197  NumCompute_ (0),
198  NumApply_ (0),
199  InitializeTime_ (0.0), // Times are double anyway, so no need for ScalarTraits.
200  ComputeTime_ (0.0),
201  ApplyTime_ (0.0),
202  ComputeFlops_ (0.0),
203  ApplyFlops_ (0.0),
204  globalMinMagDiagEntryMag_ (STM::zero ()),
205  globalMaxMagDiagEntryMag_ (STM::zero ()),
206  globalNumSmallDiagEntries_ (0),
207  globalNumZeroDiagEntries_ (0),
208  globalNumNegDiagEntries_ (0),
209  globalDiagNormDiff_(Teuchos::ScalarTraits<magnitude_type>::zero()),
210  savedDiagOffsets_ (false),
211  hasBlockCrsMatrix_ (false)
212 {
213  this->setObjectLabel ("Ifpack2::Relaxation");
214 }
215 
216 //==========================================================================
217 template<class MatrixType>
219 }
220 
221 template<class MatrixType>
224 {
225  using Teuchos::Array;
227  using Teuchos::parameterList;
228  using Teuchos::RCP;
229  using Teuchos::rcp;
230  using Teuchos::rcp_const_cast;
231  using Teuchos::rcp_implicit_cast;
232  using Teuchos::setStringToIntegralParameter;
234 
235  if (validParams_.is_null ()) {
236  RCP<ParameterList> pl = parameterList ("Ifpack2::Relaxation");
237 
238  // Set a validator that automatically converts from the valid
239  // string options to their enum values.
240  Array<std::string> precTypes (5);
241  precTypes[0] = "Jacobi";
242  precTypes[1] = "Gauss-Seidel";
243  precTypes[2] = "Symmetric Gauss-Seidel";
244  precTypes[3] = "MT Gauss-Seidel";
245  precTypes[4] = "MT Symmetric Gauss-Seidel";
246  Array<Details::RelaxationType> precTypeEnums (5);
247  precTypeEnums[0] = Details::JACOBI;
248  precTypeEnums[1] = Details::GS;
249  precTypeEnums[2] = Details::SGS;
250  precTypeEnums[3] = Details::MTGS;
251  precTypeEnums[4] = Details::MTSGS;
252  const std::string defaultPrecType ("Jacobi");
253  setStringToIntegralParameter<Details::RelaxationType> ("relaxation: type",
254  defaultPrecType, "Relaxation method", precTypes (), precTypeEnums (),
255  pl.getRawPtr ());
256 
257  const int numSweeps = 1;
258  RCP<PEV> numSweepsValidator =
259  rcp_implicit_cast<PEV> (rcp (new NonnegativeIntValidator));
260  pl->set ("relaxation: sweeps", numSweeps, "Number of relaxation sweeps",
261  rcp_const_cast<const PEV> (numSweepsValidator));
262 
263  const scalar_type dampingFactor = STS::one ();
264  pl->set ("relaxation: damping factor", dampingFactor);
265 
266  const bool zeroStartingSolution = true;
267  pl->set ("relaxation: zero starting solution", zeroStartingSolution);
268 
269  const bool doBackwardGS = false;
270  pl->set ("relaxation: backward mode", doBackwardGS);
271 
272  const bool doL1Method = false;
273  pl->set ("relaxation: use l1", doL1Method);
274 
275  const magnitude_type l1eta = (STM::one() + STM::one() + STM::one()) /
276  (STM::one() + STM::one()); // 1.5
277  pl->set ("relaxation: l1 eta", l1eta);
278 
279  const scalar_type minDiagonalValue = STS::zero ();
280  pl->set ("relaxation: min diagonal value", minDiagonalValue);
281 
282  const bool fixTinyDiagEntries = false;
283  pl->set ("relaxation: fix tiny diagonal entries", fixTinyDiagEntries);
284 
285  const bool checkDiagEntries = false;
286  pl->set ("relaxation: check diagonal entries", checkDiagEntries);
287 
288  Teuchos::ArrayRCP<local_ordinal_type> localSmoothingIndices = Teuchos::null;
289  pl->set("relaxation: local smoothing indices", localSmoothingIndices);
290 
291  const bool is_matrix_structurally_symmetric = false;
292  pl->set("relaxation: symmetric matrix structure", is_matrix_structurally_symmetric);
293 
294  const bool ifpack2_dump_matrix = false;
295  pl->set("relaxation: ifpack2 dump matrix", ifpack2_dump_matrix);
296 
297  validParams_ = rcp_const_cast<const ParameterList> (pl);
298  }
299  return validParams_;
300 }
301 
302 
303 template<class MatrixType>
305 {
306  using Teuchos::getIntegralValue;
308  using Teuchos::RCP;
309  typedef scalar_type ST; // just to make code below shorter
310 
311  if (pl.isType<double>("relaxation: damping factor")) {
312  // Make sure that ST=complex can run with a damping factor that is
313  // a double.
314  ST df = pl.get<double>("relaxation: damping factor");
315  pl.remove("relaxation: damping factor");
316  pl.set("relaxation: damping factor",df);
317  }
318 
320 
321  const Details::RelaxationType precType =
322  getIntegralValue<Details::RelaxationType> (pl, "relaxation: type");
323  const int numSweeps = pl.get<int> ("relaxation: sweeps");
324  const ST dampingFactor = pl.get<ST> ("relaxation: damping factor");
325  const bool zeroStartSol = pl.get<bool> ("relaxation: zero starting solution");
326  const bool doBackwardGS = pl.get<bool> ("relaxation: backward mode");
327  const bool doL1Method = pl.get<bool> ("relaxation: use l1");
328  const magnitude_type l1Eta = pl.get<magnitude_type> ("relaxation: l1 eta");
329  const ST minDiagonalValue = pl.get<ST> ("relaxation: min diagonal value");
330  const bool fixTinyDiagEntries = pl.get<bool> ("relaxation: fix tiny diagonal entries");
331  const bool checkDiagEntries = pl.get<bool> ("relaxation: check diagonal entries");
332  const bool is_matrix_structurally_symmetric = pl.get<bool> ("relaxation: symmetric matrix structure");
333  const bool ifpack2_dump_matrix = pl.get<bool> ("relaxation: ifpack2 dump matrix");
334 
335  Teuchos::ArrayRCP<local_ordinal_type> localSmoothingIndices = pl.get<Teuchos::ArrayRCP<local_ordinal_type> >("relaxation: local smoothing indices");
336 
337 
338  // "Commit" the changes, now that we've validated everything.
339  PrecType_ = precType;
340  NumSweeps_ = numSweeps;
341  DampingFactor_ = dampingFactor;
342  ZeroStartingSolution_ = zeroStartSol;
343  DoBackwardGS_ = doBackwardGS;
344  DoL1Method_ = doL1Method;
345  L1Eta_ = l1Eta;
346  MinDiagonalValue_ = minDiagonalValue;
347  fixTinyDiagEntries_ = fixTinyDiagEntries;
348  checkDiagEntries_ = checkDiagEntries;
349  is_matrix_structurally_symmetric_ = is_matrix_structurally_symmetric;
350  ifpack2_dump_matrix_ = ifpack2_dump_matrix;
351  localSmoothingIndices_ = localSmoothingIndices;
352 }
353 
354 
355 template<class MatrixType>
357 {
358  // FIXME (aprokop 18 Oct 2013) Casting away const is bad here.
359  // but otherwise, we will get [unused] in pl
360  this->setParametersImpl(const_cast<Teuchos::ParameterList&>(pl));
361 }
362 
363 
364 template<class MatrixType>
368  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getComm: "
369  "The input matrix A is null. Please call setMatrix() with a nonnull "
370  "input matrix before calling this method.");
371  return A_->getRowMap ()->getComm ();
372 }
373 
374 
375 template<class MatrixType>
378  return A_;
379 }
380 
381 
382 template<class MatrixType>
383 Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
384  typename MatrixType::global_ordinal_type,
385  typename MatrixType::node_type> >
388  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getDomainMap: "
389  "The input matrix A is null. Please call setMatrix() with a nonnull "
390  "input matrix before calling this method.");
391  return A_->getDomainMap ();
392 }
393 
394 
395 template<class MatrixType>
396 Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
397  typename MatrixType::global_ordinal_type,
398  typename MatrixType::node_type> >
401  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getRangeMap: "
402  "The input matrix A is null. Please call setMatrix() with a nonnull "
403  "input matrix before calling this method.");
404  return A_->getRangeMap ();
405 }
406 
407 
408 template<class MatrixType>
410  return true;
411 }
412 
413 
414 template<class MatrixType>
416  return(NumInitialize_);
417 }
418 
419 
420 template<class MatrixType>
422  return(NumCompute_);
423 }
424 
425 
426 template<class MatrixType>
428  return(NumApply_);
429 }
430 
431 
432 template<class MatrixType>
434  return(InitializeTime_);
435 }
436 
437 
438 template<class MatrixType>
440  return(ComputeTime_);
441 }
442 
443 
444 template<class MatrixType>
446  return(ApplyTime_);
447 }
448 
449 
450 template<class MatrixType>
452  return(ComputeFlops_);
453 }
454 
455 
456 template<class MatrixType>
458  return(ApplyFlops_);
459 }
460 
461 
462 
463 template<class MatrixType>
466  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getNodeSmootherComplexity: "
467  "The input matrix A is null. Please call setMatrix() with a nonnull "
468  "input matrix, then call compute(), before calling this method.");
469  // Relaxation methods cost roughly one apply + one diagonal inverse per iteration
470  return A_->getNodeNumRows() + A_->getNodeNumEntries();
471 }
472 
473 
474 template<class MatrixType>
475 void
477 apply (const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
478  Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
479  Teuchos::ETransp /* mode */,
480  scalar_type alpha,
481  scalar_type beta) const
482 {
483  using Teuchos::as;
484  using Teuchos::RCP;
485  using Teuchos::rcp;
486  using Teuchos::rcpFromRef;
487  typedef Tpetra::MultiVector<scalar_type, local_ordinal_type,
490  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::apply: "
491  "The input matrix A is null. Please call setMatrix() with a nonnull "
492  "input matrix, then call compute(), before calling this method.");
494  ! isComputed (),
495  std::runtime_error,
496  "Ifpack2::Relaxation::apply: You must call compute() on this Ifpack2 "
497  "preconditioner instance before you may call apply(). You may call "
498  "isComputed() to find out if compute() has been called already.");
499  TEUCHOS_TEST_FOR_EXCEPTION(
500  X.getNumVectors() != Y.getNumVectors(),
501  std::runtime_error,
502  "Ifpack2::Relaxation::apply: X and Y have different numbers of columns. "
503  "X has " << X.getNumVectors() << " columns, but Y has "
504  << Y.getNumVectors() << " columns.");
505  TEUCHOS_TEST_FOR_EXCEPTION(
506  beta != STS::zero (), std::logic_error,
507  "Ifpack2::Relaxation::apply: beta = " << beta << " != 0 case not "
508  "implemented.");
509 
510  const std::string timerName ("Ifpack2::Relaxation::apply");
512  if (timer.is_null ()) {
513  timer = Teuchos::TimeMonitor::getNewCounter (timerName);
514  }
515 
516  {
517  Teuchos::TimeMonitor timeMon (*timer);
518  // Special case: alpha == 0.
519  if (alpha == STS::zero ()) {
520  // No floating-point operations, so no need to update a count.
521  Y.putScalar (STS::zero ());
522  }
523  else {
524  // If X and Y alias one another, then we need to create an
525  // auxiliary vector, Xcopy (a deep copy of X).
526  RCP<const MV> Xcopy;
527  // FIXME (mfh 12 Sep 2014) This test for aliasing is incomplete.
528  {
529 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE
530  auto X_lcl_host = X.template getLocalView<Kokkos::HostSpace> ();
531  auto Y_lcl_host = Y.template getLocalView<Kokkos::HostSpace> ();
532 #else
533  auto X_lcl_host = X.getLocalViewHost ();
534  auto Y_lcl_host = Y.getLocalViewHost ();
535 #endif
536  if (X_lcl_host.data () == Y_lcl_host.data ()) {
537  Xcopy = rcp (new MV (X, Teuchos::Copy));
538  } else {
539  Xcopy = rcpFromRef (X);
540  }
541  }
542 
543  // Each of the following methods updates the flop count itself.
544  // All implementations handle zeroing out the starting solution
545  // (if necessary) themselves.
546  switch (PrecType_) {
547  case Ifpack2::Details::JACOBI:
548  ApplyInverseJacobi(*Xcopy,Y);
549  break;
550  case Ifpack2::Details::GS:
551  ApplyInverseGS(*Xcopy,Y);
552  break;
553  case Ifpack2::Details::SGS:
554  ApplyInverseSGS(*Xcopy,Y);
555  break;
556  case Ifpack2::Details::MTSGS:
557  ApplyInverseMTSGS_CrsMatrix(*Xcopy,Y);
558  break;
559  case Ifpack2::Details::MTGS:
560  ApplyInverseMTGS_CrsMatrix(*Xcopy,Y);
561  break;
562 
563  default:
564  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
565  "Ifpack2::Relaxation::apply: Invalid preconditioner type enum value "
566  << PrecType_ << ". Please report this bug to the Ifpack2 developers.");
567  }
568  if (alpha != STS::one ()) {
569  Y.scale (alpha);
570  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
571  const double numVectors = as<double> (Y.getNumVectors ());
572  ApplyFlops_ += numGlobalRows * numVectors;
573  }
574  }
575  }
576  ApplyTime_ += timer->totalElapsedTime ();
577  ++NumApply_;
578 }
579 
580 
581 template<class MatrixType>
582 void
584 applyMat (const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
585  Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
586  Teuchos::ETransp mode) const
587 {
589  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::applyMat: "
590  "The input matrix A is null. Please call setMatrix() with a nonnull "
591  "input matrix, then call compute(), before calling this method.");
593  ! isComputed (), std::runtime_error, "Ifpack2::Relaxation::applyMat: "
594  "isComputed() must be true before you may call applyMat(). "
595  "Please call compute() before calling this method.");
596  TEUCHOS_TEST_FOR_EXCEPTION(
597  X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
598  "Ifpack2::Relaxation::applyMat: X.getNumVectors() = " << X.getNumVectors ()
599  << " != Y.getNumVectors() = " << Y.getNumVectors () << ".");
600  A_->apply (X, Y, mode);
601 }
602 
603 
604 template<class MatrixType>
606 {
608  (A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::initialize: "
609  "The input matrix A is null. Please call setMatrix() with a nonnull "
610  "input matrix before calling this method.");
611  const std::string timerName ("Ifpack2::Relaxation::initialize");
613  if (timer.is_null ()) {
614  timer = Teuchos::TimeMonitor::getNewCounter (timerName);
615  }
616 
617  {
618  Teuchos::TimeMonitor timeMon (*timer);
619 
620  if (A_.is_null ()) {
621  hasBlockCrsMatrix_ = false;
622  }
623  else { // A_ is not null
625  Teuchos::rcp_dynamic_cast<const block_crs_matrix_type> (A_);
626  if (A_bcrs.is_null ()) {
627  hasBlockCrsMatrix_ = false;
628  }
629  else { // A_ is a block_crs_matrix_type
630  hasBlockCrsMatrix_ = true;
631  }
632  }
633 
634  if (PrecType_ == Ifpack2::Details::MTGS || PrecType_ == Ifpack2::Details::MTSGS) {
635  const crs_matrix_type* crsMat = dynamic_cast<const crs_matrix_type*> (A_.get());
637  (crsMat == NULL, std::logic_error, "Ifpack2::Relaxation::initialize: "
638  "Multithreaded Gauss-Seidel methods currently only work when the input "
639  "matrix is a Tpetra::CrsMatrix.");
640 
641  if(this->ifpack2_dump_matrix_){
642  static int sequence_number = 0;
643  const std::string file_name = "Ifpack2_MT_GS_" + std::to_string (sequence_number++) + ".mtx";
644  Tpetra::MatrixMarket::Writer<crs_matrix_type> crs_writer;
645  Teuchos::RCP<const crs_matrix_type> rcp_crs_mat = Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A_);
646  crs_writer.writeSparseFile(file_name, rcp_crs_mat);
647  }
648 
649  this->mtKernelHandle_ = Teuchos::rcp (new mt_kernel_handle_type ());
650  if (mtKernelHandle_->get_gs_handle () == NULL) {
651  mtKernelHandle_->create_gs_handle ();
652  }
653  local_matrix_type kcsr = crsMat->getLocalMatrix ();
654 
655  bool is_symmetric = (PrecType_ == Ifpack2::Details::MTSGS);
656  is_symmetric = is_symmetric || is_matrix_structurally_symmetric_;
657 
658  using KokkosSparse::Experimental::gauss_seidel_symbolic;
659  gauss_seidel_symbolic<mt_kernel_handle_type,
660  lno_row_view_t,
661  lno_nonzero_view_t> (mtKernelHandle_.getRawPtr (),
662  A_->getNodeNumRows (),
663  A_->getNodeNumCols (),
664  kcsr.graph.row_map,
665  kcsr.graph.entries,
666  is_symmetric);
667  }
668 
669  } // end TimeMonitor scope
670 
671  InitializeTime_ += timer->totalElapsedTime ();
672  ++NumInitialize_;
673  isInitialized_ = true;
674 }
675 
676 namespace Impl {
677 template <typename BlockDiagView>
678 struct InvertDiagBlocks {
679  typedef int value_type;
680  typedef typename BlockDiagView::size_type Size;
681 
682 private:
683  typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
684  template <typename View>
685  using UnmanagedView = Kokkos::View<typename View::data_type, typename View::array_layout,
686  typename View::device_type, Unmanaged>;
687 
688  typedef typename BlockDiagView::non_const_value_type Scalar;
689  typedef typename BlockDiagView::device_type Device;
690  typedef Kokkos::View<Scalar**, Kokkos::LayoutRight, Device> RWrk;
691  typedef Kokkos::View<int**, Kokkos::LayoutRight, Device> IWrk;
692 
693  UnmanagedView<BlockDiagView> block_diag_;
694  // TODO Use thread team and scratch memory space. In this first
695  // pass, provide workspace for each block.
696  RWrk rwrk_buf_;
697  UnmanagedView<RWrk> rwrk_;
698  IWrk iwrk_buf_;
699  UnmanagedView<IWrk> iwrk_;
700 
701 public:
702  InvertDiagBlocks (BlockDiagView& block_diag)
703  : block_diag_(block_diag)
704  {
705  const auto blksz = block_diag.extent(1);
706  Kokkos::resize(rwrk_buf_, block_diag_.extent(0), blksz);
707  rwrk_ = rwrk_buf_;
708  Kokkos::resize(iwrk_buf_, block_diag_.extent(0), blksz);
709  iwrk_ = iwrk_buf_;
710  }
711 
712  KOKKOS_INLINE_FUNCTION
713  void operator() (const Size i, int& jinfo) const {
714  auto D_cur = Kokkos::subview(block_diag_, i, Kokkos::ALL(), Kokkos::ALL());
715  auto ipiv = Kokkos::subview(iwrk_, i, Kokkos::ALL());
716  auto work = Kokkos::subview(rwrk_, i, Kokkos::ALL());
717  int info = 0;
718  Tpetra::Experimental::GETF2(D_cur, ipiv, info);
719  if (info) {
720  ++jinfo;
721  return;
722  }
723  Tpetra::Experimental::GETRI(D_cur, ipiv, work, info);
724  if (info) ++jinfo;
725  }
726 
727  // Report the number of blocks with errors.
728  KOKKOS_INLINE_FUNCTION
729  void join (volatile value_type& dst, volatile value_type const& src) const { dst += src; }
730 };
731 }
732 
733 template<class MatrixType>
734 void Relaxation<MatrixType>::computeBlockCrs ()
735 {
736  using Kokkos::ALL;
737  using Teuchos::Array;
738  using Teuchos::ArrayRCP;
739  using Teuchos::ArrayView;
740  using Teuchos::as;
741  using Teuchos::Comm;
742  using Teuchos::RCP;
743  using Teuchos::rcp;
744  using Teuchos::REDUCE_MAX;
745  using Teuchos::REDUCE_MIN;
746  using Teuchos::REDUCE_SUM;
747  using Teuchos::rcp_dynamic_cast;
748  using Teuchos::reduceAll;
749  typedef local_ordinal_type LO;
750  typedef typename node_type::device_type device_type;
751 
752  const std::string timerName ("Ifpack2::Relaxation::computeBlockCrs");
754  if (timer.is_null ()) {
755  timer = Teuchos::TimeMonitor::getNewCounter (timerName);
756  }
757  {
758  Teuchos::TimeMonitor timeMon (*timer);
759 
761  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::"
762  "computeBlockCrs: The input matrix A is null. Please call setMatrix() "
763  "with a nonnull input matrix, then call initialize(), before calling "
764  "this method.");
765  const block_crs_matrix_type* blockCrsA =
766  dynamic_cast<const block_crs_matrix_type*> (A_.getRawPtr ());
768  blockCrsA == NULL, std::logic_error, "Ifpack2::Relaxation::"
769  "computeBlockCrs: A_ is not a BlockCrsMatrix, but it should be if we "
770  "got this far. Please report this bug to the Ifpack2 developers.");
771 
772  const scalar_type one = STS::one ();
773 
774  // Reset state.
775  IsComputed_ = false;
776 
777  const LO lclNumMeshRows =
778  blockCrsA->getCrsGraph ().getNodeNumRows ();
779  const LO blockSize = blockCrsA->getBlockSize ();
780 
781  if (! savedDiagOffsets_) {
782  blockDiag_ = block_diag_type (); // clear it before reallocating
783  blockDiag_ = block_diag_type ("Ifpack2::Relaxation::blockDiag_",
784  lclNumMeshRows, blockSize, blockSize);
785  if (Teuchos::as<LO>(diagOffsets_.extent (0) ) < lclNumMeshRows) {
786  // Clear diagOffsets_ first (by assigning an empty View to it)
787  // to save memory, before reallocating.
788  diagOffsets_ = Kokkos::View<size_t*, device_type> ();
789  diagOffsets_ = Kokkos::View<size_t*, device_type> ("offsets", lclNumMeshRows);
790  }
791  blockCrsA->getCrsGraph ().getLocalDiagOffsets (diagOffsets_);
793  (static_cast<size_t> (diagOffsets_.extent (0)) !=
794  static_cast<size_t> (blockDiag_.extent (0)),
795  std::logic_error, "diagOffsets_.extent(0) = " <<
796  diagOffsets_.extent (0) << " != blockDiag_.extent(0) = "
797  << blockDiag_.extent (0) <<
798  ". Please report this bug to the Ifpack2 developers.");
799  savedDiagOffsets_ = true;
800  }
801  blockCrsA->getLocalDiagCopy (blockDiag_, diagOffsets_);
802 
803  // Use an unmanaged View in this method, so that when we take
804  // subviews of it (to get each diagonal block), we don't have to
805  // touch the reference count. Reference count updates are a
806  // thread scalability bottleneck and have a performance cost even
807  // without using threads.
808  unmanaged_block_diag_type blockDiag = blockDiag_;
809 
810  if (DoL1Method_ && IsParallel_) {
811  const scalar_type two = one + one;
812  const size_t maxLength = A_->getNodeMaxNumRowEntries ();
813  Array<LO> indices (maxLength);
814  Array<scalar_type> values (maxLength * blockSize * blockSize);
815  size_t numEntries = 0;
816 
817  for (LO i = 0; i < lclNumMeshRows; ++i) {
818  // FIXME (mfh 16 Dec 2015) Get views instead of copies.
819  blockCrsA->getLocalRowCopy (i, indices (), values (), numEntries);
820 
821  auto diagBlock = Kokkos::subview (blockDiag, i, ALL (), ALL ());
822  for (LO subRow = 0; subRow < blockSize; ++subRow) {
823  magnitude_type diagonal_boost = STM::zero ();
824  for (size_t k = 0 ; k < numEntries ; ++k) {
825  if (indices[k] > lclNumMeshRows) {
826  const size_t offset = blockSize*blockSize*k + subRow*blockSize;
827  for (LO subCol = 0; subCol < blockSize; ++subCol) {
828  diagonal_boost += STS::magnitude (values[offset+subCol] / two);
829  }
830  }
831  }
832  if (STS::magnitude (diagBlock(subRow, subRow)) < L1Eta_ * diagonal_boost) {
833  diagBlock(subRow, subRow) += diagonal_boost;
834  }
835  }
836  }
837  }
838 
839  int info = 0;
840  {
841  Impl::InvertDiagBlocks<unmanaged_block_diag_type> idb(blockDiag);
842  typedef typename unmanaged_block_diag_type::execution_space exec_space;
843  typedef Kokkos::RangePolicy<exec_space, LO> range_type;
844 
845  Kokkos::parallel_reduce (range_type (0, lclNumMeshRows), idb, info);
846  // FIXME (mfh 19 May 2017) Different processes may not
847  // necessarily have this error all at the same time, so it would
848  // be better just to preserve a local error state and let users
849  // check.
851  (info > 0, std::runtime_error, "GETF2 or GETRI failed on = " << info
852  << " diagonal blocks.");
853  }
854 
855  // In a debug build, do an extra test to make sure that all the
856  // factorizations were computed correctly.
857 #ifdef HAVE_IFPACK2_DEBUG
858  const int numResults = 2;
859  // Use "max = -min" trick to get min and max in a single all-reduce.
860  int lclResults[2], gblResults[2];
861  lclResults[0] = info;
862  lclResults[1] = -info;
863  gblResults[0] = 0;
864  gblResults[1] = 0;
865  reduceAll<int, int> (* (A_->getGraph ()->getComm ()), REDUCE_MIN,
866  numResults, lclResults, gblResults);
868  (gblResults[0] != 0 || gblResults[1] != 0, std::runtime_error,
869  "Ifpack2::Relaxation::compute: When processing the input "
870  "Tpetra::BlockCrsMatrix, one or more diagonal block LU factorizations "
871  "failed on one or more (MPI) processes.");
872 #endif // HAVE_IFPACK2_DEBUG
873 
874  Importer_ = A_->getGraph ()->getImporter ();
875  } // end TimeMonitor scope
876 
877  ComputeTime_ += timer->totalElapsedTime ();
878  ++NumCompute_;
879  IsComputed_ = true;
880 }
881 
882 template<class MatrixType>
884 {
885  using Teuchos::Array;
886  using Teuchos::ArrayRCP;
887  using Teuchos::ArrayView;
888  using Teuchos::as;
889  using Teuchos::Comm;
890  using Teuchos::RCP;
891  using Teuchos::rcp;
892  using Teuchos::REDUCE_MAX;
893  using Teuchos::REDUCE_MIN;
894  using Teuchos::REDUCE_SUM;
895  using Teuchos::rcp_dynamic_cast;
896  using Teuchos::reduceAll;
897  typedef Tpetra::Vector<scalar_type, local_ordinal_type,
898  global_ordinal_type, node_type> vector_type;
899  typedef typename vector_type::device_type device_type;
900  const scalar_type zero = STS::zero ();
901  const scalar_type one = STS::one ();
902 
903  // We don't count initialization in compute() time.
904  if (! isInitialized ()) {
905  initialize ();
906  }
907 
908  if (hasBlockCrsMatrix_) {
909  computeBlockCrs ();
910  return;
911  }
912 
913 
914  const std::string timerName ("Ifpack2::Relaxation::compute");
916  if (timer.is_null ()) {
917  timer = Teuchos::TimeMonitor::getNewCounter (timerName);
918  }
919 
920 
921 
922  {
923  Teuchos::TimeMonitor timeMon (*timer);
924 
926  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::compute: "
927  "The input matrix A is null. Please call setMatrix() with a nonnull "
928  "input matrix, then call initialize(), before calling this method.");
929 
930  // Reset state.
931  IsComputed_ = false;
932 
934  NumSweeps_ < 0, std::logic_error,
935  "Ifpack2::Relaxation::compute: NumSweeps_ = " << NumSweeps_ << " < 0. "
936  "Please report this bug to the Ifpack2 developers.");
937 
938  Diagonal_ = rcp (new vector_type (A_->getRowMap ()));
939 
940  // Extract the diagonal entries. The CrsMatrix static graph
941  // version is faster for subsequent calls to compute(), since it
942  // caches the diagonal offsets.
943  //
944  // isStaticGraph() == true means that the matrix was created with
945  // a const graph. The only requirement is that the structure of
946  // the matrix never changes, so isStaticGraph() == true is a bit
947  // more conservative than we need. However, Tpetra doesn't (as of
948  // 05 Apr 2013) have a way to tell if the graph hasn't changed
949  // since the last time we used it.
950  {
951  // NOTE (mfh 07 Jul 2013): We must cast here to CrsMatrix
952  // instead of MatrixType, because isStaticGraph is a CrsMatrix
953  // method (not inherited from RowMatrix's interface). It's
954  // perfectly valid to do relaxation on a RowMatrix which is not
955  // a CrsMatrix.
956  const crs_matrix_type* crsMat =
957  dynamic_cast<const crs_matrix_type*> (A_.getRawPtr ());
958  if (crsMat == NULL || ! crsMat->isStaticGraph ()) {
959  A_->getLocalDiagCopy (*Diagonal_); // slow path
960  } else {
961  if (! savedDiagOffsets_) { // we haven't precomputed offsets
962  const size_t lclNumRows = A_->getRowMap ()->getNodeNumElements ();
963  if (diagOffsets_.extent (0) < lclNumRows) {
964  typedef typename node_type::device_type DT;
965  diagOffsets_ = Kokkos::View<size_t*, DT> (); // clear 1st to save mem
966  diagOffsets_ = Kokkos::View<size_t*, DT> ("offsets", lclNumRows);
967  }
968  crsMat->getCrsGraph ()->getLocalDiagOffsets (diagOffsets_);
969  savedDiagOffsets_ = true;
970  }
971  crsMat->getLocalDiagCopy (*Diagonal_, diagOffsets_);
972 #ifdef HAVE_IFPACK2_DEBUG
973  // Validate the fast-path diagonal against the slow-path diagonal.
974  vector_type D_copy (A_->getRowMap ());
975  A_->getLocalDiagCopy (D_copy);
976  D_copy.update (STS::one (), *Diagonal_, -STS::one ());
977  const magnitude_type err = D_copy.normInf ();
978  // The two diagonals should be exactly the same, so their
979  // difference should be exactly zero.
981  err != STM::zero(), std::logic_error, "Ifpack2::Relaxation::compute: "
982  "\"fast-path\" diagonal computation failed. \\|D1 - D2\\|_inf = "
983  << err << ".");
984 #endif // HAVE_IFPACK2_DEBUG
985  }
986  }
987 
988  // If we're checking the computed inverse diagonal, then keep a
989  // copy of the original diagonal entries for later comparison.
990  RCP<vector_type> origDiag;
991  if (checkDiagEntries_) {
992  origDiag = rcp (new vector_type (A_->getRowMap ()));
993  Tpetra::deep_copy (*origDiag, *Diagonal_);
994  }
995 
996  const size_t numMyRows = A_->getNodeNumRows ();
997 
998  // We're about to read and write diagonal entries on the host.
999 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE
1000  Diagonal_->template sync<Kokkos::HostSpace> ();
1001  Diagonal_->template modify<Kokkos::HostSpace> ();
1002  auto diag_2d = Diagonal_->template getLocalView<Kokkos::HostSpace> ();
1003 #else
1004  Diagonal_->sync_host ();
1005  Diagonal_->modify_host ();
1006  auto diag_2d = Diagonal_->getLocalViewHost ();
1007 #endif
1008  auto diag_1d = Kokkos::subview (diag_2d, Kokkos::ALL (), 0);
1009  // FIXME (mfh 12 Jan 2016) temp fix for Kokkos::complex vs. std::complex.
1010  scalar_type* const diag = reinterpret_cast<scalar_type*> (diag_1d.data ());
1011 
1012  // Setup for L1 Methods.
1013  // Here we add half the value of the off-processor entries in the row,
1014  // but only if diagonal isn't sufficiently large.
1015  //
1016  // This follows from Equation (6.5) in: Baker, Falgout, Kolev and
1017  // Yang. "Multigrid Smoothers for Ultraparallel Computing." SIAM
1018  // J. Sci. Comput., Vol. 33, No. 5. (2011), pp. 2864-2887.
1019  if (DoL1Method_ && IsParallel_) {
1020  const scalar_type two = one + one;
1021  const size_t maxLength = A_->getNodeMaxNumRowEntries ();
1022  Array<local_ordinal_type> indices (maxLength);
1023  Array<scalar_type> values (maxLength);
1024  size_t numEntries;
1025 
1026  for (size_t i = 0; i < numMyRows; ++i) {
1027  A_->getLocalRowCopy (i, indices (), values (), numEntries);
1028  magnitude_type diagonal_boost = STM::zero ();
1029  for (size_t k = 0 ; k < numEntries ; ++k) {
1030  if (static_cast<size_t> (indices[k]) > numMyRows) {
1031  diagonal_boost += STS::magnitude (values[k] / two);
1032  }
1033  }
1034  if (STS::magnitude (diag[i]) < L1Eta_ * diagonal_boost) {
1035  diag[i] += diagonal_boost;
1036  }
1037  }
1038  }
1039 
1040  //
1041  // Invert the diagonal entries of the matrix (not in place).
1042  //
1043 
1044  // Precompute some quantities for "fixing" zero or tiny diagonal
1045  // entries. We'll only use them if this "fixing" is enabled.
1046  //
1047  // SmallTraits covers for the lack of eps() in
1048  // Teuchos::ScalarTraits when its template parameter is not a
1049  // floating-point type. (Ifpack2 sometimes gets instantiated for
1050  // integer Scalar types.)
1051  const scalar_type oneOverMinDiagVal = (MinDiagonalValue_ == zero) ?
1052  one / SmallTraits<scalar_type>::eps () :
1053  one / MinDiagonalValue_;
1054  // It's helpful not to have to recompute this magnitude each time.
1055  const magnitude_type minDiagValMag = STS::magnitude (MinDiagonalValue_);
1056 
1057  if (checkDiagEntries_) {
1058  //
1059  // Check diagonal elements, replace zero elements with the minimum
1060  // diagonal value, and store their inverses. Count the number of
1061  // "small" and zero diagonal entries while we're at it.
1062  //
1063  size_t numSmallDiagEntries = 0; // "small" includes zero
1064  size_t numZeroDiagEntries = 0; // # zero diagonal entries
1065  size_t numNegDiagEntries = 0; // # negative (real parts of) diagonal entries
1066 
1067  // As we go, keep track of the diagonal entries with the least and
1068  // greatest magnitude. We could use the trick of starting the min
1069  // with +Inf and the max with -Inf, but that doesn't work if
1070  // scalar_type is a built-in integer type. Thus, we have to start
1071  // by reading the first diagonal entry redundantly.
1072  // scalar_type minMagDiagEntry = zero;
1073  // scalar_type maxMagDiagEntry = zero;
1074  magnitude_type minMagDiagEntryMag = STM::zero ();
1075  magnitude_type maxMagDiagEntryMag = STM::zero ();
1076  if (numMyRows > 0) {
1077  const scalar_type d_0 = diag[0];
1078  const magnitude_type d_0_mag = STS::magnitude (d_0);
1079  // minMagDiagEntry = d_0;
1080  // maxMagDiagEntry = d_0;
1081  minMagDiagEntryMag = d_0_mag;
1082  maxMagDiagEntryMag = d_0_mag;
1083  }
1084 
1085  // Go through all the diagonal entries. Compute counts of
1086  // small-magnitude, zero, and negative-real-part entries. Invert
1087  // the diagonal entries that aren't too small. For those that are
1088  // too small in magnitude, replace them with 1/MinDiagonalValue_
1089  // (or 1/eps if MinDiagonalValue_ happens to be zero).
1090  for (size_t i = 0 ; i < numMyRows; ++i) {
1091  const scalar_type d_i = diag[i];
1092  const magnitude_type d_i_mag = STS::magnitude (d_i);
1093  const magnitude_type d_i_real = STS::real (d_i);
1094 
1095  // We can't compare complex numbers, but we can compare their
1096  // real parts.
1097  if (d_i_real < STM::zero ()) {
1098  ++numNegDiagEntries;
1099  }
1100  if (d_i_mag < minMagDiagEntryMag) {
1101  // minMagDiagEntry = d_i;
1102  minMagDiagEntryMag = d_i_mag;
1103  }
1104  if (d_i_mag > maxMagDiagEntryMag) {
1105  // maxMagDiagEntry = d_i;
1106  maxMagDiagEntryMag = d_i_mag;
1107  }
1108 
1109  if (fixTinyDiagEntries_) {
1110  // <= not <, in case minDiagValMag is zero.
1111  if (d_i_mag <= minDiagValMag) {
1112  ++numSmallDiagEntries;
1113  if (d_i_mag == STM::zero ()) {
1114  ++numZeroDiagEntries;
1115  }
1116  diag[i] = oneOverMinDiagVal;
1117  } else {
1118  diag[i] = one / d_i;
1119  }
1120  }
1121  else { // Don't fix zero or tiny diagonal entries.
1122  // <= not <, in case minDiagValMag is zero.
1123  if (d_i_mag <= minDiagValMag) {
1124  ++numSmallDiagEntries;
1125  if (d_i_mag == STM::zero ()) {
1126  ++numZeroDiagEntries;
1127  }
1128  }
1129  diag[i] = one / d_i;
1130  }
1131  }
1132 
1133  // Count floating-point operations of computing the inverse diagonal.
1134  //
1135  // FIXME (mfh 30 Mar 2013) Shouldn't counts be global, not local?
1136  if (STS::isComplex) { // magnitude: at least 3 flops per diagonal entry
1137  ComputeFlops_ += 4.0 * numMyRows;
1138  } else {
1139  ComputeFlops_ += numMyRows;
1140  }
1141 
1142  // Collect global data about the diagonal entries. Only do this
1143  // (which involves O(1) all-reduces) if the user asked us to do
1144  // the extra work.
1145  //
1146  // FIXME (mfh 28 Mar 2013) This is wrong if some processes have
1147  // zero rows. Fixing this requires one of two options:
1148  //
1149  // 1. Use a custom reduction operation that ignores processes
1150  // with zero diagonal entries.
1151  // 2. Split the communicator, compute all-reduces using the
1152  // subcommunicator over processes that have a nonzero number
1153  // of diagonal entries, and then broadcast from one of those
1154  // processes (if there is one) to the processes in the other
1155  // subcommunicator.
1156  const Comm<int>& comm = * (A_->getRowMap ()->getComm ());
1157 
1158  // Compute global min and max magnitude of entries.
1159  Array<magnitude_type> localVals (2);
1160  localVals[0] = minMagDiagEntryMag;
1161  // (- (min (- x))) is the same as (max x).
1162  localVals[1] = -maxMagDiagEntryMag;
1163  Array<magnitude_type> globalVals (2);
1164  reduceAll<int, magnitude_type> (comm, REDUCE_MIN, 2,
1165  localVals.getRawPtr (),
1166  globalVals.getRawPtr ());
1167  globalMinMagDiagEntryMag_ = globalVals[0];
1168  globalMaxMagDiagEntryMag_ = -globalVals[1];
1169 
1170  Array<size_t> localCounts (3);
1171  localCounts[0] = numSmallDiagEntries;
1172  localCounts[1] = numZeroDiagEntries;
1173  localCounts[2] = numNegDiagEntries;
1174  Array<size_t> globalCounts (3);
1175  reduceAll<int, size_t> (comm, REDUCE_SUM, 3,
1176  localCounts.getRawPtr (),
1177  globalCounts.getRawPtr ());
1178  globalNumSmallDiagEntries_ = globalCounts[0];
1179  globalNumZeroDiagEntries_ = globalCounts[1];
1180  globalNumNegDiagEntries_ = globalCounts[2];
1181 
1182  // Forestall "set but not used" compiler warnings.
1183  // (void) minMagDiagEntry;
1184  // (void) maxMagDiagEntry;
1185 
1186  // Compute and save the difference between the computed inverse
1187  // diagonal, and the original diagonal's inverse.
1188  //
1189  // NOTE (mfh 11 Jan 2016) We need to sync Diagonal_ back from
1190  // host to device for the update kernel below, and we don't need
1191  // to modify it or sync it back again here.
1192  vector_type diff (A_->getRowMap ());
1193  diff.reciprocal (*origDiag);
1194  Diagonal_->template sync<device_type> ();
1195  diff.update (-one, *Diagonal_, one);
1196  globalDiagNormDiff_ = diff.norm2 ();
1197  }
1198  else { // don't check diagonal elements
1199  if (fixTinyDiagEntries_) {
1200  // Go through all the diagonal entries. Invert those that
1201  // aren't too small in magnitude. For those that are too
1202  // small in magnitude, replace them with oneOverMinDiagVal.
1203  for (size_t i = 0 ; i < numMyRows; ++i) {
1204  const scalar_type d_i = diag[i];
1205  const magnitude_type d_i_mag = STS::magnitude (d_i);
1206 
1207  // <= not <, in case minDiagValMag is zero.
1208  if (d_i_mag <= minDiagValMag) {
1209  diag[i] = oneOverMinDiagVal;
1210  } else {
1211  diag[i] = one / d_i;
1212  }
1213  }
1214  }
1215  else { // don't fix tiny or zero diagonal entries
1216  for (size_t i = 0 ; i < numMyRows; ++i) {
1217  diag[i] = one / diag[i];
1218  }
1219  }
1220  if (STS::isComplex) { // magnitude: at least 3 flops per diagonal entry
1221  ComputeFlops_ += 4.0 * numMyRows;
1222  } else {
1223  ComputeFlops_ += numMyRows;
1224  }
1225  }
1226 
1227  if (IsParallel_ && (PrecType_ == Ifpack2::Details::GS ||
1228  PrecType_ == Ifpack2::Details::SGS)) {
1229  // mfh 21 Mar 2013: The Import object may be null, but in that
1230  // case, the domain and column Maps are the same and we don't
1231  // need to Import anyway.
1232  Importer_ = A_->getGraph ()->getImporter ();
1233  Diagonal_->template sync<device_type> ();
1234  }
1235 
1236  if (PrecType_ == Ifpack2::Details::MTGS || PrecType_ == Ifpack2::Details::MTSGS) {
1237  //KokkosKernels GaussSeidel Initialization.
1238 
1239  const crs_matrix_type* crsMat = dynamic_cast<const crs_matrix_type*> (A_.get());
1241  (crsMat == NULL, std::logic_error, "Ifpack2::Relaxation::compute: "
1242  "Multithreaded Gauss-Seidel methods currently only work when the input "
1243  "matrix is a Tpetra::CrsMatrix.");
1244  local_matrix_type kcsr = crsMat->getLocalMatrix ();
1245 
1246  const bool is_symmetric = (PrecType_ == Ifpack2::Details::MTSGS);
1247  using KokkosSparse::Experimental::gauss_seidel_numeric;
1248  typedef typename scalar_nonzero_view_t::device_type dev_type;
1249  auto diagView_2d = Diagonal_->template getLocalView<dev_type> ();
1250  auto diagView_1d = Kokkos::subview (diagView_2d, Kokkos::ALL (), 0);
1251  gauss_seidel_numeric<mt_kernel_handle_type,
1252  lno_row_view_t,
1253  lno_nonzero_view_t,
1254  scalar_nonzero_view_t> (mtKernelHandle_.getRawPtr (),
1255  A_->getNodeNumRows (),
1256  A_->getNodeNumCols (),
1257  kcsr.graph.row_map,
1258  kcsr.graph.entries,
1259  kcsr.values,
1260  diagView_1d,
1261  is_symmetric);
1262  }
1263  } // end TimeMonitor scope
1264 
1265  ComputeTime_ += timer->totalElapsedTime ();
1266  ++NumCompute_;
1267  IsComputed_ = true;
1268 }
1269 
1270 
1271 template<class MatrixType>
1272 void
1274 ApplyInverseJacobi (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1275  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
1276 {
1277  using Teuchos::as;
1278  if (hasBlockCrsMatrix_) {
1279  ApplyInverseJacobi_BlockCrsMatrix (X, Y);
1280  return;
1281  }
1282 
1283  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1284  const double numVectors = as<double> (X.getNumVectors ());
1285  if (ZeroStartingSolution_) {
1286  // For the first Jacobi sweep, if we are allowed to assume that
1287  // the initial guess is zero, then Jacobi is just diagonal
1288  // scaling. (A_ij * x_j = 0 for i != j, since x_j = 0.)
1289  // Compute it as Y(i,j) = DampingFactor_ * X(i,j) * D(i).
1290  Y.elementWiseMultiply (DampingFactor_, *Diagonal_, X, STS::zero ());
1291 
1292  // Count (global) floating-point operations. Ifpack2 represents
1293  // this as a floating-point number rather than an integer, so that
1294  // overflow (for a very large number of calls, or a very large
1295  // problem) is approximate instead of catastrophic.
1296  double flopUpdate = 0.0;
1297  if (DampingFactor_ == STS::one ()) {
1298  // Y(i,j) = X(i,j) * D(i): one multiply for each entry of Y.
1299  flopUpdate = numGlobalRows * numVectors;
1300  } else {
1301  // Y(i,j) = DampingFactor_ * X(i,j) * D(i):
1302  // Two multiplies per entry of Y.
1303  flopUpdate = 2.0 * numGlobalRows * numVectors;
1304  }
1305  ApplyFlops_ += flopUpdate;
1306  if (NumSweeps_ == 1) {
1307  return;
1308  }
1309  }
1310  // If we were allowed to assume that the starting guess was zero,
1311  // then we have already done the first sweep above.
1312  const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1313 
1314  // Allocate a multivector if the cached one isn't perfect
1315  updateCachedMultiVector(Y.getMap(),as<size_t>(numVectors));
1316 
1317  for (int j = startSweep; j < NumSweeps_; ++j) {
1318  // Each iteration: Y = Y + \omega D^{-1} (X - A*Y)
1319  applyMat (Y, *cachedMV_);
1320  cachedMV_->update (STS::one (), X, -STS::one ());
1321  Y.elementWiseMultiply (DampingFactor_, *Diagonal_, *cachedMV_, STS::one ());
1322  }
1323 
1324  // For each column of output, for each pass over the matrix:
1325  //
1326  // - One + and one * for each matrix entry
1327  // - One / and one + for each row of the matrix
1328  // - If the damping factor is not one: one * for each row of the
1329  // matrix. (It's not fair to count this if the damping factor is
1330  // one, since the implementation could skip it. Whether it does
1331  // or not is the implementation's choice.)
1332 
1333  // Floating-point operations due to the damping factor, per matrix
1334  // row, per direction, per columm of output.
1335  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1336  const double dampingFlops = (DampingFactor_ == STS::one ()) ? 0.0 : 1.0;
1337  ApplyFlops_ += as<double> (NumSweeps_ - startSweep) * numVectors *
1338  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1339 }
1340 
1341 template<class MatrixType>
1342 void
1343 Relaxation<MatrixType>::
1344 ApplyInverseJacobi_BlockCrsMatrix (const Tpetra::MultiVector<scalar_type,
1345  local_ordinal_type,
1346  global_ordinal_type,
1347  node_type>& X,
1348  Tpetra::MultiVector<scalar_type,
1349  local_ordinal_type,
1350  global_ordinal_type,
1351  node_type>& Y) const
1352 {
1353  typedef Tpetra::Experimental::BlockMultiVector<scalar_type,
1354  local_ordinal_type, global_ordinal_type, node_type> BMV;
1355 
1356  const block_crs_matrix_type* blockMatConst =
1357  dynamic_cast<const block_crs_matrix_type*> (A_.getRawPtr ());
1359  (blockMatConst == NULL, std::logic_error, "This method should never be "
1360  "called if the matrix A_ is not a BlockCrsMatrix. Please report this "
1361  "bug to the Ifpack2 developers.");
1362  // mfh 23 Jan 2016: Unfortunately, the const cast is necessary.
1363  // This is because applyBlock() is nonconst (more accurate), while
1364  // apply() is const (required by Tpetra::Operator interface, but a
1365  // lie, because it possibly allocates temporary buffers).
1366  block_crs_matrix_type* blockMat =
1367  const_cast<block_crs_matrix_type*> (blockMatConst);
1368 
1369  auto meshRowMap = blockMat->getRowMap ();
1370  auto meshColMap = blockMat->getColMap ();
1371  const local_ordinal_type blockSize = blockMat->getBlockSize ();
1372 
1373  BMV X_blk (X, *meshColMap, blockSize);
1374  BMV Y_blk (Y, *meshRowMap, blockSize);
1375 
1376  if (ZeroStartingSolution_) {
1377  // For the first sweep, if we are allowed to assume that the
1378  // initial guess is zero, then block Jacobi is just block diagonal
1379  // scaling. (A_ij * x_j = 0 for i != j, since x_j = 0.)
1380  Y_blk.blockWiseMultiply (DampingFactor_, blockDiag_, X_blk);
1381  if (NumSweeps_ == 1) {
1382  return;
1383  }
1384  }
1385 
1386  auto pointRowMap = Y.getMap ();
1387  const size_t numVecs = X.getNumVectors ();
1388 
1389  // We don't need to initialize the result MV, since the sparse
1390  // mat-vec will clobber its contents anyway.
1391  BMV A_times_Y (*meshRowMap, *pointRowMap, blockSize, numVecs);
1392 
1393  // If we were allowed to assume that the starting guess was zero,
1394  // then we have already done the first sweep above.
1395  const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1396 
1397  for (int j = startSweep; j < NumSweeps_; ++j) {
1398  blockMat->applyBlock (Y_blk, A_times_Y);
1399  // Y := Y + \omega D^{-1} (X - A*Y). Use A_times_Y as scratch.
1400  Y_blk.blockJacobiUpdate (DampingFactor_, blockDiag_,
1401  X_blk, A_times_Y, STS::one ());
1402  }
1403 }
1404 
1405 template<class MatrixType>
1406 void
1407 Relaxation<MatrixType>::
1408 ApplyInverseGS (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1409  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
1410 {
1411  typedef Relaxation<MatrixType> this_type;
1412  // The CrsMatrix version is faster, because it can access the sparse
1413  // matrix data directly, rather than by copying out each row's data
1414  // in turn. Thus, we check whether the RowMatrix is really a
1415  // CrsMatrix.
1416  //
1417  // FIXME (mfh 07 Jul 2013) See note on crs_matrix_type typedef
1418  // declaration in Ifpack2_Relaxation_decl.hpp header file. The code
1419  // will still be correct if the cast fails, but it will use an
1420  // unoptimized kernel.
1421  const block_crs_matrix_type* blockCrsMat =
1422  dynamic_cast<const block_crs_matrix_type*> (A_.getRawPtr ());
1423  const crs_matrix_type* crsMat =
1424  dynamic_cast<const crs_matrix_type*> (A_.getRawPtr ());
1425  if (blockCrsMat != NULL) {
1426  const_cast<this_type*> (this)->ApplyInverseGS_BlockCrsMatrix (*blockCrsMat, X, Y);
1427  } else if (crsMat != NULL) {
1428  ApplyInverseGS_CrsMatrix (*crsMat, X, Y);
1429  } else {
1430  ApplyInverseGS_RowMatrix (X, Y);
1431  }
1432 }
1433 
1434 
1435 template<class MatrixType>
1436 void
1437 Relaxation<MatrixType>::
1438 ApplyInverseGS_RowMatrix (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1439  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
1440 {
1441  using Teuchos::Array;
1442  using Teuchos::ArrayRCP;
1443  using Teuchos::ArrayView;
1444  using Teuchos::as;
1445  using Teuchos::RCP;
1446  using Teuchos::rcp;
1447  using Teuchos::rcpFromRef;
1448  typedef Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> MV;
1449 
1450  // Tpetra's GS implementation for CrsMatrix handles zeroing out the
1451  // starting multivector itself. The generic RowMatrix version here
1452  // does not, so we have to zero out Y here.
1453  if (ZeroStartingSolution_) {
1454  Y.putScalar (STS::zero ());
1455  }
1456 
1457  const size_t NumVectors = X.getNumVectors ();
1458  const size_t maxLength = A_->getNodeMaxNumRowEntries ();
1459  Array<local_ordinal_type> Indices (maxLength);
1460  Array<scalar_type> Values (maxLength);
1461 
1462  // Local smoothing stuff
1463  const size_t numMyRows = A_->getNodeNumRows();
1464  const local_ordinal_type* rowInd = 0;
1465  size_t numActive = numMyRows;
1466  bool do_local = ! localSmoothingIndices_.is_null ();
1467  if (do_local) {
1468  rowInd = localSmoothingIndices_.getRawPtr ();
1469  numActive = localSmoothingIndices_.size ();
1470  }
1471 
1472  RCP<MV> Y2;
1473  if (IsParallel_) {
1474  if (Importer_.is_null ()) { // domain and column Maps are the same.
1475  updateCachedMultiVector(Y.getMap(),NumVectors);
1476  } else {
1477  updateCachedMultiVector(Importer_->getTargetMap(),NumVectors);
1478  }
1479  Y2= cachedMV_;
1480  }
1481  else {
1482  Y2 = rcpFromRef (Y);
1483  }
1484 
1485  // Diagonal
1486  ArrayRCP<const scalar_type> d_rcp = Diagonal_->get1dView ();
1487  ArrayView<const scalar_type> d_ptr = d_rcp();
1488 
1489  // Constant stride check
1490  bool constant_stride = X.isConstantStride() && Y2->isConstantStride();
1491 
1492  if (constant_stride) {
1493  // extract 1D RCPs
1494  size_t x_stride = X.getStride();
1495  size_t y2_stride = Y2->getStride();
1496  ArrayRCP<scalar_type> y2_rcp = Y2->get1dViewNonConst();
1497  ArrayRCP<const scalar_type> x_rcp = X.get1dView();
1498  ArrayView<scalar_type> y2_ptr = y2_rcp();
1499  ArrayView<const scalar_type> x_ptr = x_rcp();
1500  Array<scalar_type> dtemp(NumVectors,STS::zero());
1501 
1502  for (int j = 0; j < NumSweeps_; ++j) {
1503  // data exchange is here, once per sweep
1504  if (IsParallel_) {
1505  if (Importer_.is_null ()) {
1506  *Y2 = Y; // just copy, since domain and column Maps are the same
1507  } else {
1508  Y2->doImport (Y, *Importer_, Tpetra::INSERT);
1509  }
1510  }
1511 
1512  if (! DoBackwardGS_) { // Forward sweep
1513  for (size_t ii = 0; ii < numActive; ++ii) {
1514  local_ordinal_type i = as<local_ordinal_type> (do_local ? rowInd[ii] : ii);
1515  size_t NumEntries;
1516  A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
1517  dtemp.assign(NumVectors,STS::zero());
1518 
1519  for (size_t k = 0; k < NumEntries; ++k) {
1520  const local_ordinal_type col = Indices[k];
1521  for (size_t m = 0; m < NumVectors; ++m) {
1522  dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
1523  }
1524  }
1525 
1526  for (size_t m = 0; m < NumVectors; ++m) {
1527  y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
1528  }
1529  }
1530  }
1531  else { // Backward sweep
1532  // ptrdiff_t is the same size as size_t, but is signed. Being
1533  // signed is important so that i >= 0 is not trivially true.
1534  for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
1535  local_ordinal_type i = as<local_ordinal_type> (do_local ? rowInd[ii] : ii);
1536  size_t NumEntries;
1537  A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
1538  dtemp.assign (NumVectors, STS::zero ());
1539 
1540  for (size_t k = 0; k < NumEntries; ++k) {
1541  const local_ordinal_type col = Indices[k];
1542  for (size_t m = 0; m < NumVectors; ++m) {
1543  dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
1544  }
1545  }
1546 
1547  for (size_t m = 0; m < NumVectors; ++m) {
1548  y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
1549  }
1550  }
1551  }
1552  // FIXME (mfh 02 Jan 2013) This is only correct if row Map == range Map.
1553  if (IsParallel_) {
1554  Tpetra::deep_copy (Y, *Y2);
1555  }
1556  }
1557  }
1558  else {
1559  // extract 2D RCPS
1560  ArrayRCP<ArrayRCP<scalar_type> > y2_ptr = Y2->get2dViewNonConst ();
1561  ArrayRCP<ArrayRCP<const scalar_type> > x_ptr = X.get2dView ();
1562 
1563  for (int j = 0; j < NumSweeps_; ++j) {
1564  // data exchange is here, once per sweep
1565  if (IsParallel_) {
1566  if (Importer_.is_null ()) {
1567  *Y2 = Y; // just copy, since domain and column Maps are the same
1568  } else {
1569  Y2->doImport (Y, *Importer_, Tpetra::INSERT);
1570  }
1571  }
1572 
1573  if (! DoBackwardGS_) { // Forward sweep
1574  for (size_t ii = 0; ii < numActive; ++ii) {
1575  local_ordinal_type i = as<local_ordinal_type> (do_local ? rowInd[ii] : ii);
1576  size_t NumEntries;
1577  A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
1578 
1579  for (size_t m = 0; m < NumVectors; ++m) {
1580  scalar_type dtemp = STS::zero ();
1581  ArrayView<const scalar_type> x_local = (x_ptr())[m]();
1582  ArrayView<scalar_type> y2_local = (y2_ptr())[m]();
1583 
1584  for (size_t k = 0; k < NumEntries; ++k) {
1585  const local_ordinal_type col = Indices[k];
1586  dtemp += Values[k] * y2_local[col];
1587  }
1588  y2_local[i] += DampingFactor_ * d_ptr[i] * (x_local[i] - dtemp);
1589  }
1590  }
1591  }
1592  else { // Backward sweep
1593  // ptrdiff_t is the same size as size_t, but is signed. Being
1594  // signed is important so that i >= 0 is not trivially true.
1595  for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
1596  local_ordinal_type i = as<local_ordinal_type> (do_local ? rowInd[ii] : ii);
1597 
1598  size_t NumEntries;
1599  A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
1600 
1601  for (size_t m = 0; m < NumVectors; ++m) {
1602  scalar_type dtemp = STS::zero ();
1603  ArrayView<const scalar_type> x_local = (x_ptr())[m]();
1604  ArrayView<scalar_type> y2_local = (y2_ptr())[m]();
1605 
1606  for (size_t k = 0; k < NumEntries; ++k) {
1607  const local_ordinal_type col = Indices[k];
1608  dtemp += Values[k] * y2_local[col];
1609  }
1610  y2_local[i] += DampingFactor_ * d_ptr[i] * (x_local[i] - dtemp);
1611  }
1612  }
1613  }
1614 
1615  // FIXME (mfh 02 Jan 2013) This is only correct if row Map == range Map.
1616  if (IsParallel_) {
1617  Tpetra::deep_copy (Y, *Y2);
1618  }
1619  }
1620  }
1621 
1622 
1623  // See flop count discussion in implementation of ApplyInverseGS_CrsMatrix().
1624  const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1625  const double numVectors = as<double> (X.getNumVectors ());
1626  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1627  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1628  ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
1629  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1630 }
1631 
1632 
1633 template<class MatrixType>
1634 void
1635 Relaxation<MatrixType>::
1636 ApplyInverseGS_CrsMatrix (const crs_matrix_type& A,
1637  const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1638  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
1639 {
1640  using Teuchos::as;
1641  const Tpetra::ESweepDirection direction =
1642  DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward;
1643  if (localSmoothingIndices_.is_null ()) {
1644  A.gaussSeidelCopy (Y, X, *Diagonal_, DampingFactor_, direction,
1645  NumSweeps_, ZeroStartingSolution_);
1646  }
1647  else {
1648  A.reorderedGaussSeidelCopy (Y, X, *Diagonal_, localSmoothingIndices_ (),
1649  DampingFactor_, direction,
1650  NumSweeps_, ZeroStartingSolution_);
1651  }
1652 
1653  // For each column of output, for each sweep over the matrix:
1654  //
1655  // - One + and one * for each matrix entry
1656  // - One / and one + for each row of the matrix
1657  // - If the damping factor is not one: one * for each row of the
1658  // matrix. (It's not fair to count this if the damping factor is
1659  // one, since the implementation could skip it. Whether it does
1660  // or not is the implementation's choice.)
1661 
1662  // Floating-point operations due to the damping factor, per matrix
1663  // row, per direction, per columm of output.
1664  const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1665  const double numVectors = as<double> (X.getNumVectors ());
1666  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1667  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1668  ApplyFlops_ += NumSweeps_ * numVectors *
1669  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1670 }
1671 
1672 template<class MatrixType>
1673 void
1674 Relaxation<MatrixType>::
1675 ApplyInverseGS_BlockCrsMatrix (const block_crs_matrix_type& A,
1676  const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1677  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
1678 {
1679  using Teuchos::RCP;
1680  using Teuchos::rcp;
1681  using Teuchos::rcpFromRef;
1682  typedef Tpetra::Experimental::BlockMultiVector<scalar_type,
1683  local_ordinal_type, global_ordinal_type, node_type> BMV;
1684  typedef Tpetra::MultiVector<scalar_type,
1685  local_ordinal_type, global_ordinal_type, node_type> MV;
1686 
1687  //FIXME: (tcf) 8/21/2014 -- may be problematic for multiple right hand sides
1688  //
1689  // NOTE (mfh 12 Sep 2014) I don't think it should be a problem for
1690  // multiple right-hand sides, unless the input or output MultiVector
1691  // does not have constant stride. We should check for that case
1692  // here, in case it doesn't work in localGaussSeidel (which is
1693  // entirely possible).
1694  BMV yBlock (Y, * (A.getGraph ()->getDomainMap ()), A.getBlockSize ());
1695  const BMV xBlock (X, * (A.getColMap ()), A.getBlockSize ());
1696 
1697  bool performImport = false;
1698  RCP<BMV> yBlockCol;
1699  if (Importer_.is_null ()) {
1700  yBlockCol = rcpFromRef (yBlock);
1701  }
1702  else {
1703  if (yBlockColumnPointMap_.is_null () ||
1704  yBlockColumnPointMap_->getNumVectors () != yBlock.getNumVectors () ||
1705  yBlockColumnPointMap_->getBlockSize () != yBlock.getBlockSize ()) {
1706  yBlockColumnPointMap_ =
1707  rcp (new BMV (* (A.getColMap ()), A.getBlockSize (),
1708  static_cast<local_ordinal_type> (yBlock.getNumVectors ())));
1709  }
1710  yBlockCol = yBlockColumnPointMap_;
1711  performImport = true;
1712  }
1713 
1714  if (ZeroStartingSolution_) {
1715  yBlockCol->putScalar (STS::zero ());
1716  }
1717  else if (performImport) {
1718  yBlockCol->doImport (yBlock, *Importer_, Tpetra::INSERT);
1719  }
1720 
1721  const Tpetra::ESweepDirection direction =
1722  DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward;
1723 
1724  for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
1725  if (performImport && sweep > 0) {
1726  yBlockCol->doImport(yBlock, *Importer_, Tpetra::INSERT);
1727  }
1728  A.localGaussSeidel (xBlock, *yBlockCol, blockDiag_,
1729  DampingFactor_, direction);
1730  if (performImport) {
1731  RCP<const MV> yBlockColPointDomain =
1732  yBlockCol->getMultiVectorView ().offsetView (A.getDomainMap (), 0);
1733  Tpetra::deep_copy (Y, *yBlockColPointDomain);
1734  }
1735  }
1736 }
1737 
1738 
1739 
1740 
1741 template<class MatrixType>
1742 void
1743 Relaxation<MatrixType>::
1744 MTGaussSeidel (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& B,
1745  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1746  const Tpetra::ESweepDirection direction) const
1747 {
1748  using Teuchos::null;
1749  using Teuchos::RCP;
1750  using Teuchos::rcp;
1751  using Teuchos::rcpFromRef;
1752  using Teuchos::rcp_const_cast;
1753  using Teuchos::as;
1754 
1755  typedef scalar_type Scalar;
1756  typedef local_ordinal_type LocalOrdinal;
1757  typedef global_ordinal_type GlobalOrdinal;
1758  typedef node_type Node;
1759 
1760  const char prefix[] = "Ifpack2::Relaxation::(reordered)MTGaussSeidel: ";
1761  const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
1762 
1763  const crs_matrix_type* crsMat = dynamic_cast<const crs_matrix_type*> (A_.get());
1765  (crsMat == NULL, std::logic_error, "Ifpack2::Relaxation::apply: "
1766  "Multithreaded Gauss-Seidel methods currently only work when the input "
1767  "matrix is a Tpetra::CrsMatrix.");
1768 
1769  //Teuchos::ArrayView<local_ordinal_type> rowIndices; // unused, as of 04 Jan 2017
1771  (! localSmoothingIndices_.is_null (), std::logic_error,
1772  "Our implementation of Multithreaded Gauss-Seidel does not implement the "
1773  "use case where the user supplies an iteration order. "
1774  "This error used to appear as \"MT GaussSeidel ignores the given "
1775  "order\". "
1776  "I tried to add more explanation, but I didn't implement \"MT "
1777  "GaussSeidel\" [sic]. "
1778  "You'll have to ask the person who did.");
1779 
1781  (crsMat == NULL, std::logic_error, prefix << "The matrix is NULL. This "
1782  "should never happen. Please report this bug to the Ifpack2 developers.");
1784  (! crsMat->isFillComplete (), std::runtime_error, prefix << "The input "
1785  "CrsMatrix is not fill complete. Please call fillComplete on the matrix,"
1786  " then do setup again, before calling apply(). \"Do setup\" means that "
1787  "if only the matrix's values have changed since last setup, you need only"
1788  " call compute(). If the matrix's structure may also have changed, you "
1789  "must first call initialize(), then call compute(). If you have not set"
1790  " up this preconditioner for this matrix before, you must first call "
1791  "initialize(), then call compute().");
1793  (NumSweeps_ < 0, std::invalid_argument, prefix << "The number of sweeps "
1794  "must be nonnegative, but you provided numSweeps = " << NumSweeps_ <<
1795  " < 0.");
1796  if (NumSweeps_ == 0) {
1797  return;
1798  }
1799 
1800  typedef typename Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> MV;
1801  typedef typename crs_matrix_type::import_type import_type;
1802  typedef typename crs_matrix_type::export_type export_type;
1803  typedef typename crs_matrix_type::map_type map_type;
1804 
1805  RCP<const import_type> importer = crsMat->getGraph ()->getImporter ();
1806  RCP<const export_type> exporter = crsMat->getGraph ()->getExporter ();
1808  ! exporter.is_null (), std::runtime_error,
1809  "This method's implementation currently requires that the matrix's row, "
1810  "domain, and range Maps be the same. This cannot be the case, because "
1811  "the matrix has a nontrivial Export object.");
1812 
1813  RCP<const map_type> domainMap = crsMat->getDomainMap ();
1814  RCP<const map_type> rangeMap = crsMat->getRangeMap ();
1815  RCP<const map_type> rowMap = crsMat->getGraph ()->getRowMap ();
1816  RCP<const map_type> colMap = crsMat->getGraph ()->getColMap ();
1817 
1818 #ifdef HAVE_IFPACK2_DEBUG
1819  {
1820  // The relation 'isSameAs' is transitive. It's also a
1821  // collective, so we don't have to do a "shared" test for
1822  // exception (i.e., a global reduction on the test value).
1824  ! X.getMap ()->isSameAs (*domainMap), std::runtime_error,
1825  "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
1826  "multivector X be in the domain Map of the matrix.");
1828  ! B.getMap ()->isSameAs (*rangeMap), std::runtime_error,
1829  "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
1830  "B be in the range Map of the matrix.");
1832  ! D.getMap ()->isSameAs (*rowMap), std::runtime_error,
1833  "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
1834  "D be in the row Map of the matrix.");
1836  ! rowMap->isSameAs (*rangeMap), std::runtime_error,
1837  "Ifpack2::Relaxation::MTGaussSeidel requires that the row Map and the "
1838  "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
1840  ! domainMap->isSameAs (*rangeMap), std::runtime_error,
1841  "Ifpack2::Relaxation::MTGaussSeidel requires that the domain Map and "
1842  "the range Map of the matrix be the same.");
1843  }
1844 #else
1845  // Forestall any compiler warnings for unused variables.
1846  (void) rangeMap;
1847  (void) rowMap;
1848 #endif // HAVE_IFPACK2_DEBUG
1849 
1850  // Fetch a (possibly cached) temporary column Map multivector
1851  // X_colMap, and a domain Map view X_domainMap of it. Both have
1852  // constant stride by construction. We know that the domain Map
1853  // must include the column Map, because our Gauss-Seidel kernel
1854  // requires that the row Map, domain Map, and range Map are all
1855  // the same, and that each process owns all of its own diagonal
1856  // entries of the matrix.
1857 
1858  RCP<MV> X_colMap;
1859  RCP<MV> X_domainMap;
1860  bool copyBackOutput = false;
1861  if (importer.is_null ()) {
1862  if (X.isConstantStride ()) {
1863  X_colMap = rcpFromRef (X);
1864  X_domainMap = rcpFromRef (X);
1865 
1866  // Column Map and domain Map are the same, so there are no
1867  // remote entries. Thus, if we are not setting the initial
1868  // guess to zero, we don't have to worry about setting remote
1869  // entries to zero, even though we are not doing an Import in
1870  // this case.
1871  if (ZeroStartingSolution_) {
1872  X_colMap->putScalar (ZERO);
1873  }
1874  // No need to copy back to X at end.
1875  }
1876  else {
1877  // We must copy X into a constant stride multivector.
1878  // Just use the cached column Map multivector for that.
1879  // force=true means fill with zeros, so no need to fill
1880  // remote entries (not in domain Map) with zeros.
1881  updateCachedMultiVector(colMap,as<size_t>(X.getNumVectors()));
1882  X_colMap = cachedMV_;
1883  // X_domainMap is always a domain Map view of the column Map
1884  // multivector. In this case, the domain and column Maps are
1885  // the same, so X_domainMap _is_ X_colMap.
1886  X_domainMap = X_colMap;
1887  if (! ZeroStartingSolution_) { // Don't copy if zero initial guess
1888  try {
1889  deep_copy (*X_domainMap , X); // Copy X into constant stride MV
1890  } catch (std::exception& e) {
1891  std::ostringstream os;
1892  os << "Ifpack2::Relaxation::MTGaussSeidel: "
1893  "deep_copy(*X_domainMap, X) threw an exception: "
1894  << e.what () << ".";
1895  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what ());
1896  }
1897  }
1898  copyBackOutput = true; // Don't forget to copy back at end.
1899  /*
1900  TPETRA_EFFICIENCY_WARNING(
1901  ! X.isConstantStride (),
1902  std::runtime_error,
1903  "MTGaussSeidel: The current implementation of the Gauss-Seidel "
1904  "kernel requires that X and B both have constant stride. Since X "
1905  "does not have constant stride, we had to make a copy. This is a "
1906  "limitation of the current implementation and not your fault, but we "
1907  "still report it as an efficiency warning for your information.");
1908  */
1909  }
1910  }
1911  else { // Column Map and domain Map are _not_ the same.
1912  updateCachedMultiVector(colMap,as<size_t>(X.getNumVectors()));
1913  X_colMap = cachedMV_;
1914 
1915  X_domainMap = X_colMap->offsetViewNonConst (domainMap, 0);
1916 
1917 #ifdef HAVE_IFPACK2_DEBUG
1918  auto X_colMap_host_view = X_colMap->template getLocalView<Kokkos::HostSpace> ();
1919  auto X_domainMap_host_view = X_domainMap->template getLocalView<Kokkos::HostSpace> ();
1920 
1921  if (X_colMap->getLocalLength () != 0 && X_domainMap->getLocalLength ()) {
1923  X_colMap_host_view.data () != X_domainMap_host_view.data (),
1924  std::logic_error, "Ifpack2::Relaxation::MTGaussSeidel: "
1925  "Pointer to start of column Map view of X is not equal to pointer to "
1926  "start of (domain Map view of) X. This may mean that "
1927  "Tpetra::MultiVector::offsetViewNonConst is broken. "
1928  "Please report this bug to the Tpetra developers.");
1929  }
1930 
1932  X_colMap_host_view.extent (0) < X_domainMap_host_view.extent (0) ||
1933  X_colMap->getLocalLength () < X_domainMap->getLocalLength (),
1934  std::logic_error, "Ifpack2::Relaxation::MTGaussSeidel: "
1935  "X_colMap has fewer local rows than X_domainMap. "
1936  "X_colMap_host_view.extent(0) = " << X_colMap_host_view.extent (0)
1937  << ", X_domainMap_host_view.extent(0) = "
1938  << X_domainMap_host_view.extent (0)
1939  << ", X_colMap->getLocalLength() = " << X_colMap->getLocalLength ()
1940  << ", and X_domainMap->getLocalLength() = "
1941  << X_domainMap->getLocalLength ()
1942  << ". This means that Tpetra::MultiVector::offsetViewNonConst "
1943  "is broken. Please report this bug to the Tpetra developers.");
1944 
1946  X_colMap->getNumVectors () != X_domainMap->getNumVectors (),
1947  std::logic_error, "Ifpack2::Relaxation::MTGaussSeidel: "
1948  "X_colMap has a different number of columns than X_domainMap. "
1949  "X_colMap->getNumVectors() = " << X_colMap->getNumVectors ()
1950  << " != X_domainMap->getNumVectors() = "
1951  << X_domainMap->getNumVectors ()
1952  << ". This means that Tpetra::MultiVector::offsetViewNonConst "
1953  "is broken. Please report this bug to the Tpetra developers.");
1954 #endif // HAVE_IFPACK2_DEBUG
1955 
1956  if (ZeroStartingSolution_) {
1957  // No need for an Import, since we're filling with zeros.
1958  X_colMap->putScalar (ZERO);
1959  } else {
1960  // We could just copy X into X_domainMap. However, that
1961  // wastes a copy, because the Import also does a copy (plus
1962  // communication). Since the typical use case for
1963  // Gauss-Seidel is a small number of sweeps (2 is typical), we
1964  // don't want to waste that copy. Thus, we do the Import
1965  // here, and skip the first Import in the first sweep.
1966  // Importing directly from X effects the copy into X_domainMap
1967  // (which is a view of X_colMap).
1968  X_colMap->doImport (X, *importer, Tpetra::CombineMode::INSERT);
1969  }
1970  copyBackOutput = true; // Don't forget to copy back at end.
1971  } // if column and domain Maps are (not) the same
1972 
1973  // The Gauss-Seidel / SOR kernel expects multivectors of constant
1974  // stride. X_colMap is by construction, but B might not be. If
1975  // it's not, we have to make a copy.
1976  RCP<const MV> B_in;
1977  if (B.isConstantStride ()) {
1978  B_in = rcpFromRef (B);
1979  }
1980  else {
1981  // Range Map and row Map are the same in this case, so we can
1982  // use the cached row Map multivector to store a constant stride
1983  // copy of B.
1984  //RCP<MV> B_in_nonconst = crsMat->getRowMapMultiVector (B, true);
1985  RCP<MV> B_in_nonconst = rcp (new MV (rowMap, B.getNumVectors()));
1986  try {
1987  deep_copy (*B_in_nonconst, B);
1988  } catch (std::exception& e) {
1989  std::ostringstream os;
1990  os << "Ifpack2::Relaxation::MTGaussSeidel: "
1991  "deep_copy(*B_in_nonconst, B) threw an exception: "
1992  << e.what () << ".";
1993  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what ());
1994  }
1995  B_in = rcp_const_cast<const MV> (B_in_nonconst);
1996 
1997  /*
1998  TPETRA_EFFICIENCY_WARNING(
1999  ! B.isConstantStride (),
2000  std::runtime_error,
2001  "MTGaussSeidel: The current implementation requires that B have "
2002  "constant stride. Since B does not have constant stride, we had to "
2003  "copy it into a separate constant-stride multivector. This is a "
2004  "limitation of the current implementation and not your fault, but we "
2005  "still report it as an efficiency warning for your information.");
2006  */
2007  }
2008 
2009  local_matrix_type kcsr = crsMat->getLocalMatrix ();
2010  const size_t NumVectors = X.getNumVectors ();
2011 
2012  bool update_y_vector = true;
2013  //false as it was done up already, and we dont want to zero it in each sweep.
2014  bool zero_x_vector = false;
2015 
2016  for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
2017  if (! importer.is_null () && sweep > 0) {
2018  // We already did the first Import for the zeroth sweep above,
2019  // if it was necessary.
2020  X_colMap->doImport (*X_domainMap, *importer, Tpetra::CombineMode::INSERT);
2021  }
2022 
2023  for (size_t indVec = 0; indVec < NumVectors; ++indVec) {
2024  if (direction == Tpetra::Symmetric) {
2025  KokkosSparse::Experimental::symmetric_gauss_seidel_apply
2026  (mtKernelHandle_.getRawPtr(), A_->getNodeNumRows(), A_->getNodeNumCols(),
2027  kcsr.graph.row_map, kcsr.graph.entries, kcsr.values,
2028  Kokkos::subview(X_colMap->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec),
2029  Kokkos::subview(B_in->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec),
2030  zero_x_vector, update_y_vector, DampingFactor_);
2031  }
2032  else if (direction == Tpetra::Forward) {
2033  KokkosSparse::Experimental::forward_sweep_gauss_seidel_apply
2034  (mtKernelHandle_.getRawPtr(), A_->getNodeNumRows(), A_->getNodeNumCols(),
2035  kcsr.graph.row_map,kcsr.graph.entries, kcsr.values,
2036  Kokkos::subview(X_colMap->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec ),
2037  Kokkos::subview(B_in->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec),
2038  zero_x_vector, update_y_vector, DampingFactor_);
2039  }
2040  else if (direction == Tpetra::Backward) {
2041  KokkosSparse::Experimental::backward_sweep_gauss_seidel_apply
2042  (mtKernelHandle_.getRawPtr(), A_->getNodeNumRows(), A_->getNodeNumCols(),
2043  kcsr.graph.row_map,kcsr.graph.entries, kcsr.values,
2044  Kokkos::subview(X_colMap->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec ),
2045  Kokkos::subview(B_in->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec),
2046  zero_x_vector, update_y_vector, DampingFactor_);
2047  }
2048  else {
2050  true, std::invalid_argument,
2051  prefix << "The 'direction' enum does not have any of its valid "
2052  "values: Forward, Backward, or Symmetric.");
2053  }
2054  }
2055 
2056  if (NumVectors > 1){
2057  update_y_vector = true;
2058  }
2059  else {
2060  update_y_vector = false;
2061  }
2062  }
2063 
2064  if (copyBackOutput) {
2065  try {
2066  deep_copy (X , *X_domainMap); // Copy result back into X.
2067  } catch (std::exception& e) {
2069  true, std::runtime_error, prefix << "deep_copy(X, *X_domainMap) "
2070  "threw an exception: " << e.what ());
2071  }
2072  }
2073 
2074  const double dampingFlops = (DampingFactor_ == STS::one ()) ? 0.0 : 1.0;
2075  const double numVectors = as<double> (X.getNumVectors ());
2076  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
2077  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
2078  double ApplyFlops = NumSweeps_ * numVectors *
2079  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
2080  if (direction == Tpetra::Symmetric)
2081  ApplyFlops *= 2.0;
2082  ApplyFlops_ += ApplyFlops;
2083 
2084 }
2085 
2086 template<class MatrixType>
2087 void
2088 Relaxation<MatrixType>::
2089 ApplyInverseMTSGS_CrsMatrix (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& B,
2090  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X) const
2091 {
2092  const Tpetra::ESweepDirection direction = Tpetra::Symmetric;
2093  this->MTGaussSeidel (B, X, direction);
2094 }
2095 
2096 
2097 template<class MatrixType>
2098 void Relaxation<MatrixType>::ApplyInverseMTGS_CrsMatrix (
2099  const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& B,
2100  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X) const {
2101 
2102  const Tpetra::ESweepDirection direction =
2103  DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward;
2104  this->MTGaussSeidel (B, X, direction);
2105 }
2106 
2107 template<class MatrixType>
2108 void
2109 Relaxation<MatrixType>::
2110 ApplyInverseSGS (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
2111  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
2112 {
2113  typedef Relaxation<MatrixType> this_type;
2114  // The CrsMatrix version is faster, because it can access the sparse
2115  // matrix data directly, rather than by copying out each row's data
2116  // in turn. Thus, we check whether the RowMatrix is really a
2117  // CrsMatrix.
2118  //
2119  // FIXME (mfh 07 Jul 2013) See note on crs_matrix_type typedef
2120  // declaration in Ifpack2_Relaxation_decl.hpp header file. The code
2121  // will still be correct if the cast fails, but it will use an
2122  // unoptimized kernel.
2123  const block_crs_matrix_type* blockCrsMat = dynamic_cast<const block_crs_matrix_type*> (A_.getRawPtr());
2124  const crs_matrix_type* crsMat = dynamic_cast<const crs_matrix_type*> (&(*A_));
2125  if (blockCrsMat != NULL) {
2126  const_cast<this_type*> (this)->ApplyInverseSGS_BlockCrsMatrix(*blockCrsMat, X, Y);
2127  }
2128  else if (crsMat != NULL) {
2129  ApplyInverseSGS_CrsMatrix (*crsMat, X, Y);
2130  } else {
2131  ApplyInverseSGS_RowMatrix (X, Y);
2132  }
2133 }
2134 
2135 
2136 template<class MatrixType>
2137 void
2138 Relaxation<MatrixType>::
2139 ApplyInverseSGS_RowMatrix (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
2140  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
2141 {
2142  using Teuchos::Array;
2143  using Teuchos::ArrayRCP;
2144  using Teuchos::ArrayView;
2145  using Teuchos::as;
2146  using Teuchos::RCP;
2147  using Teuchos::rcp;
2148  using Teuchos::rcpFromRef;
2149  typedef Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> MV;
2150 
2151 
2152  // Tpetra's GS implementation for CrsMatrix handles zeroing out the
2153  // starting multivector itself. The generic RowMatrix version here
2154  // does not, so we have to zero out Y here.
2155  if (ZeroStartingSolution_) {
2156  Y.putScalar (STS::zero ());
2157  }
2158 
2159  const size_t NumVectors = X.getNumVectors ();
2160  const size_t maxLength = A_->getNodeMaxNumRowEntries ();
2161  Array<local_ordinal_type> Indices (maxLength);
2162  Array<scalar_type> Values (maxLength);
2163 
2164  // Local smoothing stuff
2165  const size_t numMyRows = A_->getNodeNumRows();
2166  const local_ordinal_type * rowInd = 0;
2167  size_t numActive = numMyRows;
2168  bool do_local = !localSmoothingIndices_.is_null();
2169  if(do_local) {
2170  rowInd = localSmoothingIndices_.getRawPtr();
2171  numActive = localSmoothingIndices_.size();
2172  }
2173 
2174 
2175  RCP<MV> Y2;
2176  if (IsParallel_) {
2177  if (Importer_.is_null ()) { // domain and column Maps are the same.
2178  updateCachedMultiVector(Y.getMap(),NumVectors);
2179  } else {
2180  updateCachedMultiVector(Importer_->getTargetMap(),NumVectors);
2181  }
2182  Y2= cachedMV_;
2183  }
2184  else {
2185  Y2 = rcpFromRef (Y);
2186  }
2187 
2188  // Diagonal
2189  ArrayRCP<const scalar_type> d_rcp = Diagonal_->get1dView();
2190  ArrayView<const scalar_type> d_ptr = d_rcp();
2191 
2192  // Constant stride check
2193  bool constant_stride = X.isConstantStride() && Y2->isConstantStride();
2194 
2195  if(constant_stride) {
2196  // extract 1D RCPs
2197  size_t x_stride = X.getStride();
2198  size_t y2_stride = Y2->getStride();
2199  ArrayRCP<scalar_type> y2_rcp = Y2->get1dViewNonConst();
2200  ArrayRCP<const scalar_type> x_rcp = X.get1dView();
2201  ArrayView<scalar_type> y2_ptr = y2_rcp();
2202  ArrayView<const scalar_type> x_ptr = x_rcp();
2203  Array<scalar_type> dtemp(NumVectors,STS::zero());
2204 
2205  for (int j = 0; j < NumSweeps_; j++) {
2206  // data exchange is here, once per sweep
2207  if (IsParallel_) {
2208  if (Importer_.is_null ()) {
2209  // just copy, since domain and column Maps are the same
2210  Tpetra::deep_copy (*Y2, Y);
2211  } else {
2212  Y2->doImport (Y, *Importer_, Tpetra::INSERT);
2213  }
2214  }
2215  for (size_t ii = 0; ii < numActive; ++ii) {
2216  local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
2217  size_t NumEntries;
2218  A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
2219  dtemp.assign(NumVectors,STS::zero());
2220 
2221  for (size_t k = 0; k < NumEntries; ++k) {
2222  const local_ordinal_type col = Indices[k];
2223  for (size_t m = 0; m < NumVectors; ++m) {
2224  dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
2225  }
2226  }
2227 
2228  for (size_t m = 0; m < NumVectors; ++m) {
2229  y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
2230  }
2231  }
2232 
2233  // ptrdiff_t is the same size as size_t, but is signed. Being
2234  // signed is important so that i >= 0 is not trivially true.
2235  for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
2236  local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
2237  size_t NumEntries;
2238  A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
2239  dtemp.assign(NumVectors,STS::zero());
2240 
2241  for (size_t k = 0; k < NumEntries; ++k) {
2242  const local_ordinal_type col = Indices[k];
2243  for (size_t m = 0; m < NumVectors; ++m) {
2244  dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
2245  }
2246  }
2247 
2248  for (size_t m = 0; m < NumVectors; ++m) {
2249  y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
2250  }
2251  }
2252 
2253  // FIXME (mfh 02 Jan 2013) This is only correct if row Map == range Map.
2254  if (IsParallel_) {
2255  Tpetra::deep_copy (Y, *Y2);
2256  }
2257  }
2258  }
2259  else {
2260  // extract 2D RCPs
2261  ArrayRCP<ArrayRCP<scalar_type> > y2_ptr = Y2->get2dViewNonConst ();
2262  ArrayRCP<ArrayRCP<const scalar_type> > x_ptr = X.get2dView ();
2263 
2264  for (int iter = 0; iter < NumSweeps_; ++iter) {
2265  // only one data exchange per sweep
2266  if (IsParallel_) {
2267  if (Importer_.is_null ()) {
2268  // just copy, since domain and column Maps are the same
2269  Tpetra::deep_copy (*Y2, Y);
2270  } else {
2271  Y2->doImport (Y, *Importer_, Tpetra::INSERT);
2272  }
2273  }
2274 
2275  for (size_t ii = 0; ii < numActive; ++ii) {
2276  local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
2277  const scalar_type diag = d_ptr[i];
2278  size_t NumEntries;
2279  A_->getLocalRowCopy (as<local_ordinal_type> (i), Indices (), Values (), NumEntries);
2280 
2281  for (size_t m = 0; m < NumVectors; ++m) {
2282  scalar_type dtemp = STS::zero ();
2283  ArrayView<const scalar_type> x_local = (x_ptr())[m]();
2284  ArrayView<scalar_type> y2_local = (y2_ptr())[m]();
2285 
2286  for (size_t k = 0; k < NumEntries; ++k) {
2287  const local_ordinal_type col = Indices[k];
2288  dtemp += Values[k] * y2_local[col];
2289  }
2290  y2_local[i] += DampingFactor_ * (x_local[i] - dtemp) * diag;
2291  }
2292  }
2293 
2294  // ptrdiff_t is the same size as size_t, but is signed. Being
2295  // signed is important so that i >= 0 is not trivially true.
2296  for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
2297  local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
2298  const scalar_type diag = d_ptr[i];
2299  size_t NumEntries;
2300  A_->getLocalRowCopy (as<local_ordinal_type> (i), Indices (), Values (), NumEntries);
2301 
2302  for (size_t m = 0; m < NumVectors; ++m) {
2303  scalar_type dtemp = STS::zero ();
2304  ArrayView<const scalar_type> x_local = (x_ptr())[m]();
2305  ArrayView<scalar_type> y2_local = (y2_ptr())[m]();
2306 
2307  for (size_t k = 0; k < NumEntries; ++k) {
2308  const local_ordinal_type col = Indices[k];
2309  dtemp += Values[k] * y2_local[col];
2310  }
2311  y2_local[i] += DampingFactor_ * (x_local[i] - dtemp) * diag;
2312  }
2313  }
2314 
2315  // FIXME (mfh 02 Jan 2013) This is only correct if row Map == range Map.
2316  if (IsParallel_) {
2317  Tpetra::deep_copy (Y, *Y2);
2318  }
2319  }
2320  }
2321 
2322  // See flop count discussion in implementation of ApplyInverseSGS_CrsMatrix().
2323  const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
2324  const double numVectors = as<double> (X.getNumVectors ());
2325  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
2326  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
2327  ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
2328  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
2329 }
2330 
2331 
2332 template<class MatrixType>
2333 void
2334 Relaxation<MatrixType>::
2335 ApplyInverseSGS_CrsMatrix (const crs_matrix_type& A,
2336  const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
2337  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
2338 {
2339  using Teuchos::as;
2340  const Tpetra::ESweepDirection direction = Tpetra::Symmetric;
2341  if (localSmoothingIndices_.is_null ()) {
2342  A.gaussSeidelCopy (Y, X, *Diagonal_, DampingFactor_, direction,
2343  NumSweeps_, ZeroStartingSolution_);
2344  }
2345  else {
2346  A.reorderedGaussSeidelCopy (Y, X, *Diagonal_, localSmoothingIndices_ (),
2347  DampingFactor_, direction,
2348  NumSweeps_, ZeroStartingSolution_);
2349  }
2350 
2351  // For each column of output, for each sweep over the matrix:
2352  //
2353  // - One + and one * for each matrix entry
2354  // - One / and one + for each row of the matrix
2355  // - If the damping factor is not one: one * for each row of the
2356  // matrix. (It's not fair to count this if the damping factor is
2357  // one, since the implementation could skip it. Whether it does
2358  // or not is the implementation's choice.)
2359  //
2360  // Each sweep of symmetric Gauss-Seidel / SOR counts as two sweeps,
2361  // one forward and one backward.
2362 
2363  // Floating-point operations due to the damping factor, per matrix
2364  // row, per direction, per columm of output.
2365  const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
2366  const double numVectors = as<double> (X.getNumVectors ());
2367  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
2368  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
2369  ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
2370  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
2371 }
2372 
2373 template<class MatrixType>
2374 void
2375 Relaxation<MatrixType>::
2376 ApplyInverseSGS_BlockCrsMatrix (const block_crs_matrix_type& A,
2377  const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
2378  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
2379 {
2380  using Teuchos::RCP;
2381  using Teuchos::rcp;
2382  using Teuchos::rcpFromRef;
2383  typedef Tpetra::Experimental::BlockMultiVector<scalar_type,
2384  local_ordinal_type, global_ordinal_type, node_type> BMV;
2385  typedef Tpetra::MultiVector<scalar_type,
2386  local_ordinal_type, global_ordinal_type, node_type> MV;
2387 
2388  //FIXME: (tcf) 8/21/2014 -- may be problematic for multiple right hand sides
2389  //
2390  // NOTE (mfh 12 Sep 2014) I don't think it should be a problem for
2391  // multiple right-hand sides, unless the input or output MultiVector
2392  // does not have constant stride. We should check for that case
2393  // here, in case it doesn't work in localGaussSeidel (which is
2394  // entirely possible).
2395  BMV yBlock (Y, * (A.getGraph ()->getDomainMap ()), A.getBlockSize ());
2396  const BMV xBlock (X, * (A.getColMap ()), A.getBlockSize ());
2397 
2398  bool performImport = false;
2399  RCP<BMV> yBlockCol;
2400  if (Importer_.is_null ()) {
2401  yBlockCol = Teuchos::rcpFromRef (yBlock);
2402  }
2403  else {
2404  if (yBlockColumnPointMap_.is_null () ||
2405  yBlockColumnPointMap_->getNumVectors () != yBlock.getNumVectors () ||
2406  yBlockColumnPointMap_->getBlockSize () != yBlock.getBlockSize ()) {
2407  yBlockColumnPointMap_ =
2408  rcp (new BMV (* (A.getColMap ()), A.getBlockSize (),
2409  static_cast<local_ordinal_type> (yBlock.getNumVectors ())));
2410  }
2411  yBlockCol = yBlockColumnPointMap_;
2412  performImport = true;
2413  }
2414 
2415  if (ZeroStartingSolution_) {
2416  yBlockCol->putScalar (STS::zero ());
2417  }
2418  else if (performImport) {
2419  yBlockCol->doImport (yBlock, *Importer_, Tpetra::INSERT);
2420  }
2421 
2422  // FIXME (mfh 12 Sep 2014) Shouldn't this come from the user's parameter?
2423  const Tpetra::ESweepDirection direction = Tpetra::Symmetric;
2424 
2425  for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
2426  if (performImport && sweep > 0) {
2427  yBlockCol->doImport (yBlock, *Importer_, Tpetra::INSERT);
2428  }
2429  A.localGaussSeidel (xBlock, *yBlockCol, blockDiag_,
2430  DampingFactor_, direction);
2431  if (performImport) {
2432  RCP<const MV> yBlockColPointDomain =
2433  yBlockCol->getMultiVectorView ().offsetView (A.getDomainMap (), 0);
2434  MV yBlockView = yBlock.getMultiVectorView ();
2435  Tpetra::deep_copy (yBlockView, *yBlockColPointDomain);
2436  }
2437  }
2438 }
2439 
2440 
2441 template<class MatrixType>
2443 {
2444  std::ostringstream os;
2445 
2446  // Output is a valid YAML dictionary in flow style. If you don't
2447  // like everything on a single line, you should call describe()
2448  // instead.
2449  os << "\"Ifpack2::Relaxation\": {";
2450 
2451  os << "Initialized: " << (isInitialized () ? "true" : "false") << ", "
2452  << "Computed: " << (isComputed () ? "true" : "false") << ", ";
2453 
2454  // It's useful to print this instance's relaxation method (Jacobi,
2455  // Gauss-Seidel, or symmetric Gauss-Seidel). If you want more info
2456  // than that, call describe() instead.
2457  os << "Type: ";
2458  if (PrecType_ == Ifpack2::Details::JACOBI) {
2459  os << "Jacobi";
2460  } else if (PrecType_ == Ifpack2::Details::GS) {
2461  os << "Gauss-Seidel";
2462  } else if (PrecType_ == Ifpack2::Details::SGS) {
2463  os << "Symmetric Gauss-Seidel";
2464  } else if (PrecType_ == Ifpack2::Details::MTGS) {
2465  os << "MT Gauss-Seidel";
2466  } else if (PrecType_ == Ifpack2::Details::MTSGS) {
2467  os << "MT Symmetric Gauss-Seidel";
2468  }
2469  else {
2470  os << "INVALID";
2471  }
2472 
2473  os << ", " << "sweeps: " << NumSweeps_ << ", "
2474  << "damping factor: " << DampingFactor_ << ", ";
2475  if (DoL1Method_) {
2476  os << "use l1: " << DoL1Method_ << ", "
2477  << "l1 eta: " << L1Eta_ << ", ";
2478  }
2479 
2480  if (A_.is_null ()) {
2481  os << "Matrix: null";
2482  }
2483  else {
2484  os << "Global matrix dimensions: ["
2485  << A_->getGlobalNumRows () << ", " << A_->getGlobalNumCols () << "]"
2486  << ", Global nnz: " << A_->getGlobalNumEntries();
2487  }
2488 
2489  os << "}";
2490  return os.str ();
2491 }
2492 
2493 
2494 template<class MatrixType>
2495 void
2498  const Teuchos::EVerbosityLevel verbLevel) const
2499 {
2500  using Teuchos::OSTab;
2502  using Teuchos::VERB_DEFAULT;
2503  using Teuchos::VERB_NONE;
2504  using Teuchos::VERB_LOW;
2505  using Teuchos::VERB_MEDIUM;
2506  using Teuchos::VERB_HIGH;
2507  using Teuchos::VERB_EXTREME;
2508  using std::endl;
2509 
2510  const Teuchos::EVerbosityLevel vl =
2511  (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
2512 
2513  const int myRank = this->getComm ()->getRank ();
2514 
2515  // none: print nothing
2516  // low: print O(1) info from Proc 0
2517  // medium:
2518  // high:
2519  // extreme:
2520 
2521  if (vl != VERB_NONE && myRank == 0) {
2522  // Describable interface asks each implementation to start with a tab.
2523  OSTab tab1 (out);
2524  // Output is valid YAML; hence the quotes, to protect the colons.
2525  out << "\"Ifpack2::Relaxation\":" << endl;
2526  OSTab tab2 (out);
2527  out << "MatrixType: \"" << TypeNameTraits<MatrixType>::name () << "\""
2528  << endl;
2529  if (this->getObjectLabel () != "") {
2530  out << "Label: " << this->getObjectLabel () << endl;
2531  }
2532  out << "Initialized: " << (isInitialized () ? "true" : "false") << endl
2533  << "Computed: " << (isComputed () ? "true" : "false") << endl
2534  << "Parameters: " << endl;
2535  {
2536  OSTab tab3 (out);
2537  out << "\"relaxation: type\": ";
2538  if (PrecType_ == Ifpack2::Details::JACOBI) {
2539  out << "Jacobi";
2540  } else if (PrecType_ == Ifpack2::Details::GS) {
2541  out << "Gauss-Seidel";
2542  } else if (PrecType_ == Ifpack2::Details::SGS) {
2543  out << "Symmetric Gauss-Seidel";
2544  } else if (PrecType_ == Ifpack2::Details::MTGS) {
2545  out << "MT Gauss-Seidel";
2546  } else if (PrecType_ == Ifpack2::Details::MTSGS) {
2547  out << "MT Symmetric Gauss-Seidel";
2548  } else {
2549  out << "INVALID";
2550  }
2551  // We quote these parameter names because they contain colons.
2552  // YAML uses the colon to distinguish key from value.
2553  out << endl
2554  << "\"relaxation: sweeps\": " << NumSweeps_ << endl
2555  << "\"relaxation: damping factor\": " << DampingFactor_ << endl
2556  << "\"relaxation: min diagonal value\": " << MinDiagonalValue_ << endl
2557  << "\"relaxation: zero starting solution\": " << ZeroStartingSolution_ << endl
2558  << "\"relaxation: backward mode\": " << DoBackwardGS_ << endl
2559  << "\"relaxation: use l1\": " << DoL1Method_ << endl
2560  << "\"relaxation: l1 eta\": " << L1Eta_ << endl;
2561  }
2562  out << "Computed quantities:" << endl;
2563  {
2564  OSTab tab3 (out);
2565  out << "Global number of rows: " << A_->getGlobalNumRows () << endl
2566  << "Global number of columns: " << A_->getGlobalNumCols () << endl;
2567  }
2568  if (checkDiagEntries_ && isComputed ()) {
2569  out << "Properties of input diagonal entries:" << endl;
2570  {
2571  OSTab tab3 (out);
2572  out << "Magnitude of minimum-magnitude diagonal entry: "
2573  << globalMinMagDiagEntryMag_ << endl
2574  << "Magnitude of maximum-magnitude diagonal entry: "
2575  << globalMaxMagDiagEntryMag_ << endl
2576  << "Number of diagonal entries with small magnitude: "
2577  << globalNumSmallDiagEntries_ << endl
2578  << "Number of zero diagonal entries: "
2579  << globalNumZeroDiagEntries_ << endl
2580  << "Number of diagonal entries with negative real part: "
2581  << globalNumNegDiagEntries_ << endl
2582  << "Abs 2-norm diff between computed and actual inverse "
2583  << "diagonal: " << globalDiagNormDiff_ << endl;
2584  }
2585  }
2586  if (isComputed ()) {
2587  out << "Saved diagonal offsets: "
2588  << (savedDiagOffsets_ ? "true" : "false") << endl;
2589  }
2590  out << "Call counts and total times (in seconds): " << endl;
2591  {
2592  OSTab tab3 (out);
2593  out << "initialize: " << endl;
2594  {
2595  OSTab tab4 (out);
2596  out << "Call count: " << NumInitialize_ << endl;
2597  out << "Total time: " << InitializeTime_ << endl;
2598  }
2599  out << "compute: " << endl;
2600  {
2601  OSTab tab4 (out);
2602  out << "Call count: " << NumCompute_ << endl;
2603  out << "Total time: " << ComputeTime_ << endl;
2604  }
2605  out << "apply: " << endl;
2606  {
2607  OSTab tab4 (out);
2608  out << "Call count: " << NumApply_ << endl;
2609  out << "Total time: " << ApplyTime_ << endl;
2610  }
2611  }
2612  }
2613 }
2614 
2615 } // namespace Ifpack2
2616 
2617 #define IFPACK2_RELAXATION_INSTANT(S,LO,GO,N) \
2618  template class Ifpack2::Relaxation< Tpetra::RowMatrix<S, LO, GO, N> >;
2619 
2620 #endif // IFPACK2_RELAXATION_DEF_HPP
bool hasTransposeApply() const
Whether apply() and applyMat() let you apply the transpose or conjugate transpose.
Definition: Ifpack2_Relaxation_def.hpp:409
double getComputeFlops() const
Total number of floating-point operations over all calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:451
double getApplyFlops() const
Total number of floating-point operations over all calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:457
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object&#39;s attributes to the given output stream.
Definition: Ifpack2_Relaxation_def.hpp:2497
bool is_null(const boost::shared_ptr< T > &p)
basic_OSTab< char > OSTab
static magnitudeType eps()
Relaxation(const Teuchos::RCP< const row_matrix_type > &A)
Constructor.
Definition: Ifpack2_Relaxation_def.hpp:177
T & get(const std::string &name, T def_value)
void compute()
Compute the preconditioner.
Definition: Ifpack2_Relaxation_def.hpp:883
ParameterList & set(std::string const &name, T const &value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
static RCP< Time > getNewCounter(const std::string &name)
static RCP< Time > lookupCounter(const std::string &name)
virtual void printDoc(std::string const &docString, std::ostream &out) const =0
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
size_t getNodeSmootherComplexity() const
Get a rough estimate of cost per iteration.
Definition: Ifpack2_Relaxation_def.hpp:464
virtual const std::string getXMLTypeName() const =0
static std::ostream & printLines(std::ostream &os, const std::string &linePrefix, const std::string &lines)
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
The communicator over which the matrix and vectors are distributed.
Definition: Ifpack2_Relaxation_def.hpp:366
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Change the matrix to be preconditioned.
Definition: Ifpack2_Relaxation_def.hpp:157
int getNumApply() const
Total number of calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:427
T * getRawPtr() const
bool remove(std::string const &name, bool throwIfNotExists=true)
double getComputeTime() const
Total time in seconds spent in all calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:439
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
MatrixType::node_type node_type
The Node type used by the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:247
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getDomainMap() const
Returns the Tpetra::Map object associated with the domain of this operator.
Definition: Ifpack2_Relaxation_def.hpp:386
void setParameters(const Teuchos::ParameterList &params)
Set the relaxation / preconditioner parameters.
Definition: Ifpack2_Relaxation_def.hpp:356
File for utility functions.
std::string description() const
A simple one-line description of this object.
Definition: Ifpack2_Relaxation_def.hpp:2442
int getNumInitialize() const
Total number of calls to initialize().
Definition: Ifpack2_Relaxation_def.hpp:415
void validateParametersAndSetDefaults(ParameterList const &validParamList, int const depth=1000)
TEUCHOS_DEPRECATED void reduceAll(const Comm< Ordinal > &comm, const EReductionType reductType, const Packet &send, Packet *globalReduct)
virtual ValidStringsList validStringValues() const =0
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getRangeMap() const
Returns the Tpetra::Map object associated with the range of this operator.
Definition: Ifpack2_Relaxation_def.hpp:399
MatrixType::global_ordinal_type global_ordinal_type
The type of global indices in the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:244
double getInitializeTime() const
Total time in seconds spent in all calls to initialize().
Definition: Ifpack2_Relaxation_def.hpp:433
any & getAny(bool activeQry=true)
void applyMat(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS) const
Apply the preconditioner to X, returning the result in Y.
Definition: Ifpack2_Relaxation_def.hpp:584
Teuchos::RCP< const row_matrix_type > getMatrix() const
The matrix to be preconditioned.
Definition: Ifpack2_Relaxation_def.hpp:377
TypeTo as(const TypeFrom &t)
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:241
bool isType(const std::string &name) const
MatrixType::scalar_type scalar_type
The type of the entries of the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:238
double totalElapsedTime(bool readCurrentTime=false) const
std::string typeName() const
virtual ~Relaxation()
Destructor.
Definition: Ifpack2_Relaxation_def.hpp:218
const std::type_info & type() const
int getNumCompute() const
Total number of calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:421
Relaxation preconditioners for Tpetra::RowMatrix and Tpetra::CrsMatrix sparse matrices.
Definition: Ifpack2_Relaxation_decl.hpp:223
double getApplyTime() const
Total time in seconds spent in all calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:445
void getValidParameters(Teuchos::ParameterList &params)
Fills a list which contains all the parameters possibly used by Ifpack2.
Definition: Ifpack2_Parameters.cpp:50
T * get() const
void initialize()
Initialize the preconditioner.
Definition: Ifpack2_Relaxation_def.hpp:605
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the preconditioner to X, returning the result in Y.
Definition: Ifpack2_Relaxation_def.hpp:477
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const
Return a list of all the parameters that this class accepts.
Definition: Ifpack2_Relaxation_def.hpp:223
Teuchos::ScalarTraits< scalar_type >::magnitudeType magnitude_type
The type of the magnitude (absolute value) of a matrix entry.
Definition: Ifpack2_Relaxation_decl.hpp:250
virtual void validate(ParameterEntry const &entry, std::string const &paramName, std::string const &sublistName) const =0
bool is_null() const