Ifpack2 Templated Preconditioning Package  Version 1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Ifpack2_Relaxation_def.hpp
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // ***********************************************************************
38 //@HEADER
39 */
40 
41 #ifndef IFPACK2_RELAXATION_DEF_HPP
42 #define IFPACK2_RELAXATION_DEF_HPP
43 
44 #include "Teuchos_StandardParameterEntryValidators.hpp"
45 #include "Teuchos_TimeMonitor.hpp"
46 #include "Tpetra_CrsMatrix.hpp"
47 #include "Tpetra_BlockCrsMatrix.hpp"
48 #include "Tpetra_BlockView.hpp"
49 #include "Ifpack2_Utilities.hpp"
50 #include "Ifpack2_Details_getCrsMatrix.hpp"
51 #include "MatrixMarket_Tpetra.hpp"
52 #include "Tpetra_Details_residual.hpp"
53 #include <cstdlib>
54 #include <memory>
55 #include <sstream>
56 #include "KokkosSparse_gauss_seidel.hpp"
57 
58 namespace {
59  // Validate that a given int is nonnegative.
60  class NonnegativeIntValidator : public Teuchos::ParameterEntryValidator {
61  public:
62  // Constructor (does nothing).
63  NonnegativeIntValidator () {}
64 
65  // ParameterEntryValidator wants this method.
67  return Teuchos::null;
68  }
69 
70  // Actually validate the parameter's value.
71  void
72  validate (const Teuchos::ParameterEntry& entry,
73  const std::string& paramName,
74  const std::string& sublistName) const
75  {
76  using std::endl;
77  Teuchos::any anyVal = entry.getAny (true);
78  const std::string entryName = entry.getAny (false).typeName ();
79 
81  anyVal.type () != typeid (int),
83  "Parameter \"" << paramName << "\" in sublist \"" << sublistName
84  << "\" has the wrong type." << endl << "Parameter: " << paramName
85  << endl << "Type specified: " << entryName << endl
86  << "Type required: int" << endl);
87 
88  const int val = Teuchos::any_cast<int> (anyVal);
91  "Parameter \"" << paramName << "\" in sublist \"" << sublistName
92  << "\" is negative." << endl << "Parameter: " << paramName
93  << endl << "Value specified: " << val << endl
94  << "Required range: [0, INT_MAX]" << endl);
95  }
96 
97  // ParameterEntryValidator wants this method.
98  const std::string getXMLTypeName () const {
99  return "NonnegativeIntValidator";
100  }
101 
102  // ParameterEntryValidator wants this method.
103  void
104  printDoc (const std::string& docString,
105  std::ostream &out) const
106  {
107  Teuchos::StrUtils::printLines (out, "# ", docString);
108  out << "#\tValidator Used: " << std::endl;
109  out << "#\t\tNonnegativeIntValidator" << std::endl;
110  }
111  };
112 
113  // A way to get a small positive number (eps() for floating-point
114  // types, or 1 for integer types) when Teuchos::ScalarTraits doesn't
115  // define it (for example, for integer values).
116  template<class Scalar, const bool isOrdinal=Teuchos::ScalarTraits<Scalar>::isOrdinal>
117  class SmallTraits {
118  public:
119  // Return eps if Scalar is a floating-point type, else return 1.
120  static const Scalar eps ();
121  };
122 
123  // Partial specialization for when Scalar is not a floating-point type.
124  template<class Scalar>
125  class SmallTraits<Scalar, true> {
126  public:
127  static const Scalar eps () {
129  }
130  };
131 
132  // Partial specialization for when Scalar is a floating-point type.
133  template<class Scalar>
134  class SmallTraits<Scalar, false> {
135  public:
136  static const Scalar eps () {
138  }
139  };
140 
141  // Work-around for GitHub Issue #5269.
142  template<class Scalar,
143  const bool isComplex = Teuchos::ScalarTraits<Scalar>::isComplex>
144  struct RealTraits {};
145 
146  template<class Scalar>
147  struct RealTraits<Scalar, false> {
148  using val_type = Scalar;
149  using mag_type = Scalar;
150  static KOKKOS_INLINE_FUNCTION mag_type real (const val_type& z) {
151  return z;
152  }
153  };
154 
155  template<class Scalar>
156  struct RealTraits<Scalar, true> {
157  using val_type = Scalar;
158  using mag_type = typename Teuchos::ScalarTraits<Scalar>::magnitudeType;
159  static KOKKOS_INLINE_FUNCTION mag_type real (const val_type& z) {
160  return Kokkos::ArithTraits<val_type>::real (z);
161  }
162  };
163 
164  template<class Scalar>
165  KOKKOS_INLINE_FUNCTION typename RealTraits<Scalar>::mag_type
166  getRealValue (const Scalar& z) {
167  return RealTraits<Scalar>::real (z);
168  }
169 
170 } // namespace (anonymous)
171 
172 namespace Ifpack2 {
173 
174 template<class MatrixType>
175 void
176 Relaxation<MatrixType>::
177 updateCachedMultiVector (const Teuchos::RCP<const Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type>>& map,
178  size_t numVecs) const
179 {
180  // Allocate a multivector if the cached one isn't perfect. Checking
181  // for map pointer equality is much cheaper than Map::isSameAs.
182  if (cachedMV_.is_null () ||
183  map.get () != cachedMV_->getMap ().get () ||
184  cachedMV_->getNumVectors () != numVecs) {
185  using MV = Tpetra::MultiVector<scalar_type, local_ordinal_type,
186  global_ordinal_type, node_type>;
187  cachedMV_ = Teuchos::rcp (new MV (map, numVecs, false));
188  }
189 }
190 
191 template<class MatrixType>
194 {
195  if (A.getRawPtr() != A_.getRawPtr()) { // it's a different matrix
196  Importer_ = Teuchos::null;
197  pointImporter_ = Teuchos::null;
198  Diagonal_ = Teuchos::null; // ??? what if this comes from the user???
199  isInitialized_ = false;
200  IsComputed_ = false;
201  diagOffsets_ = Kokkos::View<size_t*, device_type>();
202  savedDiagOffsets_ = false;
203  hasBlockCrsMatrix_ = false;
204  serialGaussSeidel_ = Teuchos::null;
205  if (! A.is_null ()) {
206  IsParallel_ = (A->getRowMap ()->getComm ()->getSize () > 1);
207  }
208  A_ = A;
209  }
210 }
211 
212 template<class MatrixType>
215 : A_ (A),
216  IsParallel_ ((A.is_null () || A->getRowMap ().is_null () || A->getRowMap ()->getComm ().is_null ()) ?
217  false : // a reasonable default if there's no communicator
218  A->getRowMap ()->getComm ()->getSize () > 1)
219 {
220  this->setObjectLabel ("Ifpack2::Relaxation");
221 }
222 
223 
224 template<class MatrixType>
227 {
228  using Teuchos::Array;
230  using Teuchos::parameterList;
231  using Teuchos::RCP;
232  using Teuchos::rcp;
233  using Teuchos::rcp_const_cast;
234  using Teuchos::rcp_implicit_cast;
235  using Teuchos::setStringToIntegralParameter;
237 
238  if (validParams_.is_null ()) {
239  RCP<ParameterList> pl = parameterList ("Ifpack2::Relaxation");
240 
241  // Set a validator that automatically converts from the valid
242  // string options to their enum values.
243  Array<std::string> precTypes (8);
244  precTypes[0] = "Jacobi";
245  precTypes[1] = "Gauss-Seidel";
246  precTypes[2] = "Symmetric Gauss-Seidel";
247  precTypes[3] = "MT Gauss-Seidel";
248  precTypes[4] = "MT Symmetric Gauss-Seidel";
249  precTypes[5] = "Richardson";
250  precTypes[6] = "Two-stage Gauss-Seidel";
251  precTypes[7] = "Two-stage Symmetric Gauss-Seidel";
252  Array<Details::RelaxationType> precTypeEnums (8);
253  precTypeEnums[0] = Details::JACOBI;
254  precTypeEnums[1] = Details::GS;
255  precTypeEnums[2] = Details::SGS;
256  precTypeEnums[3] = Details::MTGS;
257  precTypeEnums[4] = Details::MTSGS;
258  precTypeEnums[5] = Details::RICHARDSON;
259  precTypeEnums[6] = Details::GS2;
260  precTypeEnums[7] = Details::SGS2;
261  const std::string defaultPrecType ("Jacobi");
262  setStringToIntegralParameter<Details::RelaxationType> ("relaxation: type",
263  defaultPrecType, "Relaxation method", precTypes (), precTypeEnums (),
264  pl.getRawPtr ());
265 
266  const int numSweeps = 1;
267  RCP<PEV> numSweepsValidator =
268  rcp_implicit_cast<PEV> (rcp (new NonnegativeIntValidator));
269  pl->set ("relaxation: sweeps", numSweeps, "Number of relaxation sweeps",
270  rcp_const_cast<const PEV> (numSweepsValidator));
271 
272  // number of 'local' outer sweeps for two-stage GS
273  const int numOuterSweeps = 1;
274  RCP<PEV> numOuterSweepsValidator =
275  rcp_implicit_cast<PEV> (rcp (new NonnegativeIntValidator));
276  pl->set ("relaxation: outer sweeps", numOuterSweeps, "Number of outer local relaxation sweeps for two-stage GS",
277  rcp_const_cast<const PEV> (numOuterSweepsValidator));
278  // number of 'local' inner sweeps for two-stage GS
279  const int numInnerSweeps = 1;
280  RCP<PEV> numInnerSweepsValidator =
281  rcp_implicit_cast<PEV> (rcp (new NonnegativeIntValidator));
282  pl->set ("relaxation: inner sweeps", numInnerSweeps, "Number of inner local relaxation sweeps for two-stage GS",
283  rcp_const_cast<const PEV> (numInnerSweepsValidator));
284  // specify damping factor for the inner sweeps of two-stage GS
285  const scalar_type innerDampingFactor = STS::one ();
286  pl->set ("relaxation: inner damping factor", innerDampingFactor, "Damping factor for the inner sweep of two-stage GS");
287  // specify whether to use sptrsv instead of inner-iterations for two-stage GS
288  const bool innerSpTrsv = false;
289  pl->set ("relaxation: inner sparse-triangular solve", innerSpTrsv, "Specify whether to use sptrsv instead of JR iterations for two-stage GS");
290  // specify whether to use compact form of recurrence for two-stage GS
291  const bool compactForm = false;
292  pl->set ("relaxation: compact form", compactForm, "Specify whether to use compact form of recurrence for two-stage GS");
293 
294  const scalar_type dampingFactor = STS::one ();
295  pl->set ("relaxation: damping factor", dampingFactor);
296 
297  const bool zeroStartingSolution = true;
298  pl->set ("relaxation: zero starting solution", zeroStartingSolution);
299 
300  const bool doBackwardGS = false;
301  pl->set ("relaxation: backward mode", doBackwardGS);
302 
303  const bool doL1Method = false;
304  pl->set ("relaxation: use l1", doL1Method);
305 
306  const magnitude_type l1eta = (STM::one() + STM::one() + STM::one()) /
307  (STM::one() + STM::one()); // 1.5
308  pl->set ("relaxation: l1 eta", l1eta);
309 
310  const scalar_type minDiagonalValue = STS::zero ();
311  pl->set ("relaxation: min diagonal value", minDiagonalValue);
312 
313  const bool fixTinyDiagEntries = false;
314  pl->set ("relaxation: fix tiny diagonal entries", fixTinyDiagEntries);
315 
316  const bool checkDiagEntries = false;
317  pl->set ("relaxation: check diagonal entries", checkDiagEntries);
318 
319  Teuchos::ArrayRCP<local_ordinal_type> localSmoothingIndices = Teuchos::null;
320  pl->set("relaxation: local smoothing indices", localSmoothingIndices);
321 
322  const bool is_matrix_structurally_symmetric = false;
323  pl->set("relaxation: symmetric matrix structure", is_matrix_structurally_symmetric);
324 
325  const bool ifpack2_dump_matrix = false;
326  pl->set("relaxation: ifpack2 dump matrix", ifpack2_dump_matrix);
327 
328  const int cluster_size = 1;
329  pl->set("relaxation: mtgs cluster size", cluster_size);
330 
331  pl->set("relaxation: mtgs coloring algorithm", "Default");
332 
333  const int long_row_threshold = 0;
334  pl->set("relaxation: long row threshold", long_row_threshold);
335 
336  const bool timer_for_apply = true;
337  pl->set("timer for apply", timer_for_apply);
338 
339  validParams_ = rcp_const_cast<const ParameterList> (pl);
340  }
341  return validParams_;
342 }
343 
344 
345 template<class MatrixType>
347 {
348  using Teuchos::getIntegralValue;
349  using Teuchos::getStringValue;
351  using Teuchos::RCP;
352  typedef scalar_type ST; // just to make code below shorter
353 
354  if (pl.isType<double>("relaxation: damping factor")) {
355  // Make sure that ST=complex can run with a damping factor that is
356  // a double.
357  ST df = pl.get<double>("relaxation: damping factor");
358  pl.remove("relaxation: damping factor");
359  pl.set("relaxation: damping factor",df);
360  }
361 
363 
364  const Details::RelaxationType precType =
365  getIntegralValue<Details::RelaxationType> (pl, "relaxation: type");
366  const std::string precTypeStr = getStringValue<Details::RelaxationType>(pl, "relaxation: type");
367  // We only access "relaxation: type" using strings in the rest of the code
368  pl.set<std::string>("relaxation: type", precTypeStr);
369  pl.get<std::string>("relaxation: type"); // We need to mark the parameter as "used"
370  const int numSweeps = pl.get<int> ("relaxation: sweeps");
371  const ST dampingFactor = pl.get<ST> ("relaxation: damping factor");
372  const bool zeroStartSol = pl.get<bool> ("relaxation: zero starting solution");
373  const bool doBackwardGS = pl.get<bool> ("relaxation: backward mode");
374  const bool doL1Method = pl.get<bool> ("relaxation: use l1");
375  const magnitude_type l1Eta = pl.get<magnitude_type> ("relaxation: l1 eta");
376  const ST minDiagonalValue = pl.get<ST> ("relaxation: min diagonal value");
377  const bool fixTinyDiagEntries = pl.get<bool> ("relaxation: fix tiny diagonal entries");
378  const bool checkDiagEntries = pl.get<bool> ("relaxation: check diagonal entries");
379  const bool is_matrix_structurally_symmetric = pl.get<bool> ("relaxation: symmetric matrix structure");
380  const bool ifpack2_dump_matrix = pl.get<bool> ("relaxation: ifpack2 dump matrix");
381  const bool timer_for_apply = pl.get<bool> ("timer for apply");
382  int cluster_size = 1;
383  if(pl.isParameter ("relaxation: mtgs cluster size")) //optional parameter
384  cluster_size = pl.get<int> ("relaxation: mtgs cluster size");
385  int long_row_threshold = 0;
386  if(pl.isParameter ("relaxation: long row threshold")) //optional parameter
387  long_row_threshold = pl.get<int> ("relaxation: long row threshold");
388  std::string color_algo_name = pl.get<std::string>("relaxation: mtgs coloring algorithm");
389  //convert to lowercase
390  for(char& c : color_algo_name)
391  c = tolower(c);
392  if(color_algo_name == "default")
393  this->mtColoringAlgorithm_ = KokkosGraph::COLORING_DEFAULT;
394  else if(color_algo_name == "serial")
395  this->mtColoringAlgorithm_ = KokkosGraph::COLORING_SERIAL;
396  else if(color_algo_name == "vb")
397  this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VB;
398  else if(color_algo_name == "vbbit")
399  this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VBBIT;
400  else if(color_algo_name == "vbcs")
401  this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VBCS;
402  else if(color_algo_name == "vbd")
403  this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VBD;
404  else if(color_algo_name == "vbdbit")
405  this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VBDBIT;
406  else if(color_algo_name == "eb")
407  this->mtColoringAlgorithm_ = KokkosGraph::COLORING_EB;
408  else
409  {
410  std::ostringstream msg;
411  msg << "Ifpack2::Relaxation: 'relaxation: mtgs coloring algorithm' = '" << color_algo_name << "' is not valid.\n";
412  msg << "Choices (not case sensitive) are: Default, Serial, VB, VBBIT, VBCS, VBD, VBDBIT, EB.";
414  true, std::invalid_argument, msg.str());
415  }
416 
417  Teuchos::ArrayRCP<local_ordinal_type> localSmoothingIndices = pl.get<Teuchos::ArrayRCP<local_ordinal_type> >("relaxation: local smoothing indices");
418 
419  // for Two-stage Gauss-Seidel
420  if (!std::is_same<double, ST>::value && pl.isType<double>("relaxation: inner damping factor")) {
421  // Make sure that ST=complex can run with a damping factor that is
422  // a double.
423  ST df = pl.get<double>("relaxation: inner damping factor");
424  pl.remove("relaxation: inner damping factor");
425  pl.set("relaxation: inner damping factor",df);
426  }
427  //If long row algorithm was requested, make sure non-cluster (point) multicolor Gauss-Seidel (aka MTGS/MTSGS) will be used.
428  if (long_row_threshold > 0) {
430  cluster_size != 1, std::invalid_argument, "Ifpack2::Relaxation: "
431  "Requested long row MTGS/MTSGS algorithm and cluster GS/SGS, but those are not compatible.");
433  precType != Details::RelaxationType::MTGS && precType != Details::RelaxationType::MTSGS,
434  std::invalid_argument, "Ifpack2::Relaxation: "
435  "Requested long row MTGS/MTSGS algorithm, but this is only compatible with preconditioner types "
436  "'MT Gauss-Seidel' and 'MT Symmetric Gauss-Seidel'.");
437  }
438 
439  const ST innerDampingFactor = pl.get<ST> ("relaxation: inner damping factor");
440  const int numInnerSweeps = pl.get<int> ("relaxation: inner sweeps");
441  const int numOuterSweeps = pl.get<int> ("relaxation: outer sweeps");
442  const bool innerSpTrsv = pl.get<bool> ("relaxation: inner sparse-triangular solve");
443  const bool compactForm = pl.get<bool> ("relaxation: compact form");
444 
445  // "Commit" the changes, now that we've validated everything.
446  PrecType_ = precType;
447  NumSweeps_ = numSweeps;
448  DampingFactor_ = dampingFactor;
449  ZeroStartingSolution_ = zeroStartSol;
450  DoBackwardGS_ = doBackwardGS;
451  DoL1Method_ = doL1Method;
452  L1Eta_ = l1Eta;
453  MinDiagonalValue_ = minDiagonalValue;
454  fixTinyDiagEntries_ = fixTinyDiagEntries;
455  checkDiagEntries_ = checkDiagEntries;
456  clusterSize_ = cluster_size;
457  longRowThreshold_ = long_row_threshold;
458  is_matrix_structurally_symmetric_ = is_matrix_structurally_symmetric;
459  ifpack2_dump_matrix_ = ifpack2_dump_matrix;
460  localSmoothingIndices_ = localSmoothingIndices;
461  // for Two-stage GS
462  NumInnerSweeps_ = numInnerSweeps;
463  NumOuterSweeps_ = numOuterSweeps;
464  InnerSpTrsv_ = innerSpTrsv;
465  InnerDampingFactor_ = innerDampingFactor;
466  CompactForm_ = compactForm;
467  TimerForApply_ = timer_for_apply;
468 }
469 
470 
471 template<class MatrixType>
473 {
474  // FIXME (aprokop 18 Oct 2013) Casting away const is bad here.
475  // but otherwise, we will get [unused] in pl
476  this->setParametersImpl(const_cast<Teuchos::ParameterList&>(pl));
477 }
478 
479 
480 template<class MatrixType>
484  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getComm: "
485  "The input matrix A is null. Please call setMatrix() with a nonnull "
486  "input matrix before calling this method.");
487  return A_->getRowMap ()->getComm ();
488 }
489 
490 
491 template<class MatrixType>
494  return A_;
495 }
496 
497 
498 template<class MatrixType>
499 Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
500  typename MatrixType::global_ordinal_type,
501  typename MatrixType::node_type> >
504  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getDomainMap: "
505  "The input matrix A is null. Please call setMatrix() with a nonnull "
506  "input matrix before calling this method.");
507  return A_->getDomainMap ();
508 }
509 
510 
511 template<class MatrixType>
512 Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
513  typename MatrixType::global_ordinal_type,
514  typename MatrixType::node_type> >
517  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getRangeMap: "
518  "The input matrix A is null. Please call setMatrix() with a nonnull "
519  "input matrix before calling this method.");
520  return A_->getRangeMap ();
521 }
522 
523 
524 template<class MatrixType>
526  return true;
527 }
528 
529 
530 template<class MatrixType>
532  return(NumInitialize_);
533 }
534 
535 
536 template<class MatrixType>
538  return(NumCompute_);
539 }
540 
541 
542 template<class MatrixType>
544  return(NumApply_);
545 }
546 
547 
548 template<class MatrixType>
550  return(InitializeTime_);
551 }
552 
553 
554 template<class MatrixType>
556  return(ComputeTime_);
557 }
558 
559 
560 template<class MatrixType>
562  return(ApplyTime_);
563 }
564 
565 
566 template<class MatrixType>
568  return(ComputeFlops_);
569 }
570 
571 
572 template<class MatrixType>
574  return(ApplyFlops_);
575 }
576 
577 
578 
579 template<class MatrixType>
582  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getNodeSmootherComplexity: "
583  "The input matrix A is null. Please call setMatrix() with a nonnull "
584  "input matrix, then call compute(), before calling this method.");
585  // Relaxation methods cost roughly one apply + one diagonal inverse per iteration
586  return A_->getLocalNumRows() + A_->getLocalNumEntries();
587 }
588 
589 
590 template<class MatrixType>
591 void
593 apply (const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
594  Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
595  Teuchos::ETransp /* mode */,
596  scalar_type alpha,
597  scalar_type beta) const
598 {
599  using Teuchos::as;
600  using Teuchos::RCP;
601  using Teuchos::rcp;
602  using Teuchos::rcpFromRef;
603  typedef Tpetra::MultiVector<scalar_type, local_ordinal_type,
606  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::apply: "
607  "The input matrix A is null. Please call setMatrix() with a nonnull "
608  "input matrix, then call compute(), before calling this method.");
610  ! isComputed (),
611  std::runtime_error,
612  "Ifpack2::Relaxation::apply: You must call compute() on this Ifpack2 "
613  "preconditioner instance before you may call apply(). You may call "
614  "isComputed() to find out if compute() has been called already.");
615  TEUCHOS_TEST_FOR_EXCEPTION(
616  X.getNumVectors() != Y.getNumVectors(),
617  std::runtime_error,
618  "Ifpack2::Relaxation::apply: X and Y have different numbers of columns. "
619  "X has " << X.getNumVectors() << " columns, but Y has "
620  << Y.getNumVectors() << " columns.");
621  TEUCHOS_TEST_FOR_EXCEPTION(
622  beta != STS::zero (), std::logic_error,
623  "Ifpack2::Relaxation::apply: beta = " << beta << " != 0 case not "
624  "implemented.");
625 
627  const std::string timerName ("Ifpack2::Relaxation::apply");
628  if (TimerForApply_) {
629  timer = Teuchos::TimeMonitor::lookupCounter (timerName);
630  if (timer.is_null ()) {
631  timer = Teuchos::TimeMonitor::getNewCounter (timerName);
632  }
633  }
634 
635  Teuchos::Time time = Teuchos::Time(timerName);
636  double startTime = time.wallTime();
637 
638  {
640  if (TimerForApply_)
641  timeMon = Teuchos::rcp(new Teuchos::TimeMonitor(*timer));
642 
643  // Special case: alpha == 0.
644  if (alpha == STS::zero ()) {
645  // No floating-point operations, so no need to update a count.
646  Y.putScalar (STS::zero ());
647  }
648  else {
649  // If X and Y alias one another, then we need to create an
650  // auxiliary vector, Xcopy (a deep copy of X).
651  RCP<const MV> Xcopy;
652  {
653  if (X.aliases(Y)) {
654  Xcopy = rcp (new MV (X, Teuchos::Copy));
655  } else {
656  Xcopy = rcpFromRef (X);
657  }
658  }
659 
660  // Each of the following methods updates the flop count itself.
661  // All implementations handle zeroing out the starting solution
662  // (if necessary) themselves.
663  switch (PrecType_) {
664  case Ifpack2::Details::JACOBI:
665  ApplyInverseJacobi(*Xcopy,Y);
666  break;
667  case Ifpack2::Details::GS:
668  ApplyInverseSerialGS(*Xcopy, Y, DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward);
669  break;
670  case Ifpack2::Details::SGS:
671  ApplyInverseSerialGS(*Xcopy, Y, Tpetra::Symmetric);
672  break;
673  case Ifpack2::Details::MTGS:
674  case Ifpack2::Details::GS2:
675  ApplyInverseMTGS_CrsMatrix(*Xcopy, Y, DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward);
676  break;
677  case Ifpack2::Details::MTSGS:
678  case Ifpack2::Details::SGS2:
679  ApplyInverseMTGS_CrsMatrix(*Xcopy, Y, Tpetra::Symmetric);
680  break;
681  case Ifpack2::Details::RICHARDSON:
682  ApplyInverseRichardson(*Xcopy,Y);
683  break;
684 
685  default:
686  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
687  "Ifpack2::Relaxation::apply: Invalid preconditioner type enum value "
688  << PrecType_ << ". Please report this bug to the Ifpack2 developers.");
689  }
690  if (alpha != STS::one ()) {
691  Y.scale (alpha);
692  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
693  const double numVectors = as<double> (Y.getNumVectors ());
694  ApplyFlops_ += numGlobalRows * numVectors;
695  }
696  }
697  }
698  ApplyTime_ += (time.wallTime() - startTime);
699  ++NumApply_;
700 }
701 
702 
703 template<class MatrixType>
704 void
706 applyMat (const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
707  Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
708  Teuchos::ETransp mode) const
709 {
711  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::applyMat: "
712  "The input matrix A is null. Please call setMatrix() with a nonnull "
713  "input matrix, then call compute(), before calling this method.");
715  ! isComputed (), std::runtime_error, "Ifpack2::Relaxation::applyMat: "
716  "isComputed() must be true before you may call applyMat(). "
717  "Please call compute() before calling this method.");
718  TEUCHOS_TEST_FOR_EXCEPTION(
719  X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
720  "Ifpack2::Relaxation::applyMat: X.getNumVectors() = " << X.getNumVectors ()
721  << " != Y.getNumVectors() = " << Y.getNumVectors () << ".");
722  A_->apply (X, Y, mode);
723 }
724 
725 
726 template<class MatrixType>
728 {
729  const char methodName[] = "Ifpack2::Relaxation::initialize";
730 
732  (A_.is_null (), std::runtime_error, methodName << ": The "
733  "input matrix A is null. Please call setMatrix() with "
734  "a nonnull input matrix before calling this method.");
735 
738 
739  double startTime = timer->wallTime();
740 
741  { // Timing of initialize starts here
742  Teuchos::TimeMonitor timeMon (*timer);
743  isInitialized_ = false;
744 
745  {
746  auto rowMap = A_->getRowMap ();
747  auto comm = rowMap.is_null () ? Teuchos::null : rowMap->getComm ();
748  IsParallel_ = ! comm.is_null () && comm->getSize () > 1;
749  }
750 
751  // mfh 21 Mar 2013, 07 May 2019: The Import object may be null,
752  // but in that case, the domain and column Maps are the same and
753  // we don't need to Import anyway.
754  //
755  // mfh 07 May 2019: A_->getGraph() might be an
756  // OverlappingRowGraph, which doesn't have an Import object.
757  // However, in that case, the comm should have size 1.
758  Importer_ = IsParallel_ ? A_->getGraph ()->getImporter () :
759  Teuchos::null;
760 
761  {
763  Teuchos::rcp_dynamic_cast<const block_crs_matrix_type> (A_);
764  hasBlockCrsMatrix_ = ! A_bcrs.is_null ();
765  }
766 
767  serialGaussSeidel_ = Teuchos::null;
768 
769  if (PrecType_ == Details::MTGS || PrecType_ == Details::MTSGS ||
770  PrecType_ == Details::GS2 || PrecType_ == Details::SGS2) {
771  auto crsMat = Details::getCrsMatrix(A_);
773  (crsMat.is_null(), std::logic_error, methodName << ": "
774  "Multithreaded Gauss-Seidel methods currently only work "
775  "when the input matrix is a Tpetra::CrsMatrix.");
776 
777  // FIXME (mfh 27 May 2019) Dumping the matrix belongs in
778  // compute, not initialize, since users may change the matrix's
779  // values at any time before calling compute.
780  if (ifpack2_dump_matrix_) {
781  static int sequence_number = 0;
782  const std::string file_name = "Ifpack2_MT_GS_" +
783  std::to_string (sequence_number++) + ".mtx";
784  using writer_type = Tpetra::MatrixMarket::Writer<crs_matrix_type>;
785  writer_type::writeSparseFile (file_name, crsMat);
786  }
787 
788  this->mtKernelHandle_ = Teuchos::rcp (new mt_kernel_handle_type ());
789  if (mtKernelHandle_->get_gs_handle () == nullptr) {
790  if (PrecType_ == Details::GS2 || PrecType_ == Details::SGS2)
791  mtKernelHandle_->create_gs_handle (KokkosSparse::GS_TWOSTAGE);
792  else if(this->clusterSize_ == 1) {
793  mtKernelHandle_->create_gs_handle (KokkosSparse::GS_DEFAULT, this->mtColoringAlgorithm_);
794  mtKernelHandle_->get_point_gs_handle()->set_long_row_threshold(longRowThreshold_);
795  }
796  else
797  mtKernelHandle_->create_gs_handle (KokkosSparse::CLUSTER_DEFAULT, this->clusterSize_, this->mtColoringAlgorithm_);
798  }
799  local_matrix_device_type kcsr = crsMat->getLocalMatrixDevice ();
800  if (PrecType_ == Details::GS2 || PrecType_ == Details::SGS2) {
801  // set parameters for two-stage GS
802  mtKernelHandle_->set_gs_set_num_inner_sweeps (NumInnerSweeps_);
803  mtKernelHandle_->set_gs_set_num_outer_sweeps (NumOuterSweeps_);
804  mtKernelHandle_->set_gs_set_inner_damp_factor (InnerDampingFactor_);
805  mtKernelHandle_->set_gs_twostage (!InnerSpTrsv_, A_->getLocalNumRows ());
806  mtKernelHandle_->set_gs_twostage_compact_form (CompactForm_);
807  }
808 
809  KokkosSparse::Experimental::gauss_seidel_symbolic(
810  mtKernelHandle_.getRawPtr (),
811  A_->getLocalNumRows (),
812  A_->getLocalNumCols (),
813  kcsr.graph.row_map,
814  kcsr.graph.entries,
815  is_matrix_structurally_symmetric_);
816  }
817  } // timing of initialize stops here
818 
819  InitializeTime_ += (timer->wallTime() - startTime);
820  ++NumInitialize_;
821  isInitialized_ = true;
822 }
823 
824 namespace Impl {
825 template <typename BlockDiagView>
826 struct InvertDiagBlocks {
827  typedef typename BlockDiagView::size_type Size;
828 
829 private:
830  typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
831  template <typename View>
832  using UnmanagedView = Kokkos::View<typename View::data_type, typename View::array_layout,
833  typename View::device_type, Unmanaged>;
834 
835  typedef typename BlockDiagView::non_const_value_type Scalar;
836  typedef typename BlockDiagView::device_type Device;
837  typedef Kokkos::View<Scalar**, Kokkos::LayoutRight, Device> RWrk;
838  typedef Kokkos::View<int**, Kokkos::LayoutRight, Device> IWrk;
839 
840  UnmanagedView<BlockDiagView> block_diag_;
841  // TODO Use thread team and scratch memory space. In this first
842  // pass, provide workspace for each block.
843  RWrk rwrk_buf_;
844  UnmanagedView<RWrk> rwrk_;
845  IWrk iwrk_buf_;
846  UnmanagedView<IWrk> iwrk_;
847 
848 public:
849  InvertDiagBlocks (BlockDiagView& block_diag)
850  : block_diag_(block_diag)
851  {
852  const auto blksz = block_diag.extent(1);
853  Kokkos::resize(rwrk_buf_, block_diag_.extent(0), blksz);
854  rwrk_ = rwrk_buf_;
855  Kokkos::resize(iwrk_buf_, block_diag_.extent(0), blksz);
856  iwrk_ = iwrk_buf_;
857  }
858 
859  KOKKOS_INLINE_FUNCTION
860  void operator() (const Size i, int& jinfo) const {
861  auto D_cur = Kokkos::subview(block_diag_, i, Kokkos::ALL(), Kokkos::ALL());
862  auto ipiv = Kokkos::subview(iwrk_, i, Kokkos::ALL());
863  auto work = Kokkos::subview(rwrk_, i, Kokkos::ALL());
864  int info = 0;
865  Tpetra::GETF2(D_cur, ipiv, info);
866  if (info) {
867  ++jinfo;
868  return;
869  }
870  Tpetra::GETRI(D_cur, ipiv, work, info);
871  if (info) ++jinfo;
872  }
873 };
874 }
875 
876 template<class MatrixType>
877 void Relaxation<MatrixType>::computeBlockCrs ()
878 {
879  using Kokkos::ALL;
880  using Teuchos::Array;
881  using Teuchos::ArrayRCP;
882  using Teuchos::ArrayView;
883  using Teuchos::as;
884  using Teuchos::Comm;
885  using Teuchos::RCP;
886  using Teuchos::rcp;
887  using Teuchos::REDUCE_MAX;
888  using Teuchos::REDUCE_MIN;
889  using Teuchos::REDUCE_SUM;
890  using Teuchos::rcp_dynamic_cast;
891  using Teuchos::reduceAll;
892  typedef local_ordinal_type LO;
893 
894  const std::string timerName ("Ifpack2::Relaxation::computeBlockCrs");
896  if (timer.is_null ()) {
897  timer = Teuchos::TimeMonitor::getNewCounter (timerName);
898  }
899  double startTime = timer->wallTime();
900  {
901  Teuchos::TimeMonitor timeMon (*timer);
902 
904  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::"
905  "computeBlockCrs: The input matrix A is null. Please call setMatrix() "
906  "with a nonnull input matrix, then call initialize(), before calling "
907  "this method.");
908  auto blockCrsA = rcp_dynamic_cast<const block_crs_matrix_type> (A_);
910  blockCrsA.is_null(), std::logic_error, "Ifpack2::Relaxation::"
911  "computeBlockCrs: A_ is not a BlockCrsMatrix, but it should be if we "
912  "got this far. Please report this bug to the Ifpack2 developers.");
913 
914  const scalar_type one = STS::one ();
915 
916  // Reset state.
917  IsComputed_ = false;
918 
919  const LO lclNumMeshRows =
920  blockCrsA->getCrsGraph ().getLocalNumRows ();
921  const LO blockSize = blockCrsA->getBlockSize ();
922 
923  if (! savedDiagOffsets_) {
924  blockDiag_ = block_diag_type (); // clear it before reallocating
925  blockDiag_ = block_diag_type ("Ifpack2::Relaxation::blockDiag_",
926  lclNumMeshRows, blockSize, blockSize);
927  if (Teuchos::as<LO>(diagOffsets_.extent (0) ) < lclNumMeshRows) {
928  // Clear diagOffsets_ first (by assigning an empty View to it)
929  // to save memory, before reallocating.
930  diagOffsets_ = Kokkos::View<size_t*, device_type> ();
931  diagOffsets_ = Kokkos::View<size_t*, device_type> ("offsets", lclNumMeshRows);
932  }
933  blockCrsA->getCrsGraph ().getLocalDiagOffsets (diagOffsets_);
935  (static_cast<size_t> (diagOffsets_.extent (0)) !=
936  static_cast<size_t> (blockDiag_.extent (0)),
937  std::logic_error, "diagOffsets_.extent(0) = " <<
938  diagOffsets_.extent (0) << " != blockDiag_.extent(0) = "
939  << blockDiag_.extent (0) <<
940  ". Please report this bug to the Ifpack2 developers.");
941  savedDiagOffsets_ = true;
942  }
943  blockCrsA->getLocalDiagCopy (blockDiag_, diagOffsets_);
944 
945  // Use an unmanaged View in this method, so that when we take
946  // subviews of it (to get each diagonal block), we don't have to
947  // touch the reference count. Reference count updates are a
948  // thread scalability bottleneck and have a performance cost even
949  // without using threads.
950  unmanaged_block_diag_type blockDiag = blockDiag_;
951 
952  if (DoL1Method_ && IsParallel_) {
953  const scalar_type two = one + one;
954  const size_t maxLength = A_->getLocalMaxNumRowEntries ();
955  nonconst_local_inds_host_view_type indices ("indices",maxLength);
956  nonconst_values_host_view_type values_ ("values",maxLength * blockSize * blockSize);
957  size_t numEntries = 0;
958 
959  for (LO i = 0; i < lclNumMeshRows; ++i) {
960  // FIXME (mfh 16 Dec 2015) Get views instead of copies.
961  blockCrsA->getLocalRowCopy (i, indices, values_, numEntries);
962  scalar_type * values = reinterpret_cast<scalar_type*>(values_.data());
963 
964  auto diagBlock = Kokkos::subview (blockDiag, i, ALL (), ALL ());
965  for (LO subRow = 0; subRow < blockSize; ++subRow) {
966  magnitude_type diagonal_boost = STM::zero ();
967  for (size_t k = 0 ; k < numEntries ; ++k) {
968  if (indices[k] >= lclNumMeshRows) {
969  const size_t offset = blockSize*blockSize*k + subRow*blockSize;
970  for (LO subCol = 0; subCol < blockSize; ++subCol) {
971  diagonal_boost += STS::magnitude (values[offset+subCol] / two);
972  }
973  }
974  }
975  if (STS::magnitude (diagBlock(subRow, subRow)) < L1Eta_ * diagonal_boost) {
976  diagBlock(subRow, subRow) += diagonal_boost;
977  }
978  }
979  }
980  }
981 
982  int info = 0;
983  {
984  Impl::InvertDiagBlocks<unmanaged_block_diag_type> idb(blockDiag);
985  typedef typename unmanaged_block_diag_type::execution_space exec_space;
986  typedef Kokkos::RangePolicy<exec_space, LO> range_type;
987 
988  Kokkos::parallel_reduce (range_type (0, lclNumMeshRows), idb, info);
989  // FIXME (mfh 19 May 2017) Different processes may not
990  // necessarily have this error all at the same time, so it would
991  // be better just to preserve a local error state and let users
992  // check.
994  (info > 0, std::runtime_error, "GETF2 or GETRI failed on = " << info
995  << " diagonal blocks.");
996  }
997 
998  // In a debug build, do an extra test to make sure that all the
999  // factorizations were computed correctly.
1000 #ifdef HAVE_IFPACK2_DEBUG
1001  const int numResults = 2;
1002  // Use "max = -min" trick to get min and max in a single all-reduce.
1003  int lclResults[2], gblResults[2];
1004  lclResults[0] = info;
1005  lclResults[1] = -info;
1006  gblResults[0] = 0;
1007  gblResults[1] = 0;
1008  reduceAll<int, int> (* (A_->getGraph ()->getComm ()), REDUCE_MIN,
1009  numResults, lclResults, gblResults);
1011  (gblResults[0] != 0 || gblResults[1] != 0, std::runtime_error,
1012  "Ifpack2::Relaxation::compute: When processing the input "
1013  "Tpetra::BlockCrsMatrix, one or more diagonal block LU factorizations "
1014  "failed on one or more (MPI) processes.");
1015 #endif // HAVE_IFPACK2_DEBUG
1016  serialGaussSeidel_ = rcp(new SerialGaussSeidel(blockCrsA, blockDiag_, localSmoothingIndices_, DampingFactor_));
1017  } // end TimeMonitor scope
1018 
1019  ComputeTime_ += (timer->wallTime() - startTime);
1020  ++NumCompute_;
1021  IsComputed_ = true;
1022 }
1023 
1024 template<class MatrixType>
1026 {
1027  using Teuchos::Array;
1028  using Teuchos::ArrayRCP;
1029  using Teuchos::ArrayView;
1030  using Teuchos::as;
1031  using Teuchos::Comm;
1032  using Teuchos::RCP;
1033  using Teuchos::rcp;
1034  using Teuchos::REDUCE_MAX;
1035  using Teuchos::REDUCE_MIN;
1036  using Teuchos::REDUCE_SUM;
1037  using Teuchos::reduceAll;
1038  using LO = local_ordinal_type;
1039  using vector_type = Tpetra::Vector<scalar_type, local_ordinal_type,
1041  using IST = typename vector_type::impl_scalar_type;
1042  using KAT = Kokkos::ArithTraits<IST>;
1043 
1044  const char methodName[] = "Ifpack2::Relaxation::compute";
1045  const scalar_type zero = STS::zero ();
1046  const scalar_type one = STS::one ();
1047 
1049  (A_.is_null (), std::runtime_error, methodName << ": "
1050  "The input matrix A is null. Please call setMatrix() with a nonnull "
1051  "input matrix, then call initialize(), before calling this method.");
1052 
1053  // We don't count initialization in compute() time.
1054  if (! isInitialized ()) {
1055  initialize ();
1056  }
1057 
1058  if (hasBlockCrsMatrix_) {
1059  computeBlockCrs ();
1060  return;
1061  }
1062 
1065  double startTime = timer->wallTime();
1066 
1067  { // Timing of compute starts here.
1068  Teuchos::TimeMonitor timeMon (*timer);
1069 
1070  // Precompute some quantities for "fixing" zero or tiny diagonal
1071  // entries. We'll only use them if this "fixing" is enabled.
1072  //
1073  // SmallTraits covers for the lack of eps() in
1074  // Teuchos::ScalarTraits when its template parameter is not a
1075  // floating-point type. (Ifpack2 sometimes gets instantiated for
1076  // integer Scalar types.)
1077  IST oneOverMinDiagVal = KAT::one () / static_cast<IST> (SmallTraits<scalar_type>::eps ());
1078  if ( MinDiagonalValue_ != zero)
1079  oneOverMinDiagVal = KAT::one () / static_cast<IST> (MinDiagonalValue_);
1080 
1081  // It's helpful not to have to recompute this magnitude each time.
1082  const magnitude_type minDiagValMag = STS::magnitude (MinDiagonalValue_);
1083 
1084  const LO numMyRows = static_cast<LO> (A_->getLocalNumRows ());
1085 
1087  (NumSweeps_ < 0, std::logic_error, methodName
1088  << ": NumSweeps_ = " << NumSweeps_ << " < 0. "
1089  "Please report this bug to the Ifpack2 developers.");
1090  IsComputed_ = false;
1091 
1092  if (Diagonal_.is_null()) {
1093  // A_->getLocalDiagCopy fills in all Vector entries, even if the
1094  // matrix has no stored entries in the corresponding rows.
1095  Diagonal_ = rcp (new vector_type (A_->getRowMap (), false));
1096  }
1097 
1098  if (checkDiagEntries_) {
1099  //
1100  // Check diagonal elements, replace zero elements with the minimum
1101  // diagonal value, and store their inverses. Count the number of
1102  // "small" and zero diagonal entries while we're at it.
1103  //
1104  size_t numSmallDiagEntries = 0; // "small" includes zero
1105  size_t numZeroDiagEntries = 0; // # zero diagonal entries
1106  size_t numNegDiagEntries = 0; // # negative (real parts of) diagonal entries
1107  magnitude_type minMagDiagEntryMag = STM::zero ();
1108  magnitude_type maxMagDiagEntryMag = STM::zero ();
1109 
1110  // FIXME: We are extracting the diagonal more than once. That
1111  // isn't very efficient, but we shouldn't be checking
1112  // diagonal entries at scale anyway.
1113  A_->getLocalDiagCopy (*Diagonal_); // slow path for row matrix
1114  std::unique_ptr<vector_type> origDiag;
1115  origDiag = std::unique_ptr<vector_type> (new vector_type (*Diagonal_, Teuchos::Copy));
1116  auto diag2d = Diagonal_->getLocalViewHost(Tpetra::Access::ReadWrite);
1117  auto diag = Kokkos::subview(diag2d, Kokkos::ALL(), 0);
1118  // As we go, keep track of the diagonal entries with the
1119  // least and greatest magnitude. We could use the trick of
1120  // starting min with +Inf and max with -Inf, but that
1121  // doesn't work if scalar_type is a built-in integer type.
1122  // Thus, we have to start by reading the first diagonal
1123  // entry redundantly.
1124  if (numMyRows != 0) {
1125  const magnitude_type d_0_mag = KAT::abs (diag(0));
1126  minMagDiagEntryMag = d_0_mag;
1127  maxMagDiagEntryMag = d_0_mag;
1128  }
1129 
1130  // Go through all the diagonal entries. Compute counts of
1131  // small-magnitude, zero, and negative-real-part entries.
1132  // Invert the diagonal entries that aren't too small. For
1133  // those too small in magnitude, replace them with
1134  // 1/MinDiagonalValue_ (or 1/eps if MinDiagonalValue_
1135  // happens to be zero).
1136  for (LO i = 0; i < numMyRows; ++i) {
1137  const IST d_i = diag(i);
1138  const magnitude_type d_i_mag = KAT::abs (d_i);
1139  // Work-around for GitHub Issue #5269.
1140  //const magnitude_type d_i_real = KAT::real (d_i);
1141  const auto d_i_real = getRealValue (d_i);
1142 
1143  // We can't compare complex numbers, but we can compare their
1144  // real parts.
1145  if (d_i_real < STM::zero ()) {
1146  ++numNegDiagEntries;
1147  }
1148  if (d_i_mag < minMagDiagEntryMag) {
1149  minMagDiagEntryMag = d_i_mag;
1150  }
1151  if (d_i_mag > maxMagDiagEntryMag) {
1152  maxMagDiagEntryMag = d_i_mag;
1153  }
1154 
1155  if (fixTinyDiagEntries_) {
1156  // <= not <, in case minDiagValMag is zero.
1157  if (d_i_mag <= minDiagValMag) {
1158  ++numSmallDiagEntries;
1159  if (d_i_mag == STM::zero ()) {
1160  ++numZeroDiagEntries;
1161  }
1162  diag(i) = oneOverMinDiagVal;
1163  }
1164  else {
1165  diag(i) = KAT::one () / d_i;
1166  }
1167  }
1168  else { // Don't fix zero or tiny diagonal entries.
1169  // <= not <, in case minDiagValMag is zero.
1170  if (d_i_mag <= minDiagValMag) {
1171  ++numSmallDiagEntries;
1172  if (d_i_mag == STM::zero ()) {
1173  ++numZeroDiagEntries;
1174  }
1175  }
1176  diag(i) = KAT::one () / d_i;
1177  }
1178  }
1179 
1180  // Collect global data about the diagonal entries. Only do this
1181  // (which involves O(1) all-reduces) if the user asked us to do
1182  // the extra work.
1183  //
1184  // FIXME (mfh 28 Mar 2013) This is wrong if some processes have
1185  // zero rows. Fixing this requires one of two options:
1186  //
1187  // 1. Use a custom reduction operation that ignores processes
1188  // with zero diagonal entries.
1189  // 2. Split the communicator, compute all-reduces using the
1190  // subcommunicator over processes that have a nonzero number
1191  // of diagonal entries, and then broadcast from one of those
1192  // processes (if there is one) to the processes in the other
1193  // subcommunicator.
1194  const Comm<int>& comm = * (A_->getRowMap ()->getComm ());
1195 
1196  // Compute global min and max magnitude of entries.
1197  Array<magnitude_type> localVals (2);
1198  localVals[0] = minMagDiagEntryMag;
1199  // (- (min (- x))) is the same as (max x).
1200  localVals[1] = -maxMagDiagEntryMag;
1201  Array<magnitude_type> globalVals (2);
1202  reduceAll<int, magnitude_type> (comm, REDUCE_MIN, 2,
1203  localVals.getRawPtr (),
1204  globalVals.getRawPtr ());
1205  globalMinMagDiagEntryMag_ = globalVals[0];
1206  globalMaxMagDiagEntryMag_ = -globalVals[1];
1207 
1208  Array<size_t> localCounts (3);
1209  localCounts[0] = numSmallDiagEntries;
1210  localCounts[1] = numZeroDiagEntries;
1211  localCounts[2] = numNegDiagEntries;
1212  Array<size_t> globalCounts (3);
1213  reduceAll<int, size_t> (comm, REDUCE_SUM, 3,
1214  localCounts.getRawPtr (),
1215  globalCounts.getRawPtr ());
1216  globalNumSmallDiagEntries_ = globalCounts[0];
1217  globalNumZeroDiagEntries_ = globalCounts[1];
1218  globalNumNegDiagEntries_ = globalCounts[2];
1219 
1220  // Compute and save the difference between the computed inverse
1221  // diagonal, and the original diagonal's inverse.
1222  vector_type diff (A_->getRowMap ());
1223  diff.reciprocal (*origDiag);
1224  diff.update (-one, *Diagonal_, one);
1225  globalDiagNormDiff_ = diff.norm2 ();
1226  }
1227 
1228  // Extract the diagonal entries. The CrsMatrix graph version is
1229  // faster than the row matrix version for subsequent calls to
1230  // compute(), since it caches the diagonal offsets.
1231 
1232  bool debugAgainstSlowPath = false;
1233 
1234  auto crsMat = Details::getCrsMatrix(A_);
1235 
1236  if (crsMat.get() && crsMat->isFillComplete ()) {
1237  // The invDiagKernel object computes diagonal offsets if
1238  // necessary. The "compute" call extracts diagonal enties,
1239  // optionally applies the L1 method and replacement of small
1240  // entries, and then inverts.
1241  if (invDiagKernel_.is_null())
1242  invDiagKernel_ = rcp(new Ifpack2::Details::InverseDiagonalKernel<op_type>(crsMat));
1243  else
1244  invDiagKernel_->setMatrix(crsMat);
1245  invDiagKernel_->compute(*Diagonal_,
1246  DoL1Method_ && IsParallel_, L1Eta_,
1247  fixTinyDiagEntries_, minDiagValMag);
1248  savedDiagOffsets_ = true;
1249 
1250  // mfh 27 May 2019: Later on, we should introduce an IFPACK2_DEBUG
1251  // environment variable to control this behavior at run time.
1252 #ifdef HAVE_IFPACK2_DEBUG
1253  debugAgainstSlowPath = true;
1254 #endif
1255  }
1256 
1257  if (crsMat.is_null() || ! crsMat->isFillComplete () || debugAgainstSlowPath) {
1258  // We could not call the CrsMatrix version above, or want to
1259  // debug by comparing against the slow path.
1260 
1261  // FIXME: The L1 method in this code path runs on host, since we
1262  // don't have device access for row matrices.
1263 
1264  RCP<vector_type> Diagonal;
1265  if (debugAgainstSlowPath)
1266  Diagonal = rcp(new vector_type(A_->getRowMap ()));
1267  else
1268  Diagonal = Diagonal_;
1269 
1270  A_->getLocalDiagCopy (*Diagonal); // slow path for row matrix
1271 
1272  // Setup for L1 Methods.
1273  // Here we add half the value of the off-processor entries in the row,
1274  // but only if diagonal isn't sufficiently large.
1275  //
1276  // This follows from Equation (6.5) in: Baker, Falgout, Kolev and
1277  // Yang. "Multigrid Smoothers for Ultraparallel Computing." SIAM
1278  // J. Sci. Comput., Vol. 33, No. 5. (2011), pp. 2864-2887.
1279  //
1280  if (DoL1Method_ && IsParallel_) {
1281  const row_matrix_type& A_row = *A_;
1282  auto diag = Diagonal->getLocalViewHost(Tpetra::Access::ReadWrite);
1283  const magnitude_type two = STM::one () + STM::one ();
1284  const size_t maxLength = A_row.getLocalMaxNumRowEntries ();
1285  nonconst_local_inds_host_view_type indices("indices",maxLength);
1286  nonconst_values_host_view_type values("values",maxLength);
1287  size_t numEntries;
1288 
1289  for (LO i = 0; i < numMyRows; ++i) {
1290  A_row.getLocalRowCopy (i, indices, values, numEntries);
1291  magnitude_type diagonal_boost = STM::zero ();
1292  for (size_t k = 0 ; k < numEntries; ++k) {
1293  if (indices[k] >= numMyRows) {
1294  diagonal_boost += STS::magnitude (values[k] / two);
1295  }
1296  }
1297  if (KAT::magnitude (diag(i, 0)) < L1Eta_ * diagonal_boost) {
1298  diag(i, 0) += diagonal_boost;
1299  }
1300  }
1301  }
1302 
1303  //
1304  // Invert the matrix's diagonal entries (in Diagonal).
1305  //
1306  if (fixTinyDiagEntries_) {
1307  // Go through all the diagonal entries. Invert those that
1308  // aren't too small in magnitude. For those that are too
1309  // small in magnitude, replace them with oneOverMinDiagVal.
1310  auto localDiag = Diagonal->getLocalViewDevice(Tpetra::Access::ReadWrite);
1311  Kokkos::parallel_for(Kokkos::RangePolicy<MyExecSpace>(0, localDiag.extent(0)),
1312  KOKKOS_LAMBDA (local_ordinal_type i) {
1313  auto d_i = localDiag(i, 0);
1314  const magnitude_type d_i_mag = KAT::magnitude (d_i);
1315  // <= not <, in case minDiagValMag is zero.
1316  if (d_i_mag <= minDiagValMag) {
1317  d_i = oneOverMinDiagVal;
1318  }
1319  else {
1320  // For Stokhos types, operator/ returns an expression
1321  // type. Explicitly convert to IST before returning.
1322  d_i = IST (KAT::one () / d_i);
1323  }
1324  localDiag(i, 0) = d_i;
1325  });
1326  }
1327  else { // don't fix tiny or zero diagonal entries
1328  Diagonal->reciprocal (*Diagonal);
1329  }
1330 
1331  if (debugAgainstSlowPath) {
1332  // Validate the fast-path diagonal against the slow-path diagonal.
1333  Diagonal->update (STS::one (), *Diagonal_, -STS::one ());
1334  const magnitude_type err = Diagonal->normInf ();
1335  // The two diagonals should be exactly the same, so their
1336  // difference should be exactly zero.
1338  (err > 100*STM::eps(), std::logic_error, methodName << ": "
1339  << "\"fast-path\" diagonal computation failed. "
1340  "\\|D1 - D2\\|_inf = " << err << ".");
1341  }
1342  }
1343 
1344  // Count floating-point operations of computing the inverse diagonal.
1345  //
1346  // FIXME (mfh 30 Mar 2013) Shouldn't counts be global, not local?
1347  if (STS::isComplex) { // magnitude: at least 3 flops per diagonal entry
1348  ComputeFlops_ += 4.0 * numMyRows;
1349  }
1350  else {
1351  ComputeFlops_ += numMyRows;
1352  }
1353 
1354  if (PrecType_ == Ifpack2::Details::MTGS ||
1355  PrecType_ == Ifpack2::Details::MTSGS ||
1356  PrecType_ == Ifpack2::Details::GS2 ||
1357  PrecType_ == Ifpack2::Details::SGS2) {
1358 
1359  //KokkosKernels GaussSeidel Initialization.
1360  using scalar_view_t = typename local_matrix_device_type::values_type;
1361 
1363  (crsMat.is_null(), std::logic_error, methodName << ": "
1364  "Multithreaded Gauss-Seidel methods currently only work "
1365  "when the input matrix is a Tpetra::CrsMatrix.");
1366  local_matrix_device_type kcsr = crsMat->getLocalMatrixDevice ();
1367 
1368  //TODO BMK: This should be ReadOnly, and KokkosKernels should accept a
1369  //const-valued view for user-provided D^-1. OK for now, Diagonal_ is nonconst.
1370  auto diagView_2d = Diagonal_->getLocalViewDevice (Tpetra::Access::ReadWrite);
1371  scalar_view_t diagView_1d = Kokkos::subview (diagView_2d, Kokkos::ALL (), 0);
1372  KokkosSparse::Experimental::gauss_seidel_numeric(
1373  mtKernelHandle_.getRawPtr (),
1374  A_->getLocalNumRows (),
1375  A_->getLocalNumCols (),
1376  kcsr.graph.row_map,
1377  kcsr.graph.entries,
1378  kcsr.values,
1379  diagView_1d,
1380  is_matrix_structurally_symmetric_);
1381  }
1382  else if(PrecType_ == Ifpack2::Details::GS || PrecType_ == Ifpack2::Details::SGS) {
1383  if(crsMat)
1384  serialGaussSeidel_ = rcp(new SerialGaussSeidel(crsMat, Diagonal_, localSmoothingIndices_, DampingFactor_));
1385  else
1386  serialGaussSeidel_ = rcp(new SerialGaussSeidel(A_, Diagonal_, localSmoothingIndices_, DampingFactor_));
1387  }
1388  } // end TimeMonitor scope
1389 
1390  ComputeTime_ += (timer->wallTime() - startTime);
1391  ++NumCompute_;
1392  IsComputed_ = true;
1393 }
1394 
1395 
1396 template<class MatrixType>
1397 void
1399 ApplyInverseRichardson (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1400  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
1401 {
1402  using Teuchos::as;
1403  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1404  const double numVectors = as<double> (X.getNumVectors ());
1405  if (ZeroStartingSolution_) {
1406  // For the first Richardson sweep, if we are allowed to assume that
1407  // the initial guess is zero, then Richardson is just alpha times the RHS
1408  // Compute it as Y(i,j) = DampingFactor_ * X(i,j).
1409  Y.scale(DampingFactor_,X);
1410 
1411  // Count (global) floating-point operations. Ifpack2 represents
1412  // this as a floating-point number rather than an integer, so that
1413  // overflow (for a very large number of calls, or a very large
1414  // problem) is approximate instead of catastrophic.
1415  double flopUpdate = 0.0;
1416  if (DampingFactor_ == STS::one ()) {
1417  // Y(i,j) = X(i,j): one multiply for each entry of Y.
1418  flopUpdate = numGlobalRows * numVectors;
1419  } else {
1420  // Y(i,j) = DampingFactor_ * X(i,j):
1421  // One multiplies per entry of Y.
1422  flopUpdate = numGlobalRows * numVectors;
1423  }
1424  ApplyFlops_ += flopUpdate;
1425  if (NumSweeps_ == 1) {
1426  return;
1427  }
1428  }
1429  // If we were allowed to assume that the starting guess was zero,
1430  // then we have already done the first sweep above.
1431  const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1432 
1433  // Allocate a multivector if the cached one isn't perfect
1434  updateCachedMultiVector(Y.getMap(),as<size_t>(numVectors));
1435 
1436  for (int j = startSweep; j < NumSweeps_; ++j) {
1437  // Each iteration: Y = Y + \omega D^{-1} (X - A*Y)
1438  Tpetra::Details::residual(*A_,Y,X,*cachedMV_);
1439  Y.update(DampingFactor_,*cachedMV_,STS::one());
1440  }
1441 
1442  // For each column of output, for each pass over the matrix:
1443  //
1444  // - One + and one * for each matrix entry
1445  // - One / and one + for each row of the matrix
1446  // - If the damping factor is not one: one * for each row of the
1447  // matrix. (It's not fair to count this if the damping factor is
1448  // one, since the implementation could skip it. Whether it does
1449  // or not is the implementation's choice.)
1450 
1451  // Floating-point operations due to the damping factor, per matrix
1452  // row, per direction, per columm of output.
1453  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1454  const double dampingFlops = (DampingFactor_ == STS::one ()) ? 0.0 : 1.0;
1455  ApplyFlops_ += as<double> (NumSweeps_ - startSweep) * numVectors *
1456  (2.0 * numGlobalNonzeros + dampingFlops);
1457 }
1458 
1459 
1460 template<class MatrixType>
1461 void
1462 Relaxation<MatrixType>::
1463 ApplyInverseJacobi (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1464  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
1465 {
1466  using Teuchos::as;
1467  if (hasBlockCrsMatrix_) {
1468  ApplyInverseJacobi_BlockCrsMatrix (X, Y);
1469  return;
1470  }
1471 
1472  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1473  const double numVectors = as<double> (X.getNumVectors ());
1474  if (ZeroStartingSolution_) {
1475  // For the first Jacobi sweep, if we are allowed to assume that
1476  // the initial guess is zero, then Jacobi is just diagonal
1477  // scaling. (A_ij * x_j = 0 for i != j, since x_j = 0.)
1478  // Compute it as Y(i,j) = DampingFactor_ * X(i,j) * D(i).
1479  Y.elementWiseMultiply (DampingFactor_, *Diagonal_, X, STS::zero ());
1480 
1481  // Count (global) floating-point operations. Ifpack2 represents
1482  // this as a floating-point number rather than an integer, so that
1483  // overflow (for a very large number of calls, or a very large
1484  // problem) is approximate instead of catastrophic.
1485  double flopUpdate = 0.0;
1486  if (DampingFactor_ == STS::one ()) {
1487  // Y(i,j) = X(i,j) * D(i): one multiply for each entry of Y.
1488  flopUpdate = numGlobalRows * numVectors;
1489  } else {
1490  // Y(i,j) = DampingFactor_ * X(i,j) * D(i):
1491  // Two multiplies per entry of Y.
1492  flopUpdate = 2.0 * numGlobalRows * numVectors;
1493  }
1494  ApplyFlops_ += flopUpdate;
1495  if (NumSweeps_ == 1) {
1496  return;
1497  }
1498  }
1499  // If we were allowed to assume that the starting guess was zero,
1500  // then we have already done the first sweep above.
1501  const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1502 
1503  // Allocate a multivector if the cached one isn't perfect
1504  updateCachedMultiVector(Y.getMap(),as<size_t>(numVectors));
1505 
1506  for (int j = startSweep; j < NumSweeps_; ++j) {
1507  // Each iteration: Y = Y + \omega D^{-1} (X - A*Y)
1508  Tpetra::Details::residual(*A_,Y,X,*cachedMV_);
1509  Y.elementWiseMultiply (DampingFactor_, *Diagonal_, *cachedMV_, STS::one ());
1510  }
1511 
1512  // For each column of output, for each pass over the matrix:
1513  //
1514  // - One + and one * for each matrix entry
1515  // - One / and one + for each row of the matrix
1516  // - If the damping factor is not one: one * for each row of the
1517  // matrix. (It's not fair to count this if the damping factor is
1518  // one, since the implementation could skip it. Whether it does
1519  // or not is the implementation's choice.)
1520 
1521  // Floating-point operations due to the damping factor, per matrix
1522  // row, per direction, per columm of output.
1523  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1524  const double dampingFlops = (DampingFactor_ == STS::one ()) ? 0.0 : 1.0;
1525  ApplyFlops_ += as<double> (NumSweeps_ - startSweep) * numVectors *
1526  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1527 }
1528 
1529 template<class MatrixType>
1530 void
1531 Relaxation<MatrixType>::
1532 ApplyInverseJacobi_BlockCrsMatrix (const Tpetra::MultiVector<scalar_type,
1533  local_ordinal_type,
1534  global_ordinal_type,
1535  node_type>& X,
1536  Tpetra::MultiVector<scalar_type,
1537  local_ordinal_type,
1538  global_ordinal_type,
1539  node_type>& Y) const
1540 {
1541  using Tpetra::BlockMultiVector;
1542  using BMV = BlockMultiVector<scalar_type, local_ordinal_type,
1543  global_ordinal_type, node_type>;
1544 
1545  const block_crs_matrix_type* blockMatConst =
1546  dynamic_cast<const block_crs_matrix_type*> (A_.getRawPtr ());
1548  (blockMatConst == nullptr, std::logic_error, "This method should "
1549  "never be called if the matrix A_ is not a BlockCrsMatrix. "
1550  "Please report this bug to the Ifpack2 developers.");
1551  // mfh 23 Jan 2016: Unfortunately, the const cast is necessary.
1552  // This is because applyBlock() is nonconst (more accurate), while
1553  // apply() is const (required by Tpetra::Operator interface, but a
1554  // lie, because it possibly allocates temporary buffers).
1555  block_crs_matrix_type* blockMat =
1556  const_cast<block_crs_matrix_type*> (blockMatConst);
1557 
1558  auto meshRowMap = blockMat->getRowMap ();
1559  auto meshColMap = blockMat->getColMap ();
1560  const local_ordinal_type blockSize = blockMat->getBlockSize ();
1561 
1562  BMV X_blk (X, *meshColMap, blockSize);
1563  BMV Y_blk (Y, *meshRowMap, blockSize);
1564 
1565  if (ZeroStartingSolution_) {
1566  // For the first sweep, if we are allowed to assume that the
1567  // initial guess is zero, then block Jacobi is just block diagonal
1568  // scaling. (A_ij * x_j = 0 for i != j, since x_j = 0.)
1569  Y_blk.blockWiseMultiply (DampingFactor_, blockDiag_, X_blk);
1570  if (NumSweeps_ == 1) {
1571  return;
1572  }
1573  }
1574 
1575  auto pointRowMap = Y.getMap ();
1576  const size_t numVecs = X.getNumVectors ();
1577 
1578  // We don't need to initialize the result MV, since the sparse
1579  // mat-vec will clobber its contents anyway.
1580  BMV A_times_Y (*meshRowMap, *pointRowMap, blockSize, numVecs);
1581 
1582  // If we were allowed to assume that the starting guess was zero,
1583  // then we have already done the first sweep above.
1584  const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1585 
1586  for (int j = startSweep; j < NumSweeps_; ++j) {
1587  blockMat->applyBlock (Y_blk, A_times_Y);
1588  // Y := Y + \omega D^{-1} (X - A*Y). Use A_times_Y as scratch.
1589  Y_blk.blockJacobiUpdate (DampingFactor_, blockDiag_,
1590  X_blk, A_times_Y, STS::one ());
1591  }
1592 }
1593 
1594 template<class MatrixType>
1595 void
1596 Relaxation<MatrixType>::
1597 ApplyInverseSerialGS (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1598  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
1599  Tpetra::ESweepDirection direction) const
1600 {
1601  using this_type = Relaxation<MatrixType>;
1602  // The CrsMatrix version is faster, because it can access the sparse
1603  // matrix data directly, rather than by copying out each row's data
1604  // in turn. Thus, we check whether the RowMatrix is really a
1605  // CrsMatrix.
1606  //
1607  // FIXME (mfh 07 Jul 2013) See note on crs_matrix_type typedef
1608  // declaration in Ifpack2_Relaxation_decl.hpp header file. The code
1609  // will still be correct if the cast fails, but it will use an
1610  // unoptimized kernel.
1611  auto blockCrsMat = Teuchos::rcp_dynamic_cast<const block_crs_matrix_type> (A_);
1612  auto crsMat = Details::getCrsMatrix(A_);
1613  if (blockCrsMat.get()) {
1614  const_cast<this_type&> (*this).ApplyInverseSerialGS_BlockCrsMatrix (*blockCrsMat, X, Y, direction);
1615  }
1616  else if (crsMat.get()) {
1617  ApplyInverseSerialGS_CrsMatrix (*crsMat, X, Y, direction);
1618  }
1619  else {
1620  ApplyInverseSerialGS_RowMatrix (X, Y, direction);
1621  }
1622 }
1623 
1624 
1625 template<class MatrixType>
1626 void
1627 Relaxation<MatrixType>::
1628 ApplyInverseSerialGS_RowMatrix (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1629  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
1630  Tpetra::ESweepDirection direction) const {
1631  using Teuchos::Array;
1632  using Teuchos::ArrayRCP;
1633  using Teuchos::ArrayView;
1634  using Teuchos::as;
1635  using Teuchos::RCP;
1636  using Teuchos::rcp;
1637  using Teuchos::rcpFromRef;
1638  typedef Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> MV;
1639 
1640  // Tpetra's GS implementation for CrsMatrix handles zeroing out the
1641  // starting multivector itself. The generic RowMatrix version here
1642  // does not, so we have to zero out Y here.
1643  if (ZeroStartingSolution_) {
1644  Y.putScalar (STS::zero ());
1645  }
1646 
1647  size_t NumVectors = X.getNumVectors();
1648 
1649  RCP<MV> Y2;
1650  if (IsParallel_) {
1651  if (Importer_.is_null ()) { // domain and column Maps are the same.
1652  updateCachedMultiVector (Y.getMap (), NumVectors);
1653  }
1654  else {
1655  updateCachedMultiVector (Importer_->getTargetMap (), NumVectors);
1656  }
1657  Y2 = cachedMV_;
1658  }
1659  else {
1660  Y2 = rcpFromRef (Y);
1661  }
1662 
1663  for (int j = 0; j < NumSweeps_; ++j) {
1664  // data exchange is here, once per sweep
1665  if (IsParallel_) {
1666  if (Importer_.is_null ()) {
1667  // FIXME (mfh 27 May 2019) This doesn't deep copy -- not
1668  // clear if this is correct. Reevaluate at some point.
1669 
1670  *Y2 = Y; // just copy, since domain and column Maps are the same
1671  } else {
1672  Y2->doImport (Y, *Importer_, Tpetra::INSERT);
1673  }
1674  }
1675  serialGaussSeidel_->apply(*Y2, X, direction);
1676 
1677  // FIXME (mfh 02 Jan 2013) This is only correct if row Map == range Map.
1678  if (IsParallel_) {
1679  Tpetra::deep_copy (Y, *Y2);
1680  }
1681  }
1682 
1683  // See flop count discussion in implementation of ApplyInverseGS_CrsMatrix().
1684  const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1685  const double numVectors = as<double> (X.getNumVectors ());
1686  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1687  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1688  ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
1689  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1690 }
1691 
1692 
1693 template<class MatrixType>
1694 void
1695 Relaxation<MatrixType>::
1696 ApplyInverseSerialGS_CrsMatrix(const crs_matrix_type& A,
1697  const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& B,
1698  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1699  Tpetra::ESweepDirection direction) const
1700 {
1701  using Teuchos::null;
1702  using Teuchos::RCP;
1703  using Teuchos::rcp;
1704  using Teuchos::rcpFromRef;
1705  using Teuchos::rcp_const_cast;
1706  typedef scalar_type Scalar;
1707  const char prefix[] = "Ifpack2::Relaxation::SerialGS: ";
1708  const scalar_type ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
1709 
1711  ! A.isFillComplete (), std::runtime_error,
1712  prefix << "The matrix is not fill complete.");
1714  NumSweeps_ < 0, std::invalid_argument,
1715  prefix << "The number of sweeps must be nonnegative, "
1716  "but you provided numSweeps = " << NumSweeps_ << " < 0.");
1717 
1718  if (NumSweeps_ == 0) {
1719  return;
1720  }
1721 
1722  RCP<const import_type> importer = A.getGraph ()->getImporter ();
1723 
1724  RCP<const map_type> domainMap = A.getDomainMap ();
1725  RCP<const map_type> rangeMap = A.getRangeMap ();
1726  RCP<const map_type> rowMap = A.getGraph ()->getRowMap ();
1727  RCP<const map_type> colMap = A.getGraph ()->getColMap ();
1728 
1729 #ifdef HAVE_IFPACK2_DEBUG
1730  {
1731  // The relation 'isSameAs' is transitive. It's also a
1732  // collective, so we don't have to do a "shared" test for
1733  // exception (i.e., a global reduction on the test value).
1735  ! X.getMap ()->isSameAs (*domainMap), std::runtime_error,
1736  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
1737  "multivector X be in the domain Map of the matrix.");
1739  ! B.getMap ()->isSameAs (*rangeMap), std::runtime_error,
1740  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
1741  "B be in the range Map of the matrix.");
1743  ! Diagonal_->getMap ()->isSameAs (*rowMap), std::runtime_error,
1744  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
1745  "D be in the row Map of the matrix.");
1747  ! rowMap->isSameAs (*rangeMap), std::runtime_error,
1748  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the row Map and the "
1749  "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
1751  ! domainMap->isSameAs (*rangeMap), std::runtime_error,
1752  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the domain Map and "
1753  "the range Map of the matrix be the same.");
1754  }
1755 #endif
1756 
1757  // Fetch a (possibly cached) temporary column Map multivector
1758  // X_colMap, and a domain Map view X_domainMap of it. Both have
1759  // constant stride by construction. We know that the domain Map
1760  // must include the column Map, because our Gauss-Seidel kernel
1761  // requires that the row Map, domain Map, and range Map are all
1762  // the same, and that each process owns all of its own diagonal
1763  // entries of the matrix.
1764 
1765  RCP<multivector_type> X_colMap;
1766  RCP<multivector_type> X_domainMap;
1767  bool copyBackOutput = false;
1768  if (importer.is_null ()) {
1769  X_colMap = Teuchos::rcpFromRef (X);
1770  X_domainMap = Teuchos::rcpFromRef (X);
1771  // Column Map and domain Map are the same, so there are no
1772  // remote entries. Thus, if we are not setting the initial
1773  // guess to zero, we don't have to worry about setting remote
1774  // entries to zero, even though we are not doing an Import in
1775  // this case.
1776  if (ZeroStartingSolution_) {
1777  X_colMap->putScalar (ZERO);
1778  }
1779  // No need to copy back to X at end.
1780  }
1781  else { // Column Map and domain Map are _not_ the same.
1782  updateCachedMultiVector(colMap, X.getNumVectors());
1783  X_colMap = cachedMV_;
1784  X_domainMap = X_colMap->offsetViewNonConst (domainMap, 0);
1785 
1786  if (ZeroStartingSolution_) {
1787  // No need for an Import, since we're filling with zeros.
1788  X_colMap->putScalar (ZERO);
1789  } else {
1790  // We could just copy X into X_domainMap. However, that
1791  // wastes a copy, because the Import also does a copy (plus
1792  // communication). Since the typical use case for
1793  // Gauss-Seidel is a small number of sweeps (2 is typical), we
1794  // don't want to waste that copy. Thus, we do the Import
1795  // here, and skip the first Import in the first sweep.
1796  // Importing directly from X effects the copy into X_domainMap
1797  // (which is a view of X_colMap).
1798  X_colMap->doImport (X, *importer, Tpetra::INSERT);
1799  }
1800  copyBackOutput = true; // Don't forget to copy back at end.
1801  } // if column and domain Maps are (not) the same
1802 
1803  for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
1804  if (! importer.is_null () && sweep > 0) {
1805  // We already did the first Import for the zeroth sweep above,
1806  // if it was necessary.
1807  X_colMap->doImport (*X_domainMap, *importer, Tpetra::INSERT);
1808  }
1809  // Do local Gauss-Seidel (forward, backward or symmetric)
1810  serialGaussSeidel_->apply(*X_colMap, B, direction);
1811  }
1812 
1813  if (copyBackOutput) {
1814  try {
1815  deep_copy (X , *X_domainMap); // Copy result back into X.
1816  } catch (std::exception& e) {
1818  true, std::runtime_error, prefix << "deep_copy(X, *X_domainMap) "
1819  "threw an exception: " << e.what ());
1820  }
1821  }
1822 
1823  // For each column of output, for each sweep over the matrix:
1824  //
1825  // - One + and one * for each matrix entry
1826  // - One / and one + for each row of the matrix
1827  // - If the damping factor is not one: one * for each row of the
1828  // matrix. (It's not fair to count this if the damping factor is
1829  // one, since the implementation could skip it. Whether it does
1830  // or not is the implementation's choice.)
1831 
1832  // Floating-point operations due to the damping factor, per matrix
1833  // row, per direction, per columm of output.
1834  const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1835  const double numVectors = X.getNumVectors ();
1836  const double numGlobalRows = A_->getGlobalNumRows ();
1837  const double numGlobalNonzeros = A_->getGlobalNumEntries ();
1838  ApplyFlops_ += NumSweeps_ * numVectors *
1839  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1840 }
1841 
1842 template<class MatrixType>
1843 void
1844 Relaxation<MatrixType>::
1845 ApplyInverseSerialGS_BlockCrsMatrix (const block_crs_matrix_type& A,
1846  const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1847  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
1848  Tpetra::ESweepDirection direction)
1849 {
1850  using Tpetra::INSERT;
1851  using Teuchos::RCP;
1852  using Teuchos::rcp;
1853  using Teuchos::rcpFromRef;
1854 
1855  //FIXME: (tcf) 8/21/2014 -- may be problematic for multiple right hand sides
1856  //
1857  // NOTE (mfh 12 Sep 2014) I don't think it should be a problem for
1858  // multiple right-hand sides, unless the input or output MultiVector
1859  // does not have constant stride. We should check for that case
1860  // here, in case it doesn't work in localGaussSeidel (which is
1861  // entirely possible).
1862  block_multivector_type yBlock(Y, *(A.getGraph ()->getDomainMap()), A.getBlockSize());
1863  const block_multivector_type xBlock(X, *(A.getColMap ()), A.getBlockSize());
1864 
1865  bool performImport = false;
1866  RCP<block_multivector_type> yBlockCol;
1867  if (Importer_.is_null()) {
1868  yBlockCol = rcpFromRef(yBlock);
1869  }
1870  else {
1871  if (yBlockColumnPointMap_.is_null() ||
1872  yBlockColumnPointMap_->getNumVectors() != yBlock.getNumVectors() ||
1873  yBlockColumnPointMap_->getBlockSize() != yBlock.getBlockSize()) {
1874  yBlockColumnPointMap_ =
1875  rcp(new block_multivector_type(*(A.getColMap()), A.getBlockSize(),
1876  static_cast<local_ordinal_type>(yBlock.getNumVectors())));
1877  }
1878  yBlockCol = yBlockColumnPointMap_;
1879  if (pointImporter_.is_null()) {
1880  auto srcMap = rcp(new map_type(yBlock.getPointMap()));
1881  auto tgtMap = rcp(new map_type(yBlockCol->getPointMap()));
1882  pointImporter_ = rcp(new import_type(srcMap, tgtMap));
1883  }
1884  performImport = true;
1885  }
1886 
1887  multivector_type yBlock_mv;
1888  multivector_type yBlockCol_mv;
1889  RCP<const multivector_type> yBlockColPointDomain;
1890  if (performImport) { // create views (shallow copies)
1891  yBlock_mv = yBlock.getMultiVectorView();
1892  yBlockCol_mv = yBlockCol->getMultiVectorView();
1893  yBlockColPointDomain =
1894  yBlockCol_mv.offsetView(A.getDomainMap(), 0);
1895  }
1896 
1897  if (ZeroStartingSolution_) {
1898  yBlockCol->putScalar(STS::zero ());
1899  }
1900  else if (performImport) {
1901  yBlockCol_mv.doImport(yBlock_mv, *pointImporter_, Tpetra::INSERT);
1902  }
1903 
1904  for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
1905  if (performImport && sweep > 0) {
1906  yBlockCol_mv.doImport(yBlock_mv, *pointImporter_, Tpetra::INSERT);
1907  }
1908  serialGaussSeidel_->applyBlock(*yBlockCol, xBlock, direction);
1909  if (performImport) {
1910  Tpetra::deep_copy(Y, *yBlockColPointDomain);
1911  }
1912  }
1913 }
1914 
1915 template<class MatrixType>
1916 void
1917 Relaxation<MatrixType>::
1918 ApplyInverseMTGS_CrsMatrix(
1919  const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& B,
1920  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1921  Tpetra::ESweepDirection direction) const
1922 {
1923  using Teuchos::null;
1924  using Teuchos::RCP;
1925  using Teuchos::rcp;
1926  using Teuchos::rcpFromRef;
1927  using Teuchos::rcp_const_cast;
1928  using Teuchos::as;
1929 
1930  typedef scalar_type Scalar;
1931 
1932  const char prefix[] = "Ifpack2::Relaxation::(reordered)MTGaussSeidel: ";
1933  const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
1934 
1935  auto crsMat = Details::getCrsMatrix(A_);
1937  (crsMat.is_null(), std::logic_error, "Ifpack2::Relaxation::apply: "
1938  "Multithreaded Gauss-Seidel methods currently only work when the "
1939  "input matrix is a Tpetra::CrsMatrix.");
1940 
1941  //Teuchos::ArrayView<local_ordinal_type> rowIndices; // unused, as of 04 Jan 2017
1943  (! localSmoothingIndices_.is_null (), std::logic_error,
1944  "Ifpack2's implementation of Multithreaded Gauss-Seidel does not "
1945  "implement the case where the user supplies an iteration order. "
1946  "This error used to appear as \"MT GaussSeidel ignores the given "
1947  "order\". "
1948  "I tried to add more explanation, but I didn't implement \"MT "
1949  "GaussSeidel\" [sic]. "
1950  "You'll have to ask the person who did.");
1951 
1953  (! crsMat->isFillComplete (), std::runtime_error, prefix << "The "
1954  "input CrsMatrix is not fill complete. Please call fillComplete "
1955  "on the matrix, then do setup again, before calling apply(). "
1956  "\"Do setup\" means that if only the matrix's values have changed "
1957  "since last setup, you need only call compute(). If the matrix's "
1958  "structure may also have changed, you must first call initialize(), "
1959  "then call compute(). If you have not set up this preconditioner "
1960  "for this matrix before, you must first call initialize(), then "
1961  "call compute().");
1963  (NumSweeps_ < 0, std::logic_error, prefix << ": NumSweeps_ = "
1964  << NumSweeps_ << " < 0. Please report this bug to the Ifpack2 "
1965  "developers.");
1966  if (NumSweeps_ == 0) {
1967  return;
1968  }
1969 
1970  RCP<const import_type> importer = crsMat->getGraph ()->getImporter ();
1972  ! crsMat->getGraph ()->getExporter ().is_null (), std::runtime_error,
1973  "This method's implementation currently requires that the matrix's row, "
1974  "domain, and range Maps be the same. This cannot be the case, because "
1975  "the matrix has a nontrivial Export object.");
1976 
1977  RCP<const map_type> domainMap = crsMat->getDomainMap ();
1978  RCP<const map_type> rangeMap = crsMat->getRangeMap ();
1979  RCP<const map_type> rowMap = crsMat->getGraph ()->getRowMap ();
1980  RCP<const map_type> colMap = crsMat->getGraph ()->getColMap ();
1981 
1982 #ifdef HAVE_IFPACK2_DEBUG
1983  {
1984  // The relation 'isSameAs' is transitive. It's also a
1985  // collective, so we don't have to do a "shared" test for
1986  // exception (i.e., a global reduction on the test value).
1988  ! X.getMap ()->isSameAs (*domainMap), std::runtime_error,
1989  "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
1990  "multivector X be in the domain Map of the matrix.");
1992  ! B.getMap ()->isSameAs (*rangeMap), std::runtime_error,
1993  "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
1994  "B be in the range Map of the matrix.");
1996  ! rowMap->isSameAs (*rangeMap), std::runtime_error,
1997  "Ifpack2::Relaxation::MTGaussSeidel requires that the row Map and the "
1998  "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
2000  ! domainMap->isSameAs (*rangeMap), std::runtime_error,
2001  "Ifpack2::Relaxation::MTGaussSeidel requires that the domain Map and "
2002  "the range Map of the matrix be the same.");
2003  }
2004 #else
2005  // Forestall any compiler warnings for unused variables.
2006  (void) rangeMap;
2007  (void) rowMap;
2008 #endif // HAVE_IFPACK2_DEBUG
2009 
2010  // Fetch a (possibly cached) temporary column Map multivector
2011  // X_colMap, and a domain Map view X_domainMap of it. Both have
2012  // constant stride by construction. We know that the domain Map
2013  // must include the column Map, because our Gauss-Seidel kernel
2014  // requires that the row Map, domain Map, and range Map are all
2015  // the same, and that each process owns all of its own diagonal
2016  // entries of the matrix.
2017 
2018  RCP<multivector_type> X_colMap;
2019  RCP<multivector_type> X_domainMap;
2020  bool copyBackOutput = false;
2021  if (importer.is_null ()) {
2022  if (X.isConstantStride ()) {
2023  X_colMap = rcpFromRef (X);
2024  X_domainMap = rcpFromRef (X);
2025 
2026  // Column Map and domain Map are the same, so there are no
2027  // remote entries. Thus, if we are not setting the initial
2028  // guess to zero, we don't have to worry about setting remote
2029  // entries to zero, even though we are not doing an Import in
2030  // this case.
2031  if (ZeroStartingSolution_) {
2032  X_colMap->putScalar (ZERO);
2033  }
2034  // No need to copy back to X at end.
2035  }
2036  else {
2037  // We must copy X into a constant stride multivector.
2038  // Just use the cached column Map multivector for that.
2039  // force=true means fill with zeros, so no need to fill
2040  // remote entries (not in domain Map) with zeros.
2041  updateCachedMultiVector(colMap,as<size_t>(X.getNumVectors()));
2042  X_colMap = cachedMV_;
2043  // X_domainMap is always a domain Map view of the column Map
2044  // multivector. In this case, the domain and column Maps are
2045  // the same, so X_domainMap _is_ X_colMap.
2046  X_domainMap = X_colMap;
2047  if (! ZeroStartingSolution_) { // Don't copy if zero initial guess
2048  try {
2049  deep_copy (*X_domainMap , X); // Copy X into constant stride MV
2050  } catch (std::exception& e) {
2051  std::ostringstream os;
2052  os << "Ifpack2::Relaxation::MTGaussSeidel: "
2053  "deep_copy(*X_domainMap, X) threw an exception: "
2054  << e.what () << ".";
2055  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what ());
2056  }
2057  }
2058  copyBackOutput = true; // Don't forget to copy back at end.
2059  /*
2060  TPETRA_EFFICIENCY_WARNING(
2061  ! X.isConstantStride (),
2062  std::runtime_error,
2063  "MTGaussSeidel: The current implementation of the Gauss-Seidel "
2064  "kernel requires that X and B both have constant stride. Since X "
2065  "does not have constant stride, we had to make a copy. This is a "
2066  "limitation of the current implementation and not your fault, but we "
2067  "still report it as an efficiency warning for your information.");
2068  */
2069  }
2070  }
2071  else { // Column Map and domain Map are _not_ the same.
2072  updateCachedMultiVector(colMap,as<size_t>(X.getNumVectors()));
2073  X_colMap = cachedMV_;
2074 
2075  X_domainMap = X_colMap->offsetViewNonConst (domainMap, 0);
2076 
2077  if (ZeroStartingSolution_) {
2078  // No need for an Import, since we're filling with zeros.
2079  X_colMap->putScalar (ZERO);
2080  } else {
2081  // We could just copy X into X_domainMap. However, that
2082  // wastes a copy, because the Import also does a copy (plus
2083  // communication). Since the typical use case for
2084  // Gauss-Seidel is a small number of sweeps (2 is typical), we
2085  // don't want to waste that copy. Thus, we do the Import
2086  // here, and skip the first Import in the first sweep.
2087  // Importing directly from X effects the copy into X_domainMap
2088  // (which is a view of X_colMap).
2089  X_colMap->doImport (X, *importer, Tpetra::CombineMode::INSERT);
2090  }
2091  copyBackOutput = true; // Don't forget to copy back at end.
2092  } // if column and domain Maps are (not) the same
2093 
2094  // The Gauss-Seidel / SOR kernel expects multivectors of constant
2095  // stride. X_colMap is by construction, but B might not be. If
2096  // it's not, we have to make a copy.
2097  RCP<const multivector_type> B_in;
2098  if (B.isConstantStride ()) {
2099  B_in = rcpFromRef (B);
2100  }
2101  else {
2102  // Range Map and row Map are the same in this case, so we can
2103  // use the cached row Map multivector to store a constant stride
2104  // copy of B.
2105  RCP<multivector_type> B_in_nonconst = rcp (new multivector_type(rowMap, B.getNumVectors()));
2106  try {
2107  deep_copy (*B_in_nonconst, B);
2108  } catch (std::exception& e) {
2109  std::ostringstream os;
2110  os << "Ifpack2::Relaxation::MTGaussSeidel: "
2111  "deep_copy(*B_in_nonconst, B) threw an exception: "
2112  << e.what () << ".";
2113  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what ());
2114  }
2115  B_in = rcp_const_cast<const multivector_type> (B_in_nonconst);
2116 
2117  /*
2118  TPETRA_EFFICIENCY_WARNING(
2119  ! B.isConstantStride (),
2120  std::runtime_error,
2121  "MTGaussSeidel: The current implementation requires that B have "
2122  "constant stride. Since B does not have constant stride, we had to "
2123  "copy it into a separate constant-stride multivector. This is a "
2124  "limitation of the current implementation and not your fault, but we "
2125  "still report it as an efficiency warning for your information.");
2126  */
2127  }
2128 
2129  local_matrix_device_type kcsr = crsMat->getLocalMatrixDevice ();
2130 
2131  bool update_y_vector = true;
2132  //false as it was done up already, and we dont want to zero it in each sweep.
2133  bool zero_x_vector = false;
2134 
2135  for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
2136  if (! importer.is_null () && sweep > 0) {
2137  // We already did the first Import for the zeroth sweep above,
2138  // if it was necessary.
2139  X_colMap->doImport (*X_domainMap, *importer, Tpetra::CombineMode::INSERT);
2140  }
2141 
2142  if (direction == Tpetra::Symmetric) {
2143  KokkosSparse::Experimental::symmetric_gauss_seidel_apply
2144  (mtKernelHandle_.getRawPtr(), A_->getLocalNumRows(), A_->getLocalNumCols(),
2145  kcsr.graph.row_map, kcsr.graph.entries, kcsr.values,
2146  X_colMap->getLocalViewDevice(Tpetra::Access::ReadWrite),
2147  B_in->getLocalViewDevice(Tpetra::Access::ReadOnly),
2148  zero_x_vector, update_y_vector, DampingFactor_, 1);
2149  }
2150  else if (direction == Tpetra::Forward) {
2151  KokkosSparse::Experimental::forward_sweep_gauss_seidel_apply
2152  (mtKernelHandle_.getRawPtr(), A_->getLocalNumRows(), A_->getLocalNumCols(),
2153  kcsr.graph.row_map,kcsr.graph.entries, kcsr.values,
2154  X_colMap->getLocalViewDevice(Tpetra::Access::ReadWrite),
2155  B_in->getLocalViewDevice(Tpetra::Access::ReadOnly),
2156  zero_x_vector, update_y_vector, DampingFactor_, 1);
2157  }
2158  else if (direction == Tpetra::Backward) {
2159  KokkosSparse::Experimental::backward_sweep_gauss_seidel_apply
2160  (mtKernelHandle_.getRawPtr(), A_->getLocalNumRows(), A_->getLocalNumCols(),
2161  kcsr.graph.row_map,kcsr.graph.entries, kcsr.values,
2162  X_colMap->getLocalViewDevice(Tpetra::Access::ReadWrite),
2163  B_in->getLocalViewDevice(Tpetra::Access::ReadOnly),
2164  zero_x_vector, update_y_vector, DampingFactor_, 1);
2165  }
2166  else {
2168  true, std::invalid_argument,
2169  prefix << "The 'direction' enum does not have any of its valid "
2170  "values: Forward, Backward, or Symmetric.");
2171  }
2172  update_y_vector = false;
2173  }
2174 
2175  if (copyBackOutput) {
2176  try {
2177  deep_copy (X , *X_domainMap); // Copy result back into X.
2178  } catch (std::exception& e) {
2180  true, std::runtime_error, prefix << "deep_copy(X, *X_domainMap) "
2181  "threw an exception: " << e.what ());
2182  }
2183  }
2184 
2185  const double dampingFlops = (DampingFactor_ == STS::one ()) ? 0.0 : 1.0;
2186  const double numVectors = as<double> (X.getNumVectors ());
2187  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
2188  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
2189  double ApplyFlops = NumSweeps_ * numVectors *
2190  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
2191  if (direction == Tpetra::Symmetric)
2192  ApplyFlops *= 2.0;
2193  ApplyFlops_ += ApplyFlops;
2194 }
2195 
2196 template<class MatrixType>
2198 {
2199  std::ostringstream os;
2200 
2201  // Output is a valid YAML dictionary in flow style. If you don't
2202  // like everything on a single line, you should call describe()
2203  // instead.
2204  os << "\"Ifpack2::Relaxation\": {";
2205 
2206  os << "Initialized: " << (isInitialized () ? "true" : "false") << ", "
2207  << "Computed: " << (isComputed () ? "true" : "false") << ", ";
2208 
2209  // It's useful to print this instance's relaxation method (Jacobi,
2210  // Gauss-Seidel, or symmetric Gauss-Seidel). If you want more info
2211  // than that, call describe() instead.
2212  os << "Type: ";
2213  if (PrecType_ == Ifpack2::Details::JACOBI) {
2214  os << "Jacobi";
2215  } else if (PrecType_ == Ifpack2::Details::GS) {
2216  os << "Gauss-Seidel";
2217  } else if (PrecType_ == Ifpack2::Details::SGS) {
2218  os << "Symmetric Gauss-Seidel";
2219  } else if (PrecType_ == Ifpack2::Details::MTGS) {
2220  os << "MT Gauss-Seidel";
2221  } else if (PrecType_ == Ifpack2::Details::MTSGS) {
2222  os << "MT Symmetric Gauss-Seidel";
2223  } else if (PrecType_ == Ifpack2::Details::GS2) {
2224  os << "Two-stage Gauss-Seidel";
2225  } else if (PrecType_ == Ifpack2::Details::SGS2) {
2226  os << "Two-stage Symmetric Gauss-Seidel";
2227  }
2228  else {
2229  os << "INVALID";
2230  }
2231  if(hasBlockCrsMatrix_)
2232  os<<", BlockCrs";
2233 
2234  os << ", " << "sweeps: " << NumSweeps_ << ", "
2235  << "damping factor: " << DampingFactor_ << ", ";
2236 
2237  if (PrecType_ == Ifpack2::Details::MTGS || PrecType_ == Ifpack2::Details::MTSGS) {
2238  os << "\"relaxation: mtgs cluster size\": " << clusterSize_ << ", ";
2239  os << "\"relaxation: long row threshold\": " << longRowThreshold_ << ", ";
2240  os << "\"relaxation: symmetric matrix structure\": " << (is_matrix_structurally_symmetric_ ? "true" : "false") << ", ";
2241  os << "\"relaxation: relaxation: mtgs coloring algorithm\": ";
2242  switch(mtColoringAlgorithm_)
2243  {
2244  case KokkosGraph::COLORING_DEFAULT:
2245  os << "DEFAULT"; break;
2246  case KokkosGraph::COLORING_SERIAL:
2247  os << "SERIAL"; break;
2248  case KokkosGraph::COLORING_VB:
2249  os << "VB"; break;
2250  case KokkosGraph::COLORING_VBBIT:
2251  os << "VBBIT"; break;
2252  case KokkosGraph::COLORING_VBCS:
2253  os << "VBCS"; break;
2254  case KokkosGraph::COLORING_VBD:
2255  os << "VBD"; break;
2256  case KokkosGraph::COLORING_VBDBIT:
2257  os << "VBDBIT"; break;
2258  case KokkosGraph::COLORING_EB:
2259  os << "EB"; break;
2260  default:
2261  os << "*Invalid*";
2262  }
2263  os << ", ";
2264  }
2265 
2266  if (PrecType_ == Ifpack2::Details::GS2 ||
2267  PrecType_ == Ifpack2::Details::SGS2) {
2268  os << "outer sweeps: " << NumOuterSweeps_ << ", "
2269  << "inner sweeps: " << NumInnerSweeps_ << ", "
2270  << "inner damping factor: " << InnerDampingFactor_ << ", ";
2271  }
2272 
2273  if (DoL1Method_) {
2274  os << "use l1: " << DoL1Method_ << ", "
2275  << "l1 eta: " << L1Eta_ << ", ";
2276  }
2277 
2278  if (A_.is_null ()) {
2279  os << "Matrix: null";
2280  }
2281  else {
2282  os << "Global matrix dimensions: ["
2283  << A_->getGlobalNumRows () << ", " << A_->getGlobalNumCols () << "]"
2284  << ", Global nnz: " << A_->getGlobalNumEntries();
2285  }
2286 
2287  os << "}";
2288  return os.str ();
2289 }
2290 
2291 
2292 template<class MatrixType>
2293 void
2296  const Teuchos::EVerbosityLevel verbLevel) const
2297 {
2298  using Teuchos::OSTab;
2300  using Teuchos::VERB_DEFAULT;
2301  using Teuchos::VERB_NONE;
2302  using Teuchos::VERB_LOW;
2303  using Teuchos::VERB_MEDIUM;
2304  using Teuchos::VERB_HIGH;
2305  using Teuchos::VERB_EXTREME;
2306  using std::endl;
2307 
2308  const Teuchos::EVerbosityLevel vl =
2309  (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
2310 
2311  const int myRank = this->getComm ()->getRank ();
2312 
2313  // none: print nothing
2314  // low: print O(1) info from Proc 0
2315  // medium:
2316  // high:
2317  // extreme:
2318 
2319  if (vl != VERB_NONE && myRank == 0) {
2320  // Describable interface asks each implementation to start with a tab.
2321  OSTab tab1 (out);
2322  // Output is valid YAML; hence the quotes, to protect the colons.
2323  out << "\"Ifpack2::Relaxation\":" << endl;
2324  OSTab tab2 (out);
2325  out << "MatrixType: \"" << TypeNameTraits<MatrixType>::name () << "\""
2326  << endl;
2327  if (this->getObjectLabel () != "") {
2328  out << "Label: " << this->getObjectLabel () << endl;
2329  }
2330  out << "Initialized: " << (isInitialized () ? "true" : "false") << endl
2331  << "Computed: " << (isComputed () ? "true" : "false") << endl
2332  << "Parameters: " << endl;
2333  {
2334  OSTab tab3 (out);
2335  out << "\"relaxation: type\": ";
2336  if (PrecType_ == Ifpack2::Details::JACOBI) {
2337  out << "Jacobi";
2338  } else if (PrecType_ == Ifpack2::Details::GS) {
2339  out << "Gauss-Seidel";
2340  } else if (PrecType_ == Ifpack2::Details::SGS) {
2341  out << "Symmetric Gauss-Seidel";
2342  } else if (PrecType_ == Ifpack2::Details::MTGS) {
2343  out << "MT Gauss-Seidel";
2344  } else if (PrecType_ == Ifpack2::Details::MTSGS) {
2345  out << "MT Symmetric Gauss-Seidel";
2346  } else if (PrecType_ == Ifpack2::Details::GS2) {
2347  out << "Two-stage Gauss-Seidel";
2348  } else if (PrecType_ == Ifpack2::Details::SGS2) {
2349  out << "Two-stage Symmetric Gauss-Seidel";
2350  } else {
2351  out << "INVALID";
2352  }
2353  // We quote these parameter names because they contain colons.
2354  // YAML uses the colon to distinguish key from value.
2355  out << endl
2356  << "\"relaxation: sweeps\": " << NumSweeps_ << endl
2357  << "\"relaxation: damping factor\": " << DampingFactor_ << endl
2358  << "\"relaxation: min diagonal value\": " << MinDiagonalValue_ << endl
2359  << "\"relaxation: zero starting solution\": " << ZeroStartingSolution_ << endl
2360  << "\"relaxation: backward mode\": " << DoBackwardGS_ << endl
2361  << "\"relaxation: use l1\": " << DoL1Method_ << endl
2362  << "\"relaxation: l1 eta\": " << L1Eta_ << endl;
2363  if (PrecType_ == Ifpack2::Details::MTGS || PrecType_ == Ifpack2::Details::MTSGS) {
2364  out << "\"relaxation: mtgs cluster size\": " << clusterSize_ << endl;
2365  out << "\"relaxation: long row threshold\": " << longRowThreshold_ << endl;
2366  out << "\"relaxation: symmetric matrix structure\": " << (is_matrix_structurally_symmetric_ ? "true" : "false") << endl;
2367  out << "\"relaxation: relaxation: mtgs coloring algorithm\": ";
2368  switch(mtColoringAlgorithm_)
2369  {
2370  case KokkosGraph::COLORING_DEFAULT:
2371  out << "DEFAULT"; break;
2372  case KokkosGraph::COLORING_SERIAL:
2373  out << "SERIAL"; break;
2374  case KokkosGraph::COLORING_VB:
2375  out << "VB"; break;
2376  case KokkosGraph::COLORING_VBBIT:
2377  out << "VBBIT"; break;
2378  case KokkosGraph::COLORING_VBCS:
2379  out << "VBCS"; break;
2380  case KokkosGraph::COLORING_VBD:
2381  out << "VBD"; break;
2382  case KokkosGraph::COLORING_VBDBIT:
2383  out << "VBDBIT"; break;
2384  case KokkosGraph::COLORING_EB:
2385  out << "EB"; break;
2386  default:
2387  out << "*Invalid*";
2388  }
2389  out << endl;
2390  }
2391  if (PrecType_ == Ifpack2::Details::GS2 || PrecType_ == Ifpack2::Details::SGS2) {
2392  out << "\"relaxation: inner damping factor\": " << InnerDampingFactor_ << endl;
2393  out << "\"relaxation: outer sweeps\" : " << NumOuterSweeps_ << endl;
2394  out << "\"relaxation: inner sweeps\" : " << NumInnerSweeps_ << endl;
2395  }
2396  }
2397  out << "Computed quantities:" << endl;
2398  {
2399  OSTab tab3 (out);
2400  out << "Global number of rows: " << A_->getGlobalNumRows () << endl
2401  << "Global number of columns: " << A_->getGlobalNumCols () << endl;
2402  }
2403  if (checkDiagEntries_ && isComputed ()) {
2404  out << "Properties of input diagonal entries:" << endl;
2405  {
2406  OSTab tab3 (out);
2407  out << "Magnitude of minimum-magnitude diagonal entry: "
2408  << globalMinMagDiagEntryMag_ << endl
2409  << "Magnitude of maximum-magnitude diagonal entry: "
2410  << globalMaxMagDiagEntryMag_ << endl
2411  << "Number of diagonal entries with small magnitude: "
2412  << globalNumSmallDiagEntries_ << endl
2413  << "Number of zero diagonal entries: "
2414  << globalNumZeroDiagEntries_ << endl
2415  << "Number of diagonal entries with negative real part: "
2416  << globalNumNegDiagEntries_ << endl
2417  << "Abs 2-norm diff between computed and actual inverse "
2418  << "diagonal: " << globalDiagNormDiff_ << endl;
2419  }
2420  }
2421  if (isComputed ()) {
2422  out << "Saved diagonal offsets: "
2423  << (savedDiagOffsets_ ? "true" : "false") << endl;
2424  }
2425  out << "Call counts and total times (in seconds): " << endl;
2426  {
2427  OSTab tab3 (out);
2428  out << "initialize: " << endl;
2429  {
2430  OSTab tab4 (out);
2431  out << "Call count: " << NumInitialize_ << endl;
2432  out << "Total time: " << InitializeTime_ << endl;
2433  }
2434  out << "compute: " << endl;
2435  {
2436  OSTab tab4 (out);
2437  out << "Call count: " << NumCompute_ << endl;
2438  out << "Total time: " << ComputeTime_ << endl;
2439  }
2440  out << "apply: " << endl;
2441  {
2442  OSTab tab4 (out);
2443  out << "Call count: " << NumApply_ << endl;
2444  out << "Total time: " << ApplyTime_ << endl;
2445  }
2446  }
2447  }
2448 }
2449 
2450 
2451 } // namespace Ifpack2
2452 
2453 #define IFPACK2_RELAXATION_INSTANT(S,LO,GO,N) \
2454  template class Ifpack2::Relaxation< Tpetra::RowMatrix<S, LO, GO, N> >;
2455 
2456 #endif // IFPACK2_RELAXATION_DEF_HPP
bool hasTransposeApply() const
Whether apply() and applyMat() let you apply the transpose or conjugate transpose.
Definition: Ifpack2_Relaxation_def.hpp:525
double getComputeFlops() const
Total number of floating-point operations over all calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:567
double getApplyFlops() const
Total number of floating-point operations over all calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:573
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object&#39;s attributes to the given output stream.
Definition: Ifpack2_Relaxation_def.hpp:2295
bool is_null(const boost::shared_ptr< T > &p)
basic_OSTab< char > OSTab
static magnitudeType eps()
Relaxation(const Teuchos::RCP< const row_matrix_type > &A)
Constructor.
Definition: Ifpack2_Relaxation_def.hpp:214
T & get(const std::string &name, T def_value)
void compute()
Compute the preconditioner (&quot;numeric setup&quot;);.
Definition: Ifpack2_Relaxation_def.hpp:1025
ParameterList & set(std::string const &name, T const &value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
static RCP< Time > getNewCounter(const std::string &name)
static RCP< Time > lookupCounter(const std::string &name)
virtual void printDoc(std::string const &docString, std::ostream &out) const =0
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
size_t getNodeSmootherComplexity() const
Get a rough estimate of cost per iteration.
Definition: Ifpack2_Relaxation_def.hpp:580
virtual const std::string getXMLTypeName() const =0
static std::ostream & printLines(std::ostream &os, const std::string &linePrefix, const std::string &lines)
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
The communicator over which the matrix and vectors are distributed.
Definition: Ifpack2_Relaxation_def.hpp:482
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Change the matrix to be preconditioned.
Definition: Ifpack2_Relaxation_def.hpp:193
int getNumApply() const
Total number of calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:543
T * getRawPtr() const
bool remove(std::string const &name, bool throwIfNotExists=true)
double getComputeTime() const
Total time in seconds spent in all calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:555
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
MatrixType::node_type node_type
The Node type used by the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:263
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getDomainMap() const
Returns the Tpetra::Map object associated with the domain of this operator.
Definition: Ifpack2_Relaxation_def.hpp:502
void setParameters(const Teuchos::ParameterList &params)
Set the relaxation / preconditioner parameters.
Definition: Ifpack2_Relaxation_def.hpp:472
File for utility functions.
std::string description() const
A simple one-line description of this object.
Definition: Ifpack2_Relaxation_def.hpp:2197
int getNumInitialize() const
Total number of calls to initialize().
Definition: Ifpack2_Relaxation_def.hpp:531
void validateParametersAndSetDefaults(ParameterList const &validParamList, int const depth=1000)
virtual ValidStringsList validStringValues() const =0
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getRangeMap() const
Returns the Tpetra::Map object associated with the range of this operator.
Definition: Ifpack2_Relaxation_def.hpp:515
MatrixType::global_ordinal_type global_ordinal_type
The type of global indices in the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:260
double getInitializeTime() const
Total time in seconds spent in all calls to initialize().
Definition: Ifpack2_Relaxation_def.hpp:549
any & getAny(bool activeQry=true)
Compute scaled damped residual for Chebyshev.
Definition: Ifpack2_Details_InverseDiagonalKernel_decl.hpp:77
void applyMat(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS) const
Apply the input matrix to X, returning the result in Y.
Definition: Ifpack2_Relaxation_def.hpp:706
Teuchos::RCP< const row_matrix_type > getMatrix() const
The matrix to be preconditioned.
Definition: Ifpack2_Relaxation_def.hpp:493
TypeTo as(const TypeFrom &t)
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:257
bool isType(const std::string &name) const
MatrixType::scalar_type scalar_type
The type of the entries of the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:254
std::string typeName() const
const std::type_info & type() const
static double wallTime()
int getNumCompute() const
Total number of calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:537
Relaxation preconditioners for Tpetra::RowMatrix and Tpetra::CrsMatrix sparse matrices.
Definition: Ifpack2_Relaxation_decl.hpp:237
double getApplyTime() const
Total time in seconds spent in all calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:561
void getValidParameters(Teuchos::ParameterList &params)
Fills a list which contains all the parameters possibly used by Ifpack2.
Definition: Ifpack2_Parameters.cpp:51
T * get() const
void initialize()
Initialize the preconditioner (&quot;symbolic setup&quot;).
Definition: Ifpack2_Relaxation_def.hpp:727
Tpetra::RowMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > row_matrix_type
Tpetra::RowMatrix specialization used by this class.
Definition: Ifpack2_Relaxation_decl.hpp:273
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the preconditioner to X, returning the result in Y.
Definition: Ifpack2_Relaxation_def.hpp:593
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const
Return a list of all the parameters that this class accepts.
Definition: Ifpack2_Relaxation_def.hpp:226
Teuchos::ScalarTraits< scalar_type >::magnitudeType magnitude_type
The type of the magnitude (absolute value) of a matrix entry.
Definition: Ifpack2_Relaxation_decl.hpp:269
virtual void validate(ParameterEntry const &entry, std::string const &paramName, std::string const &sublistName) const =0
bool is_null() const