43 #ifndef IFPACK2_RELAXATION_DEF_HPP
44 #define IFPACK2_RELAXATION_DEF_HPP
46 #include "Teuchos_StandardParameterEntryValidators.hpp"
48 #include "Tpetra_CrsMatrix.hpp"
49 #include "Tpetra_Experimental_BlockCrsMatrix.hpp"
50 #include "Tpetra_Experimental_BlockView.hpp"
52 #include "MatrixMarket_Tpetra.hpp"
55 #include "KokkosSparse_gauss_seidel.hpp"
62 NonnegativeIntValidator () {}
72 const std::string& paramName,
73 const std::string& sublistName)
const
80 anyVal.
type () !=
typeid (int),
82 "Parameter \"" << paramName <<
"\" in sublist \"" << sublistName
83 <<
"\" has the wrong type." << endl <<
"Parameter: " << paramName
84 << endl <<
"Type specified: " << entryName << endl
85 <<
"Type required: int" << endl);
87 const int val = Teuchos::any_cast<
int> (anyVal);
90 "Parameter \"" << paramName <<
"\" in sublist \"" << sublistName
91 <<
"\" is negative." << endl <<
"Parameter: " << paramName
92 << endl <<
"Value specified: " << val << endl
93 <<
"Required range: [0, INT_MAX]" << endl);
98 return "NonnegativeIntValidator";
103 printDoc (
const std::string& docString,
104 std::ostream &out)
const
107 out <<
"#\tValidator Used: " << std::endl;
108 out <<
"#\t\tNonnegativeIntValidator" << std::endl;
115 template<class Scalar, const bool isOrdinal=Teuchos::ScalarTraits<Scalar>::isOrdinal>
119 static const Scalar eps ();
123 template<
class Scalar>
124 class SmallTraits<Scalar, true> {
126 static const Scalar eps () {
132 template<
class Scalar>
133 class SmallTraits<Scalar, false> {
135 static const Scalar eps () {
146 template<
class MatrixType>
147 void Relaxation<MatrixType>::updateCachedMultiVector(
const Teuchos::RCP<
const Tpetra::Map<local_ordinal_type,global_ordinal_type,node_type> > & map,
size_t numVecs)
const{
150 if(cachedMV_.is_null() || &*map != &*cachedMV_->getMap() || cachedMV_->getNumVectors() !=numVecs)
151 cachedMV_ =
Teuchos::rcp(
new Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>(map, numVecs,
false));
155 template<
class MatrixType>
160 Importer_ = Teuchos::null;
161 Diagonal_ = Teuchos::null;
162 isInitialized_ =
false;
164 diagOffsets_ = Kokkos::View<size_t*, typename node_type::device_type> ();
165 savedDiagOffsets_ =
false;
166 hasBlockCrsMatrix_ =
false;
168 IsParallel_ = (A->getRowMap ()->getComm ()->getSize () > 1);
175 template<
class MatrixType>
180 PrecType_ (Ifpack2::Details::JACOBI),
181 DampingFactor_ (
STS::one ()),
182 IsParallel_ ((A.
is_null () || A->getRowMap ().
is_null () || A->getRowMap ()->getComm ().
is_null ()) ?
184 A->getRowMap ()->getComm ()->getSize () > 1),
185 ZeroStartingSolution_ (true),
186 DoBackwardGS_ (false),
189 MinDiagonalValue_ (
STS::zero ()),
190 fixTinyDiagEntries_ (false),
191 checkDiagEntries_ (false),
192 is_matrix_structurally_symmetric_ (false),
193 ifpack2_dump_matrix_(false),
194 isInitialized_ (false),
199 InitializeTime_ (0.0),
204 globalMinMagDiagEntryMag_ (
STM::zero ()),
205 globalMaxMagDiagEntryMag_ (
STM::zero ()),
206 globalNumSmallDiagEntries_ (0),
207 globalNumZeroDiagEntries_ (0),
208 globalNumNegDiagEntries_ (0),
209 globalDiagNormDiff_(Teuchos::ScalarTraits<
magnitude_type>::zero()),
210 savedDiagOffsets_ (false),
211 hasBlockCrsMatrix_ (false)
213 this->setObjectLabel (
"Ifpack2::Relaxation");
217 template<
class MatrixType>
221 template<
class MatrixType>
227 using Teuchos::parameterList;
230 using Teuchos::rcp_const_cast;
231 using Teuchos::rcp_implicit_cast;
232 using Teuchos::setStringToIntegralParameter;
235 if (validParams_.is_null ()) {
236 RCP<ParameterList> pl = parameterList (
"Ifpack2::Relaxation");
240 Array<std::string> precTypes (5);
241 precTypes[0] =
"Jacobi";
242 precTypes[1] =
"Gauss-Seidel";
243 precTypes[2] =
"Symmetric Gauss-Seidel";
244 precTypes[3] =
"MT Gauss-Seidel";
245 precTypes[4] =
"MT Symmetric Gauss-Seidel";
246 Array<Details::RelaxationType> precTypeEnums (5);
247 precTypeEnums[0] = Details::JACOBI;
248 precTypeEnums[1] = Details::GS;
249 precTypeEnums[2] = Details::SGS;
250 precTypeEnums[3] = Details::MTGS;
251 precTypeEnums[4] = Details::MTSGS;
252 const std::string defaultPrecType (
"Jacobi");
253 setStringToIntegralParameter<Details::RelaxationType> (
"relaxation: type",
254 defaultPrecType,
"Relaxation method", precTypes (), precTypeEnums (),
257 const int numSweeps = 1;
258 RCP<PEV> numSweepsValidator =
259 rcp_implicit_cast<PEV> (
rcp (
new NonnegativeIntValidator));
260 pl->set (
"relaxation: sweeps", numSweeps,
"Number of relaxation sweeps",
261 rcp_const_cast<const PEV> (numSweepsValidator));
264 pl->set (
"relaxation: damping factor", dampingFactor);
266 const bool zeroStartingSolution =
true;
267 pl->set (
"relaxation: zero starting solution", zeroStartingSolution);
269 const bool doBackwardGS =
false;
270 pl->set (
"relaxation: backward mode", doBackwardGS);
272 const bool doL1Method =
false;
273 pl->set (
"relaxation: use l1", doL1Method);
275 const magnitude_type l1eta = (STM::one() + STM::one() + STM::one()) /
276 (STM::one() + STM::one());
277 pl->set (
"relaxation: l1 eta", l1eta);
280 pl->set (
"relaxation: min diagonal value", minDiagonalValue);
282 const bool fixTinyDiagEntries =
false;
283 pl->set (
"relaxation: fix tiny diagonal entries", fixTinyDiagEntries);
285 const bool checkDiagEntries =
false;
286 pl->set (
"relaxation: check diagonal entries", checkDiagEntries);
289 pl->set(
"relaxation: local smoothing indices", localSmoothingIndices);
291 const bool is_matrix_structurally_symmetric =
false;
292 pl->set(
"relaxation: symmetric matrix structure", is_matrix_structurally_symmetric);
294 const bool ifpack2_dump_matrix =
false;
295 pl->set(
"relaxation: ifpack2 dump matrix", ifpack2_dump_matrix);
297 validParams_ = rcp_const_cast<
const ParameterList> (pl);
303 template<
class MatrixType>
306 using Teuchos::getIntegralValue;
309 typedef scalar_type ST;
311 if (pl.
isType<
double>(
"relaxation: damping factor")) {
314 ST df = pl.
get<
double>(
"relaxation: damping factor");
315 pl.
remove(
"relaxation: damping factor");
316 pl.
set(
"relaxation: damping factor",df);
321 const Details::RelaxationType precType =
322 getIntegralValue<Details::RelaxationType> (pl,
"relaxation: type");
323 const int numSweeps = pl.get<
int> (
"relaxation: sweeps");
324 const ST dampingFactor = pl.get<ST> (
"relaxation: damping factor");
325 const bool zeroStartSol = pl.get<
bool> (
"relaxation: zero starting solution");
326 const bool doBackwardGS = pl.get<
bool> (
"relaxation: backward mode");
327 const bool doL1Method = pl.get<
bool> (
"relaxation: use l1");
328 const magnitude_type l1Eta = pl.get<magnitude_type> (
"relaxation: l1 eta");
329 const ST minDiagonalValue = pl.get<ST> (
"relaxation: min diagonal value");
330 const bool fixTinyDiagEntries = pl.get<
bool> (
"relaxation: fix tiny diagonal entries");
331 const bool checkDiagEntries = pl.get<
bool> (
"relaxation: check diagonal entries");
332 const bool is_matrix_structurally_symmetric = pl.get<
bool> (
"relaxation: symmetric matrix structure");
333 const bool ifpack2_dump_matrix = pl.get<
bool> (
"relaxation: ifpack2 dump matrix");
339 PrecType_ = precType;
340 NumSweeps_ = numSweeps;
341 DampingFactor_ = dampingFactor;
342 ZeroStartingSolution_ = zeroStartSol;
343 DoBackwardGS_ = doBackwardGS;
344 DoL1Method_ = doL1Method;
346 MinDiagonalValue_ = minDiagonalValue;
347 fixTinyDiagEntries_ = fixTinyDiagEntries;
348 checkDiagEntries_ = checkDiagEntries;
349 is_matrix_structurally_symmetric_ = is_matrix_structurally_symmetric;
350 ifpack2_dump_matrix_ = ifpack2_dump_matrix;
351 localSmoothingIndices_ = localSmoothingIndices;
355 template<
class MatrixType>
360 this->setParametersImpl(const_cast<Teuchos::ParameterList&>(pl));
364 template<
class MatrixType>
368 A_.is_null (), std::runtime_error,
"Ifpack2::Relaxation::getComm: "
369 "The input matrix A is null. Please call setMatrix() with a nonnull "
370 "input matrix before calling this method.");
371 return A_->getRowMap ()->getComm ();
375 template<
class MatrixType>
382 template<
class MatrixType>
383 Teuchos::RCP<
const Tpetra::Map<
typename MatrixType::local_ordinal_type,
384 typename MatrixType::global_ordinal_type,
385 typename MatrixType::node_type> >
388 A_.is_null (), std::runtime_error,
"Ifpack2::Relaxation::getDomainMap: "
389 "The input matrix A is null. Please call setMatrix() with a nonnull "
390 "input matrix before calling this method.");
391 return A_->getDomainMap ();
395 template<
class MatrixType>
396 Teuchos::RCP<
const Tpetra::Map<
typename MatrixType::local_ordinal_type,
397 typename MatrixType::global_ordinal_type,
398 typename MatrixType::node_type> >
401 A_.is_null (), std::runtime_error,
"Ifpack2::Relaxation::getRangeMap: "
402 "The input matrix A is null. Please call setMatrix() with a nonnull "
403 "input matrix before calling this method.");
404 return A_->getRangeMap ();
408 template<
class MatrixType>
414 template<
class MatrixType>
416 return(NumInitialize_);
420 template<
class MatrixType>
426 template<
class MatrixType>
432 template<
class MatrixType>
434 return(InitializeTime_);
438 template<
class MatrixType>
440 return(ComputeTime_);
444 template<
class MatrixType>
450 template<
class MatrixType>
452 return(ComputeFlops_);
456 template<
class MatrixType>
463 template<
class MatrixType>
466 A_.is_null (), std::runtime_error,
"Ifpack2::Relaxation::getNodeSmootherComplexity: "
467 "The input matrix A is null. Please call setMatrix() with a nonnull "
468 "input matrix, then call compute(), before calling this method.");
470 return A_->getNodeNumRows() + A_->getNodeNumEntries();
474 template<
class MatrixType>
477 apply (
const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
478 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
486 using Teuchos::rcpFromRef;
490 A_.is_null (), std::runtime_error,
"Ifpack2::Relaxation::apply: "
491 "The input matrix A is null. Please call setMatrix() with a nonnull "
492 "input matrix, then call compute(), before calling this method.");
496 "Ifpack2::Relaxation::apply: You must call compute() on this Ifpack2 "
497 "preconditioner instance before you may call apply(). You may call "
498 "isComputed() to find out if compute() has been called already.");
499 TEUCHOS_TEST_FOR_EXCEPTION(
500 X.getNumVectors() != Y.getNumVectors(),
502 "Ifpack2::Relaxation::apply: X and Y have different numbers of columns. "
503 "X has " << X.getNumVectors() <<
" columns, but Y has "
504 << Y.getNumVectors() <<
" columns.");
505 TEUCHOS_TEST_FOR_EXCEPTION(
506 beta != STS::zero (), std::logic_error,
507 "Ifpack2::Relaxation::apply: beta = " << beta <<
" != 0 case not "
510 const std::string timerName (
"Ifpack2::Relaxation::apply");
519 if (alpha == STS::zero ()) {
521 Y.putScalar (STS::zero ());
529 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE
530 auto X_lcl_host = X.template getLocalView<Kokkos::HostSpace> ();
531 auto Y_lcl_host = Y.template getLocalView<Kokkos::HostSpace> ();
533 auto X_lcl_host = X.getLocalViewHost ();
534 auto Y_lcl_host = Y.getLocalViewHost ();
536 if (X_lcl_host.data () == Y_lcl_host.data ()) {
539 Xcopy = rcpFromRef (X);
547 case Ifpack2::Details::JACOBI:
548 ApplyInverseJacobi(*Xcopy,Y);
550 case Ifpack2::Details::GS:
551 ApplyInverseGS(*Xcopy,Y);
553 case Ifpack2::Details::SGS:
554 ApplyInverseSGS(*Xcopy,Y);
556 case Ifpack2::Details::MTSGS:
557 ApplyInverseMTSGS_CrsMatrix(*Xcopy,Y);
559 case Ifpack2::Details::MTGS:
560 ApplyInverseMTGS_CrsMatrix(*Xcopy,Y);
564 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::logic_error,
565 "Ifpack2::Relaxation::apply: Invalid preconditioner type enum value "
566 << PrecType_ <<
". Please report this bug to the Ifpack2 developers.");
568 if (alpha != STS::one ()) {
570 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
571 const double numVectors = as<double> (Y.getNumVectors ());
572 ApplyFlops_ += numGlobalRows * numVectors;
581 template<
class MatrixType>
584 applyMat (
const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
585 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
589 A_.is_null (), std::runtime_error,
"Ifpack2::Relaxation::applyMat: "
590 "The input matrix A is null. Please call setMatrix() with a nonnull "
591 "input matrix, then call compute(), before calling this method.");
593 ! isComputed (), std::runtime_error,
"Ifpack2::Relaxation::applyMat: "
594 "isComputed() must be true before you may call applyMat(). "
595 "Please call compute() before calling this method.");
596 TEUCHOS_TEST_FOR_EXCEPTION(
597 X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
598 "Ifpack2::Relaxation::applyMat: X.getNumVectors() = " << X.getNumVectors ()
599 <<
" != Y.getNumVectors() = " << Y.getNumVectors () <<
".");
600 A_->apply (X, Y, mode);
604 template<
class MatrixType>
608 (A_.is_null (), std::runtime_error,
"Ifpack2::Relaxation::initialize: "
609 "The input matrix A is null. Please call setMatrix() with a nonnull "
610 "input matrix before calling this method.");
611 const std::string timerName (
"Ifpack2::Relaxation::initialize");
621 hasBlockCrsMatrix_ =
false;
625 Teuchos::rcp_dynamic_cast<
const block_crs_matrix_type> (A_);
627 hasBlockCrsMatrix_ =
false;
630 hasBlockCrsMatrix_ =
true;
634 if (PrecType_ == Ifpack2::Details::MTGS || PrecType_ == Ifpack2::Details::MTSGS) {
635 const crs_matrix_type* crsMat =
dynamic_cast<const crs_matrix_type*
> (A_.get());
637 (crsMat == NULL, std::logic_error,
"Ifpack2::Relaxation::initialize: "
638 "Multithreaded Gauss-Seidel methods currently only work when the input "
639 "matrix is a Tpetra::CrsMatrix.");
641 if(this->ifpack2_dump_matrix_){
642 static int sequence_number = 0;
643 const std::string file_name =
"Ifpack2_MT_GS_" + std::to_string (sequence_number++) +
".mtx";
644 Tpetra::MatrixMarket::Writer<crs_matrix_type> crs_writer;
646 crs_writer.writeSparseFile(file_name, rcp_crs_mat);
649 this->mtKernelHandle_ =
Teuchos::rcp (
new mt_kernel_handle_type ());
650 if (mtKernelHandle_->get_gs_handle () == NULL) {
651 mtKernelHandle_->create_gs_handle ();
653 local_matrix_type kcsr = crsMat->getLocalMatrix ();
655 bool is_symmetric = (PrecType_ == Ifpack2::Details::MTSGS);
656 is_symmetric = is_symmetric || is_matrix_structurally_symmetric_;
658 using KokkosSparse::Experimental::gauss_seidel_symbolic;
659 gauss_seidel_symbolic<mt_kernel_handle_type,
661 lno_nonzero_view_t> (mtKernelHandle_.getRawPtr (),
662 A_->getNodeNumRows (),
663 A_->getNodeNumCols (),
673 isInitialized_ =
true;
677 template <
typename BlockDiagView>
678 struct InvertDiagBlocks {
679 typedef int value_type;
680 typedef typename BlockDiagView::size_type Size;
683 typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
684 template <
typename View>
685 using UnmanagedView = Kokkos::View<
typename View::data_type,
typename View::array_layout,
686 typename View::device_type, Unmanaged>;
688 typedef typename BlockDiagView::non_const_value_type Scalar;
689 typedef typename BlockDiagView::device_type Device;
690 typedef Kokkos::View<Scalar**, Kokkos::LayoutRight, Device> RWrk;
691 typedef Kokkos::View<int**, Kokkos::LayoutRight, Device> IWrk;
693 UnmanagedView<BlockDiagView> block_diag_;
697 UnmanagedView<RWrk> rwrk_;
699 UnmanagedView<IWrk> iwrk_;
702 InvertDiagBlocks (BlockDiagView& block_diag)
703 : block_diag_(block_diag)
705 const auto blksz = block_diag.extent(1);
706 Kokkos::resize(rwrk_buf_, block_diag_.extent(0), blksz);
708 Kokkos::resize(iwrk_buf_, block_diag_.extent(0), blksz);
712 KOKKOS_INLINE_FUNCTION
713 void operator() (
const Size i,
int& jinfo)
const {
714 auto D_cur = Kokkos::subview(block_diag_, i, Kokkos::ALL(), Kokkos::ALL());
715 auto ipiv = Kokkos::subview(iwrk_, i, Kokkos::ALL());
716 auto work = Kokkos::subview(rwrk_, i, Kokkos::ALL());
718 Tpetra::Experimental::GETF2(D_cur, ipiv, info);
723 Tpetra::Experimental::GETRI(D_cur, ipiv, work, info);
728 KOKKOS_INLINE_FUNCTION
729 void join (
volatile value_type& dst,
volatile value_type
const& src)
const { dst += src; }
733 template<
class MatrixType>
734 void Relaxation<MatrixType>::computeBlockCrs ()
747 using Teuchos::rcp_dynamic_cast;
749 typedef local_ordinal_type LO;
750 typedef typename node_type::device_type device_type;
752 const std::string timerName (
"Ifpack2::Relaxation::computeBlockCrs");
761 A_.is_null (), std::runtime_error,
"Ifpack2::Relaxation::"
762 "computeBlockCrs: The input matrix A is null. Please call setMatrix() "
763 "with a nonnull input matrix, then call initialize(), before calling "
765 const block_crs_matrix_type* blockCrsA =
766 dynamic_cast<const block_crs_matrix_type*
> (A_.getRawPtr ());
768 blockCrsA == NULL, std::logic_error,
"Ifpack2::Relaxation::"
769 "computeBlockCrs: A_ is not a BlockCrsMatrix, but it should be if we "
770 "got this far. Please report this bug to the Ifpack2 developers.");
772 const scalar_type one = STS::one ();
777 const LO lclNumMeshRows =
778 blockCrsA->getCrsGraph ().getNodeNumRows ();
779 const LO blockSize = blockCrsA->getBlockSize ();
781 if (! savedDiagOffsets_) {
782 blockDiag_ = block_diag_type ();
783 blockDiag_ = block_diag_type (
"Ifpack2::Relaxation::blockDiag_",
784 lclNumMeshRows, blockSize, blockSize);
785 if (Teuchos::as<LO>(diagOffsets_.extent (0) ) < lclNumMeshRows) {
788 diagOffsets_ = Kokkos::View<size_t*, device_type> ();
789 diagOffsets_ = Kokkos::View<size_t*, device_type> (
"offsets", lclNumMeshRows);
791 blockCrsA->getCrsGraph ().getLocalDiagOffsets (diagOffsets_);
793 (static_cast<size_t> (diagOffsets_.extent (0)) !=
794 static_cast<size_t> (blockDiag_.extent (0)),
795 std::logic_error,
"diagOffsets_.extent(0) = " <<
796 diagOffsets_.extent (0) <<
" != blockDiag_.extent(0) = "
797 << blockDiag_.extent (0) <<
798 ". Please report this bug to the Ifpack2 developers.");
799 savedDiagOffsets_ =
true;
801 blockCrsA->getLocalDiagCopy (blockDiag_, diagOffsets_);
808 unmanaged_block_diag_type blockDiag = blockDiag_;
810 if (DoL1Method_ && IsParallel_) {
811 const scalar_type two = one + one;
812 const size_t maxLength = A_->getNodeMaxNumRowEntries ();
813 Array<LO> indices (maxLength);
814 Array<scalar_type> values (maxLength * blockSize * blockSize);
815 size_t numEntries = 0;
817 for (LO i = 0; i < lclNumMeshRows; ++i) {
819 blockCrsA->getLocalRowCopy (i, indices (), values (), numEntries);
821 auto diagBlock = Kokkos::subview (blockDiag, i, ALL (), ALL ());
822 for (LO subRow = 0; subRow < blockSize; ++subRow) {
823 magnitude_type diagonal_boost = STM::zero ();
824 for (
size_t k = 0 ; k < numEntries ; ++k) {
825 if (indices[k] > lclNumMeshRows) {
826 const size_t offset = blockSize*blockSize*k + subRow*blockSize;
827 for (LO subCol = 0; subCol < blockSize; ++subCol) {
828 diagonal_boost += STS::magnitude (values[offset+subCol] / two);
832 if (STS::magnitude (diagBlock(subRow, subRow)) < L1Eta_ * diagonal_boost) {
833 diagBlock(subRow, subRow) += diagonal_boost;
841 Impl::InvertDiagBlocks<unmanaged_block_diag_type> idb(blockDiag);
842 typedef typename unmanaged_block_diag_type::execution_space exec_space;
843 typedef Kokkos::RangePolicy<exec_space, LO> range_type;
845 Kokkos::parallel_reduce (range_type (0, lclNumMeshRows), idb, info);
851 (info > 0, std::runtime_error,
"GETF2 or GETRI failed on = " << info
852 <<
" diagonal blocks.");
857 #ifdef HAVE_IFPACK2_DEBUG
858 const int numResults = 2;
860 int lclResults[2], gblResults[2];
861 lclResults[0] = info;
862 lclResults[1] = -info;
865 reduceAll<int, int> (* (A_->getGraph ()->getComm ()), REDUCE_MIN,
866 numResults, lclResults, gblResults);
868 (gblResults[0] != 0 || gblResults[1] != 0, std::runtime_error,
869 "Ifpack2::Relaxation::compute: When processing the input "
870 "Tpetra::BlockCrsMatrix, one or more diagonal block LU factorizations "
871 "failed on one or more (MPI) processes.");
872 #endif // HAVE_IFPACK2_DEBUG
874 Importer_ = A_->getGraph ()->getImporter ();
882 template<
class MatrixType>
895 using Teuchos::rcp_dynamic_cast;
899 typedef typename vector_type::device_type device_type;
900 const scalar_type zero = STS::zero ();
901 const scalar_type one = STS::one ();
904 if (! isInitialized ()) {
908 if (hasBlockCrsMatrix_) {
914 const std::string timerName (
"Ifpack2::Relaxation::compute");
926 A_.is_null (), std::runtime_error,
"Ifpack2::Relaxation::compute: "
927 "The input matrix A is null. Please call setMatrix() with a nonnull "
928 "input matrix, then call initialize(), before calling this method.");
934 NumSweeps_ < 0, std::logic_error,
935 "Ifpack2::Relaxation::compute: NumSweeps_ = " << NumSweeps_ <<
" < 0. "
936 "Please report this bug to the Ifpack2 developers.");
938 Diagonal_ =
rcp (
new vector_type (A_->getRowMap ()));
956 const crs_matrix_type* crsMat =
957 dynamic_cast<const crs_matrix_type*
> (A_.getRawPtr ());
958 if (crsMat == NULL || ! crsMat->isStaticGraph ()) {
959 A_->getLocalDiagCopy (*Diagonal_);
961 if (! savedDiagOffsets_) {
962 const size_t lclNumRows = A_->getRowMap ()->getNodeNumElements ();
963 if (diagOffsets_.extent (0) < lclNumRows) {
964 typedef typename node_type::device_type DT;
965 diagOffsets_ = Kokkos::View<size_t*, DT> ();
966 diagOffsets_ = Kokkos::View<size_t*, DT> (
"offsets", lclNumRows);
968 crsMat->getCrsGraph ()->getLocalDiagOffsets (diagOffsets_);
969 savedDiagOffsets_ =
true;
971 crsMat->getLocalDiagCopy (*Diagonal_, diagOffsets_);
972 #ifdef HAVE_IFPACK2_DEBUG
974 vector_type D_copy (A_->getRowMap ());
975 A_->getLocalDiagCopy (D_copy);
976 D_copy.update (STS::one (), *Diagonal_, -STS::one ());
981 err != STM::zero(), std::logic_error,
"Ifpack2::Relaxation::compute: "
982 "\"fast-path\" diagonal computation failed. \\|D1 - D2\\|_inf = "
984 #endif // HAVE_IFPACK2_DEBUG
990 RCP<vector_type> origDiag;
991 if (checkDiagEntries_) {
992 origDiag =
rcp (
new vector_type (A_->getRowMap ()));
993 Tpetra::deep_copy (*origDiag, *Diagonal_);
996 const size_t numMyRows = A_->getNodeNumRows ();
999 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE
1000 Diagonal_->template sync<Kokkos::HostSpace> ();
1001 Diagonal_->template modify<Kokkos::HostSpace> ();
1002 auto diag_2d = Diagonal_->template getLocalView<Kokkos::HostSpace> ();
1004 Diagonal_->sync_host ();
1005 Diagonal_->modify_host ();
1006 auto diag_2d = Diagonal_->getLocalViewHost ();
1008 auto diag_1d = Kokkos::subview (diag_2d, Kokkos::ALL (), 0);
1010 scalar_type*
const diag =
reinterpret_cast<scalar_type*
> (diag_1d.data ());
1019 if (DoL1Method_ && IsParallel_) {
1020 const scalar_type two = one + one;
1021 const size_t maxLength = A_->getNodeMaxNumRowEntries ();
1022 Array<local_ordinal_type> indices (maxLength);
1023 Array<scalar_type> values (maxLength);
1026 for (
size_t i = 0; i < numMyRows; ++i) {
1027 A_->getLocalRowCopy (i, indices (), values (), numEntries);
1029 for (
size_t k = 0 ; k < numEntries ; ++k) {
1030 if (static_cast<size_t> (indices[k]) > numMyRows) {
1031 diagonal_boost += STS::magnitude (values[k] / two);
1034 if (STS::magnitude (diag[i]) < L1Eta_ * diagonal_boost) {
1035 diag[i] += diagonal_boost;
1051 const scalar_type oneOverMinDiagVal = (MinDiagonalValue_ == zero) ?
1052 one / SmallTraits<scalar_type>::eps () :
1053 one / MinDiagonalValue_;
1055 const magnitude_type minDiagValMag = STS::magnitude (MinDiagonalValue_);
1057 if (checkDiagEntries_) {
1063 size_t numSmallDiagEntries = 0;
1064 size_t numZeroDiagEntries = 0;
1065 size_t numNegDiagEntries = 0;
1076 if (numMyRows > 0) {
1077 const scalar_type d_0 = diag[0];
1081 minMagDiagEntryMag = d_0_mag;
1082 maxMagDiagEntryMag = d_0_mag;
1090 for (
size_t i = 0 ; i < numMyRows; ++i) {
1091 const scalar_type d_i = diag[i];
1097 if (d_i_real < STM::zero ()) {
1098 ++numNegDiagEntries;
1100 if (d_i_mag < minMagDiagEntryMag) {
1102 minMagDiagEntryMag = d_i_mag;
1104 if (d_i_mag > maxMagDiagEntryMag) {
1106 maxMagDiagEntryMag = d_i_mag;
1109 if (fixTinyDiagEntries_) {
1111 if (d_i_mag <= minDiagValMag) {
1112 ++numSmallDiagEntries;
1113 if (d_i_mag == STM::zero ()) {
1114 ++numZeroDiagEntries;
1116 diag[i] = oneOverMinDiagVal;
1118 diag[i] = one / d_i;
1123 if (d_i_mag <= minDiagValMag) {
1124 ++numSmallDiagEntries;
1125 if (d_i_mag == STM::zero ()) {
1126 ++numZeroDiagEntries;
1129 diag[i] = one / d_i;
1136 if (STS::isComplex) {
1137 ComputeFlops_ += 4.0 * numMyRows;
1139 ComputeFlops_ += numMyRows;
1156 const Comm<int>& comm = * (A_->getRowMap ()->getComm ());
1159 Array<magnitude_type> localVals (2);
1160 localVals[0] = minMagDiagEntryMag;
1162 localVals[1] = -maxMagDiagEntryMag;
1163 Array<magnitude_type> globalVals (2);
1164 reduceAll<int, magnitude_type> (comm, REDUCE_MIN, 2,
1165 localVals.getRawPtr (),
1166 globalVals.getRawPtr ());
1167 globalMinMagDiagEntryMag_ = globalVals[0];
1168 globalMaxMagDiagEntryMag_ = -globalVals[1];
1170 Array<size_t> localCounts (3);
1171 localCounts[0] = numSmallDiagEntries;
1172 localCounts[1] = numZeroDiagEntries;
1173 localCounts[2] = numNegDiagEntries;
1174 Array<size_t> globalCounts (3);
1175 reduceAll<int, size_t> (comm, REDUCE_SUM, 3,
1176 localCounts.getRawPtr (),
1177 globalCounts.getRawPtr ());
1178 globalNumSmallDiagEntries_ = globalCounts[0];
1179 globalNumZeroDiagEntries_ = globalCounts[1];
1180 globalNumNegDiagEntries_ = globalCounts[2];
1192 vector_type diff (A_->getRowMap ());
1193 diff.reciprocal (*origDiag);
1194 Diagonal_->template sync<device_type> ();
1195 diff.update (-one, *Diagonal_, one);
1196 globalDiagNormDiff_ = diff.norm2 ();
1199 if (fixTinyDiagEntries_) {
1203 for (
size_t i = 0 ; i < numMyRows; ++i) {
1204 const scalar_type d_i = diag[i];
1208 if (d_i_mag <= minDiagValMag) {
1209 diag[i] = oneOverMinDiagVal;
1211 diag[i] = one / d_i;
1216 for (
size_t i = 0 ; i < numMyRows; ++i) {
1217 diag[i] = one / diag[i];
1220 if (STS::isComplex) {
1221 ComputeFlops_ += 4.0 * numMyRows;
1223 ComputeFlops_ += numMyRows;
1227 if (IsParallel_ && (PrecType_ == Ifpack2::Details::GS ||
1228 PrecType_ == Ifpack2::Details::SGS)) {
1232 Importer_ = A_->getGraph ()->getImporter ();
1233 Diagonal_->template sync<device_type> ();
1236 if (PrecType_ == Ifpack2::Details::MTGS || PrecType_ == Ifpack2::Details::MTSGS) {
1239 const crs_matrix_type* crsMat =
dynamic_cast<const crs_matrix_type*
> (A_.get());
1241 (crsMat == NULL, std::logic_error,
"Ifpack2::Relaxation::compute: "
1242 "Multithreaded Gauss-Seidel methods currently only work when the input "
1243 "matrix is a Tpetra::CrsMatrix.");
1244 local_matrix_type kcsr = crsMat->getLocalMatrix ();
1246 const bool is_symmetric = (PrecType_ == Ifpack2::Details::MTSGS);
1247 using KokkosSparse::Experimental::gauss_seidel_numeric;
1248 typedef typename scalar_nonzero_view_t::device_type dev_type;
1249 auto diagView_2d = Diagonal_->template getLocalView<dev_type> ();
1250 auto diagView_1d = Kokkos::subview (diagView_2d, Kokkos::ALL (), 0);
1251 gauss_seidel_numeric<mt_kernel_handle_type,
1254 scalar_nonzero_view_t> (mtKernelHandle_.getRawPtr (),
1255 A_->getNodeNumRows (),
1256 A_->getNodeNumCols (),
1271 template<
class MatrixType>
1274 ApplyInverseJacobi (
const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1275 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
const
1278 if (hasBlockCrsMatrix_) {
1279 ApplyInverseJacobi_BlockCrsMatrix (X, Y);
1283 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1284 const double numVectors = as<double> (X.getNumVectors ());
1285 if (ZeroStartingSolution_) {
1290 Y.elementWiseMultiply (DampingFactor_, *Diagonal_, X, STS::zero ());
1296 double flopUpdate = 0.0;
1297 if (DampingFactor_ == STS::one ()) {
1299 flopUpdate = numGlobalRows * numVectors;
1303 flopUpdate = 2.0 * numGlobalRows * numVectors;
1305 ApplyFlops_ += flopUpdate;
1306 if (NumSweeps_ == 1) {
1312 const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1315 updateCachedMultiVector(Y.getMap(),as<size_t>(numVectors));
1317 for (
int j = startSweep; j < NumSweeps_; ++j) {
1319 applyMat (Y, *cachedMV_);
1320 cachedMV_->update (STS::one (), X, -STS::one ());
1321 Y.elementWiseMultiply (DampingFactor_, *Diagonal_, *cachedMV_, STS::one ());
1335 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1336 const double dampingFlops = (DampingFactor_ == STS::one ()) ? 0.0 : 1.0;
1337 ApplyFlops_ += as<double> (NumSweeps_ - startSweep) * numVectors *
1338 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1341 template<
class MatrixType>
1343 Relaxation<MatrixType>::
1344 ApplyInverseJacobi_BlockCrsMatrix (
const Tpetra::MultiVector<scalar_type,
1346 global_ordinal_type,
1348 Tpetra::MultiVector<scalar_type,
1350 global_ordinal_type,
1351 node_type>& Y)
const
1353 typedef Tpetra::Experimental::BlockMultiVector<scalar_type,
1354 local_ordinal_type, global_ordinal_type, node_type> BMV;
1356 const block_crs_matrix_type* blockMatConst =
1357 dynamic_cast<const block_crs_matrix_type*
> (A_.getRawPtr ());
1359 (blockMatConst == NULL, std::logic_error,
"This method should never be "
1360 "called if the matrix A_ is not a BlockCrsMatrix. Please report this "
1361 "bug to the Ifpack2 developers.");
1366 block_crs_matrix_type* blockMat =
1367 const_cast<block_crs_matrix_type*
> (blockMatConst);
1369 auto meshRowMap = blockMat->getRowMap ();
1370 auto meshColMap = blockMat->getColMap ();
1371 const local_ordinal_type blockSize = blockMat->getBlockSize ();
1373 BMV X_blk (X, *meshColMap, blockSize);
1374 BMV Y_blk (Y, *meshRowMap, blockSize);
1376 if (ZeroStartingSolution_) {
1380 Y_blk.blockWiseMultiply (DampingFactor_, blockDiag_, X_blk);
1381 if (NumSweeps_ == 1) {
1386 auto pointRowMap = Y.getMap ();
1387 const size_t numVecs = X.getNumVectors ();
1391 BMV A_times_Y (*meshRowMap, *pointRowMap, blockSize, numVecs);
1395 const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1397 for (
int j = startSweep; j < NumSweeps_; ++j) {
1398 blockMat->applyBlock (Y_blk, A_times_Y);
1400 Y_blk.blockJacobiUpdate (DampingFactor_, blockDiag_,
1401 X_blk, A_times_Y, STS::one ());
1405 template<
class MatrixType>
1407 Relaxation<MatrixType>::
1408 ApplyInverseGS (
const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1409 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
const
1411 typedef Relaxation<MatrixType> this_type;
1421 const block_crs_matrix_type* blockCrsMat =
1422 dynamic_cast<const block_crs_matrix_type*
> (A_.getRawPtr ());
1423 const crs_matrix_type* crsMat =
1424 dynamic_cast<const crs_matrix_type*
> (A_.getRawPtr ());
1425 if (blockCrsMat != NULL) {
1426 const_cast<this_type*
> (
this)->ApplyInverseGS_BlockCrsMatrix (*blockCrsMat, X, Y);
1427 }
else if (crsMat != NULL) {
1428 ApplyInverseGS_CrsMatrix (*crsMat, X, Y);
1430 ApplyInverseGS_RowMatrix (X, Y);
1435 template<
class MatrixType>
1437 Relaxation<MatrixType>::
1438 ApplyInverseGS_RowMatrix (
const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1439 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
const
1447 using Teuchos::rcpFromRef;
1448 typedef Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> MV;
1453 if (ZeroStartingSolution_) {
1454 Y.putScalar (STS::zero ());
1457 const size_t NumVectors = X.getNumVectors ();
1458 const size_t maxLength = A_->getNodeMaxNumRowEntries ();
1459 Array<local_ordinal_type> Indices (maxLength);
1460 Array<scalar_type> Values (maxLength);
1463 const size_t numMyRows = A_->getNodeNumRows();
1464 const local_ordinal_type* rowInd = 0;
1465 size_t numActive = numMyRows;
1466 bool do_local = ! localSmoothingIndices_.is_null ();
1468 rowInd = localSmoothingIndices_.getRawPtr ();
1469 numActive = localSmoothingIndices_.size ();
1474 if (Importer_.is_null ()) {
1475 updateCachedMultiVector(Y.getMap(),NumVectors);
1477 updateCachedMultiVector(Importer_->getTargetMap(),NumVectors);
1482 Y2 = rcpFromRef (Y);
1486 ArrayRCP<const scalar_type> d_rcp = Diagonal_->get1dView ();
1487 ArrayView<const scalar_type> d_ptr = d_rcp();
1490 bool constant_stride = X.isConstantStride() && Y2->isConstantStride();
1492 if (constant_stride) {
1494 size_t x_stride = X.getStride();
1495 size_t y2_stride = Y2->getStride();
1496 ArrayRCP<scalar_type> y2_rcp = Y2->get1dViewNonConst();
1497 ArrayRCP<const scalar_type> x_rcp = X.get1dView();
1498 ArrayView<scalar_type> y2_ptr = y2_rcp();
1499 ArrayView<const scalar_type> x_ptr = x_rcp();
1500 Array<scalar_type> dtemp(NumVectors,STS::zero());
1502 for (
int j = 0; j < NumSweeps_; ++j) {
1505 if (Importer_.is_null ()) {
1508 Y2->doImport (Y, *Importer_, Tpetra::INSERT);
1512 if (! DoBackwardGS_) {
1513 for (
size_t ii = 0; ii < numActive; ++ii) {
1514 local_ordinal_type i = as<local_ordinal_type> (do_local ? rowInd[ii] : ii);
1516 A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
1517 dtemp.assign(NumVectors,STS::zero());
1519 for (
size_t k = 0; k < NumEntries; ++k) {
1520 const local_ordinal_type col = Indices[k];
1521 for (
size_t m = 0; m < NumVectors; ++m) {
1522 dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
1526 for (
size_t m = 0; m < NumVectors; ++m) {
1527 y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
1534 for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
1535 local_ordinal_type i = as<local_ordinal_type> (do_local ? rowInd[ii] : ii);
1537 A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
1538 dtemp.assign (NumVectors, STS::zero ());
1540 for (
size_t k = 0; k < NumEntries; ++k) {
1541 const local_ordinal_type col = Indices[k];
1542 for (
size_t m = 0; m < NumVectors; ++m) {
1543 dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
1547 for (
size_t m = 0; m < NumVectors; ++m) {
1548 y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
1554 Tpetra::deep_copy (Y, *Y2);
1560 ArrayRCP<ArrayRCP<scalar_type> > y2_ptr = Y2->get2dViewNonConst ();
1561 ArrayRCP<ArrayRCP<const scalar_type> > x_ptr = X.get2dView ();
1563 for (
int j = 0; j < NumSweeps_; ++j) {
1566 if (Importer_.is_null ()) {
1569 Y2->doImport (Y, *Importer_, Tpetra::INSERT);
1573 if (! DoBackwardGS_) {
1574 for (
size_t ii = 0; ii < numActive; ++ii) {
1575 local_ordinal_type i = as<local_ordinal_type> (do_local ? rowInd[ii] : ii);
1577 A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
1579 for (
size_t m = 0; m < NumVectors; ++m) {
1580 scalar_type dtemp = STS::zero ();
1581 ArrayView<const scalar_type> x_local = (x_ptr())[m]();
1582 ArrayView<scalar_type> y2_local = (y2_ptr())[m]();
1584 for (
size_t k = 0; k < NumEntries; ++k) {
1585 const local_ordinal_type col = Indices[k];
1586 dtemp += Values[k] * y2_local[col];
1588 y2_local[i] += DampingFactor_ * d_ptr[i] * (x_local[i] - dtemp);
1595 for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
1596 local_ordinal_type i = as<local_ordinal_type> (do_local ? rowInd[ii] : ii);
1599 A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
1601 for (
size_t m = 0; m < NumVectors; ++m) {
1602 scalar_type dtemp = STS::zero ();
1603 ArrayView<const scalar_type> x_local = (x_ptr())[m]();
1604 ArrayView<scalar_type> y2_local = (y2_ptr())[m]();
1606 for (
size_t k = 0; k < NumEntries; ++k) {
1607 const local_ordinal_type col = Indices[k];
1608 dtemp += Values[k] * y2_local[col];
1610 y2_local[i] += DampingFactor_ * d_ptr[i] * (x_local[i] - dtemp);
1617 Tpetra::deep_copy (Y, *Y2);
1624 const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1625 const double numVectors = as<double> (X.getNumVectors ());
1626 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1627 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1628 ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
1629 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1633 template<
class MatrixType>
1635 Relaxation<MatrixType>::
1636 ApplyInverseGS_CrsMatrix (
const crs_matrix_type& A,
1637 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1638 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
const
1641 const Tpetra::ESweepDirection direction =
1642 DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward;
1643 if (localSmoothingIndices_.is_null ()) {
1644 A.gaussSeidelCopy (Y, X, *Diagonal_, DampingFactor_, direction,
1645 NumSweeps_, ZeroStartingSolution_);
1648 A.reorderedGaussSeidelCopy (Y, X, *Diagonal_, localSmoothingIndices_ (),
1649 DampingFactor_, direction,
1650 NumSweeps_, ZeroStartingSolution_);
1664 const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1665 const double numVectors = as<double> (X.getNumVectors ());
1666 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1667 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1668 ApplyFlops_ += NumSweeps_ * numVectors *
1669 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1672 template<
class MatrixType>
1674 Relaxation<MatrixType>::
1675 ApplyInverseGS_BlockCrsMatrix (
const block_crs_matrix_type& A,
1676 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1677 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
1681 using Teuchos::rcpFromRef;
1682 typedef Tpetra::Experimental::BlockMultiVector<scalar_type,
1683 local_ordinal_type, global_ordinal_type, node_type> BMV;
1684 typedef Tpetra::MultiVector<scalar_type,
1685 local_ordinal_type, global_ordinal_type, node_type> MV;
1694 BMV yBlock (Y, * (A.getGraph ()->getDomainMap ()), A.getBlockSize ());
1695 const BMV xBlock (X, * (A.getColMap ()), A.getBlockSize ());
1697 bool performImport =
false;
1699 if (Importer_.is_null ()) {
1700 yBlockCol = rcpFromRef (yBlock);
1703 if (yBlockColumnPointMap_.is_null () ||
1704 yBlockColumnPointMap_->getNumVectors () != yBlock.getNumVectors () ||
1705 yBlockColumnPointMap_->getBlockSize () != yBlock.getBlockSize ()) {
1706 yBlockColumnPointMap_ =
1707 rcp (
new BMV (* (A.getColMap ()), A.getBlockSize (),
1708 static_cast<local_ordinal_type
> (yBlock.getNumVectors ())));
1710 yBlockCol = yBlockColumnPointMap_;
1711 performImport =
true;
1714 if (ZeroStartingSolution_) {
1715 yBlockCol->putScalar (STS::zero ());
1717 else if (performImport) {
1718 yBlockCol->doImport (yBlock, *Importer_, Tpetra::INSERT);
1721 const Tpetra::ESweepDirection direction =
1722 DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward;
1724 for (
int sweep = 0; sweep < NumSweeps_; ++sweep) {
1725 if (performImport && sweep > 0) {
1726 yBlockCol->doImport(yBlock, *Importer_, Tpetra::INSERT);
1728 A.localGaussSeidel (xBlock, *yBlockCol, blockDiag_,
1729 DampingFactor_, direction);
1730 if (performImport) {
1731 RCP<const MV> yBlockColPointDomain =
1732 yBlockCol->getMultiVectorView ().offsetView (A.getDomainMap (), 0);
1733 Tpetra::deep_copy (Y, *yBlockColPointDomain);
1741 template<
class MatrixType>
1743 Relaxation<MatrixType>::
1744 MTGaussSeidel (
const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& B,
1745 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1746 const Tpetra::ESweepDirection direction)
const
1748 using Teuchos::null;
1751 using Teuchos::rcpFromRef;
1752 using Teuchos::rcp_const_cast;
1755 typedef scalar_type Scalar;
1756 typedef local_ordinal_type LocalOrdinal;
1757 typedef global_ordinal_type GlobalOrdinal;
1758 typedef node_type Node;
1760 const char prefix[] =
"Ifpack2::Relaxation::(reordered)MTGaussSeidel: ";
1763 const crs_matrix_type* crsMat =
dynamic_cast<const crs_matrix_type*
> (A_.get());
1765 (crsMat == NULL, std::logic_error,
"Ifpack2::Relaxation::apply: "
1766 "Multithreaded Gauss-Seidel methods currently only work when the input "
1767 "matrix is a Tpetra::CrsMatrix.");
1771 (! localSmoothingIndices_.is_null (), std::logic_error,
1772 "Our implementation of Multithreaded Gauss-Seidel does not implement the "
1773 "use case where the user supplies an iteration order. "
1774 "This error used to appear as \"MT GaussSeidel ignores the given "
1776 "I tried to add more explanation, but I didn't implement \"MT "
1777 "GaussSeidel\" [sic]. "
1778 "You'll have to ask the person who did.");
1781 (crsMat == NULL, std::logic_error, prefix <<
"The matrix is NULL. This "
1782 "should never happen. Please report this bug to the Ifpack2 developers.");
1784 (! crsMat->isFillComplete (), std::runtime_error, prefix <<
"The input "
1785 "CrsMatrix is not fill complete. Please call fillComplete on the matrix,"
1786 " then do setup again, before calling apply(). \"Do setup\" means that "
1787 "if only the matrix's values have changed since last setup, you need only"
1788 " call compute(). If the matrix's structure may also have changed, you "
1789 "must first call initialize(), then call compute(). If you have not set"
1790 " up this preconditioner for this matrix before, you must first call "
1791 "initialize(), then call compute().");
1793 (NumSweeps_ < 0, std::invalid_argument, prefix <<
"The number of sweeps "
1794 "must be nonnegative, but you provided numSweeps = " << NumSweeps_ <<
1796 if (NumSweeps_ == 0) {
1800 typedef typename Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> MV;
1801 typedef typename crs_matrix_type::import_type import_type;
1802 typedef typename crs_matrix_type::export_type export_type;
1803 typedef typename crs_matrix_type::map_type map_type;
1805 RCP<const import_type> importer = crsMat->getGraph ()->getImporter ();
1806 RCP<const export_type> exporter = crsMat->getGraph ()->getExporter ();
1808 ! exporter.is_null (), std::runtime_error,
1809 "This method's implementation currently requires that the matrix's row, "
1810 "domain, and range Maps be the same. This cannot be the case, because "
1811 "the matrix has a nontrivial Export object.");
1813 RCP<const map_type> domainMap = crsMat->getDomainMap ();
1814 RCP<const map_type> rangeMap = crsMat->getRangeMap ();
1815 RCP<const map_type> rowMap = crsMat->getGraph ()->getRowMap ();
1816 RCP<const map_type> colMap = crsMat->getGraph ()->getColMap ();
1818 #ifdef HAVE_IFPACK2_DEBUG
1824 ! X.getMap ()->isSameAs (*domainMap), std::runtime_error,
1825 "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
1826 "multivector X be in the domain Map of the matrix.");
1828 ! B.getMap ()->isSameAs (*rangeMap), std::runtime_error,
1829 "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
1830 "B be in the range Map of the matrix.");
1832 ! D.getMap ()->isSameAs (*rowMap), std::runtime_error,
1833 "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
1834 "D be in the row Map of the matrix.");
1836 ! rowMap->isSameAs (*rangeMap), std::runtime_error,
1837 "Ifpack2::Relaxation::MTGaussSeidel requires that the row Map and the "
1838 "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
1840 ! domainMap->isSameAs (*rangeMap), std::runtime_error,
1841 "Ifpack2::Relaxation::MTGaussSeidel requires that the domain Map and "
1842 "the range Map of the matrix be the same.");
1848 #endif // HAVE_IFPACK2_DEBUG
1859 RCP<MV> X_domainMap;
1860 bool copyBackOutput =
false;
1861 if (importer.is_null ()) {
1862 if (X.isConstantStride ()) {
1863 X_colMap = rcpFromRef (X);
1864 X_domainMap = rcpFromRef (X);
1871 if (ZeroStartingSolution_) {
1872 X_colMap->putScalar (ZERO);
1881 updateCachedMultiVector(colMap,as<size_t>(X.getNumVectors()));
1882 X_colMap = cachedMV_;
1886 X_domainMap = X_colMap;
1887 if (! ZeroStartingSolution_) {
1889 deep_copy (*X_domainMap , X);
1890 }
catch (std::exception& e) {
1891 std::ostringstream os;
1892 os <<
"Ifpack2::Relaxation::MTGaussSeidel: "
1893 "deep_copy(*X_domainMap, X) threw an exception: "
1894 << e.what () <<
".";
1898 copyBackOutput =
true;
1912 updateCachedMultiVector(colMap,as<size_t>(X.getNumVectors()));
1913 X_colMap = cachedMV_;
1915 X_domainMap = X_colMap->offsetViewNonConst (domainMap, 0);
1917 #ifdef HAVE_IFPACK2_DEBUG
1918 auto X_colMap_host_view = X_colMap->template getLocalView<Kokkos::HostSpace> ();
1919 auto X_domainMap_host_view = X_domainMap->template getLocalView<Kokkos::HostSpace> ();
1921 if (X_colMap->getLocalLength () != 0 && X_domainMap->getLocalLength ()) {
1923 X_colMap_host_view.data () != X_domainMap_host_view.data (),
1924 std::logic_error,
"Ifpack2::Relaxation::MTGaussSeidel: "
1925 "Pointer to start of column Map view of X is not equal to pointer to "
1926 "start of (domain Map view of) X. This may mean that "
1927 "Tpetra::MultiVector::offsetViewNonConst is broken. "
1928 "Please report this bug to the Tpetra developers.");
1932 X_colMap_host_view.extent (0) < X_domainMap_host_view.extent (0) ||
1933 X_colMap->getLocalLength () < X_domainMap->getLocalLength (),
1934 std::logic_error,
"Ifpack2::Relaxation::MTGaussSeidel: "
1935 "X_colMap has fewer local rows than X_domainMap. "
1936 "X_colMap_host_view.extent(0) = " << X_colMap_host_view.extent (0)
1937 <<
", X_domainMap_host_view.extent(0) = "
1938 << X_domainMap_host_view.extent (0)
1939 <<
", X_colMap->getLocalLength() = " << X_colMap->getLocalLength ()
1940 <<
", and X_domainMap->getLocalLength() = "
1941 << X_domainMap->getLocalLength ()
1942 <<
". This means that Tpetra::MultiVector::offsetViewNonConst "
1943 "is broken. Please report this bug to the Tpetra developers.");
1946 X_colMap->getNumVectors () != X_domainMap->getNumVectors (),
1947 std::logic_error,
"Ifpack2::Relaxation::MTGaussSeidel: "
1948 "X_colMap has a different number of columns than X_domainMap. "
1949 "X_colMap->getNumVectors() = " << X_colMap->getNumVectors ()
1950 <<
" != X_domainMap->getNumVectors() = "
1951 << X_domainMap->getNumVectors ()
1952 <<
". This means that Tpetra::MultiVector::offsetViewNonConst "
1953 "is broken. Please report this bug to the Tpetra developers.");
1954 #endif // HAVE_IFPACK2_DEBUG
1956 if (ZeroStartingSolution_) {
1958 X_colMap->putScalar (ZERO);
1968 X_colMap->doImport (X, *importer, Tpetra::CombineMode::INSERT);
1970 copyBackOutput =
true;
1977 if (B.isConstantStride ()) {
1978 B_in = rcpFromRef (B);
1985 RCP<MV> B_in_nonconst =
rcp (
new MV (rowMap, B.getNumVectors()));
1987 deep_copy (*B_in_nonconst, B);
1988 }
catch (std::exception& e) {
1989 std::ostringstream os;
1990 os <<
"Ifpack2::Relaxation::MTGaussSeidel: "
1991 "deep_copy(*B_in_nonconst, B) threw an exception: "
1992 << e.what () <<
".";
1995 B_in = rcp_const_cast<
const MV> (B_in_nonconst);
2009 local_matrix_type kcsr = crsMat->getLocalMatrix ();
2010 const size_t NumVectors = X.getNumVectors ();
2012 bool update_y_vector =
true;
2014 bool zero_x_vector =
false;
2016 for (
int sweep = 0; sweep < NumSweeps_; ++sweep) {
2017 if (! importer.is_null () && sweep > 0) {
2020 X_colMap->doImport (*X_domainMap, *importer, Tpetra::CombineMode::INSERT);
2023 for (
size_t indVec = 0; indVec < NumVectors; ++indVec) {
2024 if (direction == Tpetra::Symmetric) {
2025 KokkosSparse::Experimental::symmetric_gauss_seidel_apply
2026 (mtKernelHandle_.getRawPtr(), A_->getNodeNumRows(), A_->getNodeNumCols(),
2027 kcsr.graph.row_map, kcsr.graph.entries, kcsr.values,
2028 Kokkos::subview(X_colMap->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec),
2029 Kokkos::subview(B_in->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec),
2030 zero_x_vector, update_y_vector, DampingFactor_);
2032 else if (direction == Tpetra::Forward) {
2033 KokkosSparse::Experimental::forward_sweep_gauss_seidel_apply
2034 (mtKernelHandle_.getRawPtr(), A_->getNodeNumRows(), A_->getNodeNumCols(),
2035 kcsr.graph.row_map,kcsr.graph.entries, kcsr.values,
2036 Kokkos::subview(X_colMap->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec ),
2037 Kokkos::subview(B_in->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec),
2038 zero_x_vector, update_y_vector, DampingFactor_);
2040 else if (direction == Tpetra::Backward) {
2041 KokkosSparse::Experimental::backward_sweep_gauss_seidel_apply
2042 (mtKernelHandle_.getRawPtr(), A_->getNodeNumRows(), A_->getNodeNumCols(),
2043 kcsr.graph.row_map,kcsr.graph.entries, kcsr.values,
2044 Kokkos::subview(X_colMap->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec ),
2045 Kokkos::subview(B_in->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec),
2046 zero_x_vector, update_y_vector, DampingFactor_);
2050 true, std::invalid_argument,
2051 prefix <<
"The 'direction' enum does not have any of its valid "
2052 "values: Forward, Backward, or Symmetric.");
2056 if (NumVectors > 1){
2057 update_y_vector =
true;
2060 update_y_vector =
false;
2064 if (copyBackOutput) {
2066 deep_copy (X , *X_domainMap);
2067 }
catch (std::exception& e) {
2069 true, std::runtime_error, prefix <<
"deep_copy(X, *X_domainMap) "
2070 "threw an exception: " << e.what ());
2074 const double dampingFlops = (DampingFactor_ == STS::one ()) ? 0.0 : 1.0;
2075 const double numVectors = as<double> (X.getNumVectors ());
2076 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
2077 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
2078 double ApplyFlops = NumSweeps_ * numVectors *
2079 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
2080 if (direction == Tpetra::Symmetric)
2082 ApplyFlops_ += ApplyFlops;
2086 template<
class MatrixType>
2088 Relaxation<MatrixType>::
2089 ApplyInverseMTSGS_CrsMatrix (
const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& B,
2090 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X)
const
2092 const Tpetra::ESweepDirection direction = Tpetra::Symmetric;
2093 this->MTGaussSeidel (B, X, direction);
2097 template<
class MatrixType>
2098 void Relaxation<MatrixType>::ApplyInverseMTGS_CrsMatrix (
2099 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& B,
2100 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X)
const {
2102 const Tpetra::ESweepDirection direction =
2103 DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward;
2104 this->MTGaussSeidel (B, X, direction);
2107 template<
class MatrixType>
2109 Relaxation<MatrixType>::
2110 ApplyInverseSGS (
const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
2111 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
const
2113 typedef Relaxation<MatrixType> this_type;
2123 const block_crs_matrix_type* blockCrsMat =
dynamic_cast<const block_crs_matrix_type*
> (A_.getRawPtr());
2124 const crs_matrix_type* crsMat =
dynamic_cast<const crs_matrix_type*
> (&(*A_));
2125 if (blockCrsMat != NULL) {
2126 const_cast<this_type*
> (
this)->ApplyInverseSGS_BlockCrsMatrix(*blockCrsMat, X, Y);
2128 else if (crsMat != NULL) {
2129 ApplyInverseSGS_CrsMatrix (*crsMat, X, Y);
2131 ApplyInverseSGS_RowMatrix (X, Y);
2136 template<
class MatrixType>
2138 Relaxation<MatrixType>::
2139 ApplyInverseSGS_RowMatrix (
const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
2140 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
const
2148 using Teuchos::rcpFromRef;
2149 typedef Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> MV;
2155 if (ZeroStartingSolution_) {
2156 Y.putScalar (STS::zero ());
2159 const size_t NumVectors = X.getNumVectors ();
2160 const size_t maxLength = A_->getNodeMaxNumRowEntries ();
2161 Array<local_ordinal_type> Indices (maxLength);
2162 Array<scalar_type> Values (maxLength);
2165 const size_t numMyRows = A_->getNodeNumRows();
2166 const local_ordinal_type * rowInd = 0;
2167 size_t numActive = numMyRows;
2168 bool do_local = !localSmoothingIndices_.is_null();
2170 rowInd = localSmoothingIndices_.getRawPtr();
2171 numActive = localSmoothingIndices_.size();
2177 if (Importer_.is_null ()) {
2178 updateCachedMultiVector(Y.getMap(),NumVectors);
2180 updateCachedMultiVector(Importer_->getTargetMap(),NumVectors);
2185 Y2 = rcpFromRef (Y);
2189 ArrayRCP<const scalar_type> d_rcp = Diagonal_->get1dView();
2190 ArrayView<const scalar_type> d_ptr = d_rcp();
2193 bool constant_stride = X.isConstantStride() && Y2->isConstantStride();
2195 if(constant_stride) {
2197 size_t x_stride = X.getStride();
2198 size_t y2_stride = Y2->getStride();
2199 ArrayRCP<scalar_type> y2_rcp = Y2->get1dViewNonConst();
2200 ArrayRCP<const scalar_type> x_rcp = X.get1dView();
2201 ArrayView<scalar_type> y2_ptr = y2_rcp();
2202 ArrayView<const scalar_type> x_ptr = x_rcp();
2203 Array<scalar_type> dtemp(NumVectors,STS::zero());
2205 for (
int j = 0; j < NumSweeps_; j++) {
2208 if (Importer_.is_null ()) {
2210 Tpetra::deep_copy (*Y2, Y);
2212 Y2->doImport (Y, *Importer_, Tpetra::INSERT);
2215 for (
size_t ii = 0; ii < numActive; ++ii) {
2216 local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
2218 A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
2219 dtemp.assign(NumVectors,STS::zero());
2221 for (
size_t k = 0; k < NumEntries; ++k) {
2222 const local_ordinal_type col = Indices[k];
2223 for (
size_t m = 0; m < NumVectors; ++m) {
2224 dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
2228 for (
size_t m = 0; m < NumVectors; ++m) {
2229 y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
2235 for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
2236 local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
2238 A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
2239 dtemp.assign(NumVectors,STS::zero());
2241 for (
size_t k = 0; k < NumEntries; ++k) {
2242 const local_ordinal_type col = Indices[k];
2243 for (
size_t m = 0; m < NumVectors; ++m) {
2244 dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
2248 for (
size_t m = 0; m < NumVectors; ++m) {
2249 y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
2255 Tpetra::deep_copy (Y, *Y2);
2261 ArrayRCP<ArrayRCP<scalar_type> > y2_ptr = Y2->get2dViewNonConst ();
2262 ArrayRCP<ArrayRCP<const scalar_type> > x_ptr = X.get2dView ();
2264 for (
int iter = 0; iter < NumSweeps_; ++iter) {
2267 if (Importer_.is_null ()) {
2269 Tpetra::deep_copy (*Y2, Y);
2271 Y2->doImport (Y, *Importer_, Tpetra::INSERT);
2275 for (
size_t ii = 0; ii < numActive; ++ii) {
2276 local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
2277 const scalar_type diag = d_ptr[i];
2279 A_->getLocalRowCopy (as<local_ordinal_type> (i), Indices (), Values (), NumEntries);
2281 for (
size_t m = 0; m < NumVectors; ++m) {
2282 scalar_type dtemp = STS::zero ();
2283 ArrayView<const scalar_type> x_local = (x_ptr())[m]();
2284 ArrayView<scalar_type> y2_local = (y2_ptr())[m]();
2286 for (
size_t k = 0; k < NumEntries; ++k) {
2287 const local_ordinal_type col = Indices[k];
2288 dtemp += Values[k] * y2_local[col];
2290 y2_local[i] += DampingFactor_ * (x_local[i] - dtemp) * diag;
2296 for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
2297 local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
2298 const scalar_type diag = d_ptr[i];
2300 A_->getLocalRowCopy (as<local_ordinal_type> (i), Indices (), Values (), NumEntries);
2302 for (
size_t m = 0; m < NumVectors; ++m) {
2303 scalar_type dtemp = STS::zero ();
2304 ArrayView<const scalar_type> x_local = (x_ptr())[m]();
2305 ArrayView<scalar_type> y2_local = (y2_ptr())[m]();
2307 for (
size_t k = 0; k < NumEntries; ++k) {
2308 const local_ordinal_type col = Indices[k];
2309 dtemp += Values[k] * y2_local[col];
2311 y2_local[i] += DampingFactor_ * (x_local[i] - dtemp) * diag;
2317 Tpetra::deep_copy (Y, *Y2);
2323 const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
2324 const double numVectors = as<double> (X.getNumVectors ());
2325 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
2326 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
2327 ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
2328 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
2332 template<
class MatrixType>
2334 Relaxation<MatrixType>::
2335 ApplyInverseSGS_CrsMatrix (
const crs_matrix_type& A,
2336 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
2337 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
const
2340 const Tpetra::ESweepDirection direction = Tpetra::Symmetric;
2341 if (localSmoothingIndices_.is_null ()) {
2342 A.gaussSeidelCopy (Y, X, *Diagonal_, DampingFactor_, direction,
2343 NumSweeps_, ZeroStartingSolution_);
2346 A.reorderedGaussSeidelCopy (Y, X, *Diagonal_, localSmoothingIndices_ (),
2347 DampingFactor_, direction,
2348 NumSweeps_, ZeroStartingSolution_);
2365 const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
2366 const double numVectors = as<double> (X.getNumVectors ());
2367 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
2368 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
2369 ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
2370 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
2373 template<
class MatrixType>
2375 Relaxation<MatrixType>::
2376 ApplyInverseSGS_BlockCrsMatrix (
const block_crs_matrix_type& A,
2377 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
2378 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
2382 using Teuchos::rcpFromRef;
2383 typedef Tpetra::Experimental::BlockMultiVector<scalar_type,
2384 local_ordinal_type, global_ordinal_type, node_type> BMV;
2385 typedef Tpetra::MultiVector<scalar_type,
2386 local_ordinal_type, global_ordinal_type, node_type> MV;
2395 BMV yBlock (Y, * (A.getGraph ()->getDomainMap ()), A.getBlockSize ());
2396 const BMV xBlock (X, * (A.getColMap ()), A.getBlockSize ());
2398 bool performImport =
false;
2400 if (Importer_.is_null ()) {
2401 yBlockCol = Teuchos::rcpFromRef (yBlock);
2404 if (yBlockColumnPointMap_.is_null () ||
2405 yBlockColumnPointMap_->getNumVectors () != yBlock.getNumVectors () ||
2406 yBlockColumnPointMap_->getBlockSize () != yBlock.getBlockSize ()) {
2407 yBlockColumnPointMap_ =
2408 rcp (
new BMV (* (A.getColMap ()), A.getBlockSize (),
2409 static_cast<local_ordinal_type
> (yBlock.getNumVectors ())));
2411 yBlockCol = yBlockColumnPointMap_;
2412 performImport =
true;
2415 if (ZeroStartingSolution_) {
2416 yBlockCol->putScalar (STS::zero ());
2418 else if (performImport) {
2419 yBlockCol->doImport (yBlock, *Importer_, Tpetra::INSERT);
2423 const Tpetra::ESweepDirection direction = Tpetra::Symmetric;
2425 for (
int sweep = 0; sweep < NumSweeps_; ++sweep) {
2426 if (performImport && sweep > 0) {
2427 yBlockCol->doImport (yBlock, *Importer_, Tpetra::INSERT);
2429 A.localGaussSeidel (xBlock, *yBlockCol, blockDiag_,
2430 DampingFactor_, direction);
2431 if (performImport) {
2432 RCP<const MV> yBlockColPointDomain =
2433 yBlockCol->getMultiVectorView ().offsetView (A.getDomainMap (), 0);
2434 MV yBlockView = yBlock.getMultiVectorView ();
2435 Tpetra::deep_copy (yBlockView, *yBlockColPointDomain);
2441 template<
class MatrixType>
2444 std::ostringstream os;
2449 os <<
"\"Ifpack2::Relaxation\": {";
2451 os <<
"Initialized: " << (isInitialized () ?
"true" :
"false") <<
", "
2452 <<
"Computed: " << (isComputed () ?
"true" :
"false") <<
", ";
2458 if (PrecType_ == Ifpack2::Details::JACOBI) {
2460 }
else if (PrecType_ == Ifpack2::Details::GS) {
2461 os <<
"Gauss-Seidel";
2462 }
else if (PrecType_ == Ifpack2::Details::SGS) {
2463 os <<
"Symmetric Gauss-Seidel";
2464 }
else if (PrecType_ == Ifpack2::Details::MTGS) {
2465 os <<
"MT Gauss-Seidel";
2466 }
else if (PrecType_ == Ifpack2::Details::MTSGS) {
2467 os <<
"MT Symmetric Gauss-Seidel";
2473 os <<
", " <<
"sweeps: " << NumSweeps_ <<
", "
2474 <<
"damping factor: " << DampingFactor_ <<
", ";
2476 os <<
"use l1: " << DoL1Method_ <<
", "
2477 <<
"l1 eta: " << L1Eta_ <<
", ";
2480 if (A_.is_null ()) {
2481 os <<
"Matrix: null";
2484 os <<
"Global matrix dimensions: ["
2485 << A_->getGlobalNumRows () <<
", " << A_->getGlobalNumCols () <<
"]"
2486 <<
", Global nnz: " << A_->getGlobalNumEntries();
2494 template<
class MatrixType>
2513 const int myRank = this->getComm ()->getRank ();
2525 out <<
"\"Ifpack2::Relaxation\":" << endl;
2527 out <<
"MatrixType: \"" << TypeNameTraits<MatrixType>::name () <<
"\""
2529 if (this->getObjectLabel () !=
"") {
2530 out <<
"Label: " << this->getObjectLabel () << endl;
2532 out <<
"Initialized: " << (isInitialized () ?
"true" :
"false") << endl
2533 <<
"Computed: " << (isComputed () ?
"true" :
"false") << endl
2534 <<
"Parameters: " << endl;
2537 out <<
"\"relaxation: type\": ";
2538 if (PrecType_ == Ifpack2::Details::JACOBI) {
2540 }
else if (PrecType_ == Ifpack2::Details::GS) {
2541 out <<
"Gauss-Seidel";
2542 }
else if (PrecType_ == Ifpack2::Details::SGS) {
2543 out <<
"Symmetric Gauss-Seidel";
2544 }
else if (PrecType_ == Ifpack2::Details::MTGS) {
2545 out <<
"MT Gauss-Seidel";
2546 }
else if (PrecType_ == Ifpack2::Details::MTSGS) {
2547 out <<
"MT Symmetric Gauss-Seidel";
2554 <<
"\"relaxation: sweeps\": " << NumSweeps_ << endl
2555 <<
"\"relaxation: damping factor\": " << DampingFactor_ << endl
2556 <<
"\"relaxation: min diagonal value\": " << MinDiagonalValue_ << endl
2557 <<
"\"relaxation: zero starting solution\": " << ZeroStartingSolution_ << endl
2558 <<
"\"relaxation: backward mode\": " << DoBackwardGS_ << endl
2559 <<
"\"relaxation: use l1\": " << DoL1Method_ << endl
2560 <<
"\"relaxation: l1 eta\": " << L1Eta_ << endl;
2562 out <<
"Computed quantities:" << endl;
2565 out <<
"Global number of rows: " << A_->getGlobalNumRows () << endl
2566 <<
"Global number of columns: " << A_->getGlobalNumCols () << endl;
2568 if (checkDiagEntries_ && isComputed ()) {
2569 out <<
"Properties of input diagonal entries:" << endl;
2572 out <<
"Magnitude of minimum-magnitude diagonal entry: "
2573 << globalMinMagDiagEntryMag_ << endl
2574 <<
"Magnitude of maximum-magnitude diagonal entry: "
2575 << globalMaxMagDiagEntryMag_ << endl
2576 <<
"Number of diagonal entries with small magnitude: "
2577 << globalNumSmallDiagEntries_ << endl
2578 <<
"Number of zero diagonal entries: "
2579 << globalNumZeroDiagEntries_ << endl
2580 <<
"Number of diagonal entries with negative real part: "
2581 << globalNumNegDiagEntries_ << endl
2582 <<
"Abs 2-norm diff between computed and actual inverse "
2583 <<
"diagonal: " << globalDiagNormDiff_ << endl;
2586 if (isComputed ()) {
2587 out <<
"Saved diagonal offsets: "
2588 << (savedDiagOffsets_ ?
"true" :
"false") << endl;
2590 out <<
"Call counts and total times (in seconds): " << endl;
2593 out <<
"initialize: " << endl;
2596 out <<
"Call count: " << NumInitialize_ << endl;
2597 out <<
"Total time: " << InitializeTime_ << endl;
2599 out <<
"compute: " << endl;
2602 out <<
"Call count: " << NumCompute_ << endl;
2603 out <<
"Total time: " << ComputeTime_ << endl;
2605 out <<
"apply: " << endl;
2608 out <<
"Call count: " << NumApply_ << endl;
2609 out <<
"Total time: " << ApplyTime_ << endl;
2617 #define IFPACK2_RELAXATION_INSTANT(S,LO,GO,N) \
2618 template class Ifpack2::Relaxation< Tpetra::RowMatrix<S, LO, GO, N> >;
2620 #endif // IFPACK2_RELAXATION_DEF_HPP
bool hasTransposeApply() const
Whether apply() and applyMat() let you apply the transpose or conjugate transpose.
Definition: Ifpack2_Relaxation_def.hpp:409
double getComputeFlops() const
Total number of floating-point operations over all calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:451
double getApplyFlops() const
Total number of floating-point operations over all calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:457
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object's attributes to the given output stream.
Definition: Ifpack2_Relaxation_def.hpp:2497
bool is_null(const boost::shared_ptr< T > &p)
basic_OSTab< char > OSTab
static magnitudeType eps()
Relaxation(const Teuchos::RCP< const row_matrix_type > &A)
Constructor.
Definition: Ifpack2_Relaxation_def.hpp:177
T & get(const std::string &name, T def_value)
void compute()
Compute the preconditioner.
Definition: Ifpack2_Relaxation_def.hpp:883
ParameterList & set(std::string const &name, T const &value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
virtual void printDoc(std::string const &docString, std::ostream &out) const =0
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
size_t getNodeSmootherComplexity() const
Get a rough estimate of cost per iteration.
Definition: Ifpack2_Relaxation_def.hpp:464
virtual const std::string getXMLTypeName() const =0
static std::ostream & printLines(std::ostream &os, const std::string &linePrefix, const std::string &lines)
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
The communicator over which the matrix and vectors are distributed.
Definition: Ifpack2_Relaxation_def.hpp:366
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Change the matrix to be preconditioned.
Definition: Ifpack2_Relaxation_def.hpp:157
int getNumApply() const
Total number of calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:427
bool remove(std::string const &name, bool throwIfNotExists=true)
double getComputeTime() const
Total time in seconds spent in all calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:439
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
MatrixType::node_type node_type
The Node type used by the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:247
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getDomainMap() const
Returns the Tpetra::Map object associated with the domain of this operator.
Definition: Ifpack2_Relaxation_def.hpp:386
void setParameters(const Teuchos::ParameterList ¶ms)
Set the relaxation / preconditioner parameters.
Definition: Ifpack2_Relaxation_def.hpp:356
File for utility functions.
std::string description() const
A simple one-line description of this object.
Definition: Ifpack2_Relaxation_def.hpp:2442
int getNumInitialize() const
Total number of calls to initialize().
Definition: Ifpack2_Relaxation_def.hpp:415
void validateParametersAndSetDefaults(ParameterList const &validParamList, int const depth=1000)
TEUCHOS_DEPRECATED void reduceAll(const Comm< Ordinal > &comm, const EReductionType reductType, const Packet &send, Packet *globalReduct)
virtual ValidStringsList validStringValues() const =0
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getRangeMap() const
Returns the Tpetra::Map object associated with the range of this operator.
Definition: Ifpack2_Relaxation_def.hpp:399
MatrixType::global_ordinal_type global_ordinal_type
The type of global indices in the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:244
double getInitializeTime() const
Total time in seconds spent in all calls to initialize().
Definition: Ifpack2_Relaxation_def.hpp:433
any & getAny(bool activeQry=true)
void applyMat(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS) const
Apply the preconditioner to X, returning the result in Y.
Definition: Ifpack2_Relaxation_def.hpp:584
Teuchos::RCP< const row_matrix_type > getMatrix() const
The matrix to be preconditioned.
Definition: Ifpack2_Relaxation_def.hpp:377
TypeTo as(const TypeFrom &t)
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:241
bool isType(const std::string &name) const
MatrixType::scalar_type scalar_type
The type of the entries of the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:238
double totalElapsedTime(bool readCurrentTime=false) const
std::string typeName() const
virtual ~Relaxation()
Destructor.
Definition: Ifpack2_Relaxation_def.hpp:218
const std::type_info & type() const
int getNumCompute() const
Total number of calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:421
Relaxation preconditioners for Tpetra::RowMatrix and Tpetra::CrsMatrix sparse matrices.
Definition: Ifpack2_Relaxation_decl.hpp:223
double getApplyTime() const
Total time in seconds spent in all calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:445
void getValidParameters(Teuchos::ParameterList ¶ms)
Fills a list which contains all the parameters possibly used by Ifpack2.
Definition: Ifpack2_Parameters.cpp:50
void initialize()
Initialize the preconditioner.
Definition: Ifpack2_Relaxation_def.hpp:605
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the preconditioner to X, returning the result in Y.
Definition: Ifpack2_Relaxation_def.hpp:477
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const
Return a list of all the parameters that this class accepts.
Definition: Ifpack2_Relaxation_def.hpp:223
Teuchos::ScalarTraits< scalar_type >::magnitudeType magnitude_type
The type of the magnitude (absolute value) of a matrix entry.
Definition: Ifpack2_Relaxation_decl.hpp:250
virtual void validate(ParameterEntry const &entry, std::string const ¶mName, std::string const &sublistName) const =0