43 #ifndef IFPACK2_RELAXATION_DEF_HPP
44 #define IFPACK2_RELAXATION_DEF_HPP
46 #include "Teuchos_StandardParameterEntryValidators.hpp"
48 #include "Tpetra_CrsMatrix.hpp"
49 #include "Tpetra_BlockCrsMatrix.hpp"
50 #include "Tpetra_BlockView.hpp"
52 #include "MatrixMarket_Tpetra.hpp"
53 #include "Tpetra_transform_MultiVector.hpp"
54 #include "Tpetra_withLocalAccess_MultiVector.hpp"
58 #include "KokkosSparse_gauss_seidel.hpp"
65 NonnegativeIntValidator () {}
75 const std::string& paramName,
76 const std::string& sublistName)
const
83 anyVal.
type () !=
typeid (int),
85 "Parameter \"" << paramName <<
"\" in sublist \"" << sublistName
86 <<
"\" has the wrong type." << endl <<
"Parameter: " << paramName
87 << endl <<
"Type specified: " << entryName << endl
88 <<
"Type required: int" << endl);
90 const int val = Teuchos::any_cast<
int> (anyVal);
93 "Parameter \"" << paramName <<
"\" in sublist \"" << sublistName
94 <<
"\" is negative." << endl <<
"Parameter: " << paramName
95 << endl <<
"Value specified: " << val << endl
96 <<
"Required range: [0, INT_MAX]" << endl);
101 return "NonnegativeIntValidator";
106 printDoc (
const std::string& docString,
107 std::ostream &out)
const
110 out <<
"#\tValidator Used: " << std::endl;
111 out <<
"#\t\tNonnegativeIntValidator" << std::endl;
118 template<class Scalar, const bool isOrdinal=Teuchos::ScalarTraits<Scalar>::isOrdinal>
122 static const Scalar eps ();
126 template<
class Scalar>
127 class SmallTraits<Scalar, true> {
129 static const Scalar eps () {
135 template<
class Scalar>
136 class SmallTraits<Scalar, false> {
138 static const Scalar eps () {
144 template<
class Scalar,
146 struct RealTraits {};
148 template<
class Scalar>
149 struct RealTraits<Scalar, false> {
150 using val_type = Scalar;
151 using mag_type = Scalar;
152 static KOKKOS_INLINE_FUNCTION mag_type real (
const val_type& z) {
157 template<
class Scalar>
158 struct RealTraits<Scalar, true> {
159 using val_type = Scalar;
161 static KOKKOS_INLINE_FUNCTION mag_type real (
const val_type& z) {
162 return Kokkos::ArithTraits<val_type>::real (z);
166 template<
class Scalar>
167 KOKKOS_INLINE_FUNCTION
typename RealTraits<Scalar>::mag_type
168 getRealValue (
const Scalar& z) {
169 return RealTraits<Scalar>::real (z);
176 template<
class MatrixType>
178 Relaxation<MatrixType>::
179 updateCachedMultiVector (
const Teuchos::RCP<
const Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type>>& map,
180 size_t numVecs)
const
184 if (cachedMV_.is_null () ||
185 map.get () != cachedMV_->getMap ().get () ||
186 cachedMV_->getNumVectors () != numVecs) {
187 using MV = Tpetra::MultiVector<scalar_type, local_ordinal_type,
188 global_ordinal_type, node_type>;
189 cachedMV_ =
Teuchos::rcp (
new MV (map, numVecs,
false));
194 template<
class MatrixType>
199 Importer_ = Teuchos::null;
200 Diagonal_ = Teuchos::null;
201 isInitialized_ =
false;
203 diagOffsets_ = Kokkos::View<size_t*, typename node_type::device_type> ();
204 savedDiagOffsets_ =
false;
205 hasBlockCrsMatrix_ =
false;
207 IsParallel_ = (A->getRowMap ()->getComm ()->getSize () > 1);
214 template<
class MatrixType>
219 PrecType_ (Ifpack2::Details::JACOBI),
220 DampingFactor_ (
STS::one ()),
221 IsParallel_ ((A.
is_null () || A->getRowMap ().
is_null () || A->getRowMap ()->getComm ().
is_null ()) ?
223 A->getRowMap ()->getComm ()->getSize () > 1),
224 ZeroStartingSolution_ (true),
225 DoBackwardGS_ (false),
228 MinDiagonalValue_ (
STS::zero ()),
229 fixTinyDiagEntries_ (false),
230 checkDiagEntries_ (false),
231 is_matrix_structurally_symmetric_ (false),
232 ifpack2_dump_matrix_(false),
233 isInitialized_ (false),
238 InitializeTime_ (0.0),
243 globalMinMagDiagEntryMag_ (
STM::zero ()),
244 globalMaxMagDiagEntryMag_ (
STM::zero ()),
245 globalNumSmallDiagEntries_ (0),
246 globalNumZeroDiagEntries_ (0),
247 globalNumNegDiagEntries_ (0),
248 globalDiagNormDiff_(Teuchos::ScalarTraits<
magnitude_type>::zero()),
249 savedDiagOffsets_ (false),
250 hasBlockCrsMatrix_ (false)
252 this->setObjectLabel (
"Ifpack2::Relaxation");
256 template<
class MatrixType>
262 using Teuchos::parameterList;
265 using Teuchos::rcp_const_cast;
266 using Teuchos::rcp_implicit_cast;
267 using Teuchos::setStringToIntegralParameter;
270 if (validParams_.is_null ()) {
271 RCP<ParameterList> pl = parameterList (
"Ifpack2::Relaxation");
275 Array<std::string> precTypes (5);
276 precTypes[0] =
"Jacobi";
277 precTypes[1] =
"Gauss-Seidel";
278 precTypes[2] =
"Symmetric Gauss-Seidel";
279 precTypes[3] =
"MT Gauss-Seidel";
280 precTypes[4] =
"MT Symmetric Gauss-Seidel";
281 Array<Details::RelaxationType> precTypeEnums (5);
282 precTypeEnums[0] = Details::JACOBI;
283 precTypeEnums[1] = Details::GS;
284 precTypeEnums[2] = Details::SGS;
285 precTypeEnums[3] = Details::MTGS;
286 precTypeEnums[4] = Details::MTSGS;
287 const std::string defaultPrecType (
"Jacobi");
288 setStringToIntegralParameter<Details::RelaxationType> (
"relaxation: type",
289 defaultPrecType,
"Relaxation method", precTypes (), precTypeEnums (),
292 const int numSweeps = 1;
293 RCP<PEV> numSweepsValidator =
294 rcp_implicit_cast<PEV> (
rcp (
new NonnegativeIntValidator));
295 pl->set (
"relaxation: sweeps", numSweeps,
"Number of relaxation sweeps",
296 rcp_const_cast<const PEV> (numSweepsValidator));
299 pl->set (
"relaxation: damping factor", dampingFactor);
301 const bool zeroStartingSolution =
true;
302 pl->set (
"relaxation: zero starting solution", zeroStartingSolution);
304 const bool doBackwardGS =
false;
305 pl->set (
"relaxation: backward mode", doBackwardGS);
307 const bool doL1Method =
false;
308 pl->set (
"relaxation: use l1", doL1Method);
310 const magnitude_type l1eta = (STM::one() + STM::one() + STM::one()) /
311 (STM::one() + STM::one());
312 pl->set (
"relaxation: l1 eta", l1eta);
315 pl->set (
"relaxation: min diagonal value", minDiagonalValue);
317 const bool fixTinyDiagEntries =
false;
318 pl->set (
"relaxation: fix tiny diagonal entries", fixTinyDiagEntries);
320 const bool checkDiagEntries =
false;
321 pl->set (
"relaxation: check diagonal entries", checkDiagEntries);
324 pl->set(
"relaxation: local smoothing indices", localSmoothingIndices);
326 const bool is_matrix_structurally_symmetric =
false;
327 pl->set(
"relaxation: symmetric matrix structure", is_matrix_structurally_symmetric);
329 const bool ifpack2_dump_matrix =
false;
330 pl->set(
"relaxation: ifpack2 dump matrix", ifpack2_dump_matrix);
332 validParams_ = rcp_const_cast<
const ParameterList> (pl);
338 template<
class MatrixType>
341 using Teuchos::getIntegralValue;
344 typedef scalar_type ST;
346 if (pl.
isType<
double>(
"relaxation: damping factor")) {
349 ST df = pl.
get<
double>(
"relaxation: damping factor");
350 pl.
remove(
"relaxation: damping factor");
351 pl.
set(
"relaxation: damping factor",df);
356 const Details::RelaxationType precType =
357 getIntegralValue<Details::RelaxationType> (pl,
"relaxation: type");
358 const int numSweeps = pl.get<
int> (
"relaxation: sweeps");
359 const ST dampingFactor = pl.get<ST> (
"relaxation: damping factor");
360 const bool zeroStartSol = pl.get<
bool> (
"relaxation: zero starting solution");
361 const bool doBackwardGS = pl.get<
bool> (
"relaxation: backward mode");
362 const bool doL1Method = pl.get<
bool> (
"relaxation: use l1");
363 const magnitude_type l1Eta = pl.get<magnitude_type> (
"relaxation: l1 eta");
364 const ST minDiagonalValue = pl.get<ST> (
"relaxation: min diagonal value");
365 const bool fixTinyDiagEntries = pl.get<
bool> (
"relaxation: fix tiny diagonal entries");
366 const bool checkDiagEntries = pl.get<
bool> (
"relaxation: check diagonal entries");
367 const bool is_matrix_structurally_symmetric = pl.get<
bool> (
"relaxation: symmetric matrix structure");
368 const bool ifpack2_dump_matrix = pl.get<
bool> (
"relaxation: ifpack2 dump matrix");
374 PrecType_ = precType;
375 NumSweeps_ = numSweeps;
376 DampingFactor_ = dampingFactor;
377 ZeroStartingSolution_ = zeroStartSol;
378 DoBackwardGS_ = doBackwardGS;
379 DoL1Method_ = doL1Method;
381 MinDiagonalValue_ = minDiagonalValue;
382 fixTinyDiagEntries_ = fixTinyDiagEntries;
383 checkDiagEntries_ = checkDiagEntries;
384 is_matrix_structurally_symmetric_ = is_matrix_structurally_symmetric;
385 ifpack2_dump_matrix_ = ifpack2_dump_matrix;
386 localSmoothingIndices_ = localSmoothingIndices;
390 template<
class MatrixType>
395 this->setParametersImpl(const_cast<Teuchos::ParameterList&>(pl));
399 template<
class MatrixType>
403 A_.is_null (), std::runtime_error,
"Ifpack2::Relaxation::getComm: "
404 "The input matrix A is null. Please call setMatrix() with a nonnull "
405 "input matrix before calling this method.");
406 return A_->getRowMap ()->getComm ();
410 template<
class MatrixType>
417 template<
class MatrixType>
418 Teuchos::RCP<
const Tpetra::Map<
typename MatrixType::local_ordinal_type,
419 typename MatrixType::global_ordinal_type,
420 typename MatrixType::node_type> >
423 A_.is_null (), std::runtime_error,
"Ifpack2::Relaxation::getDomainMap: "
424 "The input matrix A is null. Please call setMatrix() with a nonnull "
425 "input matrix before calling this method.");
426 return A_->getDomainMap ();
430 template<
class MatrixType>
431 Teuchos::RCP<
const Tpetra::Map<
typename MatrixType::local_ordinal_type,
432 typename MatrixType::global_ordinal_type,
433 typename MatrixType::node_type> >
436 A_.is_null (), std::runtime_error,
"Ifpack2::Relaxation::getRangeMap: "
437 "The input matrix A is null. Please call setMatrix() with a nonnull "
438 "input matrix before calling this method.");
439 return A_->getRangeMap ();
443 template<
class MatrixType>
449 template<
class MatrixType>
451 return(NumInitialize_);
455 template<
class MatrixType>
461 template<
class MatrixType>
467 template<
class MatrixType>
469 return(InitializeTime_);
473 template<
class MatrixType>
475 return(ComputeTime_);
479 template<
class MatrixType>
485 template<
class MatrixType>
487 return(ComputeFlops_);
491 template<
class MatrixType>
498 template<
class MatrixType>
501 A_.is_null (), std::runtime_error,
"Ifpack2::Relaxation::getNodeSmootherComplexity: "
502 "The input matrix A is null. Please call setMatrix() with a nonnull "
503 "input matrix, then call compute(), before calling this method.");
505 return A_->getNodeNumRows() + A_->getNodeNumEntries();
509 template<
class MatrixType>
512 apply (
const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
513 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
521 using Teuchos::rcpFromRef;
525 A_.is_null (), std::runtime_error,
"Ifpack2::Relaxation::apply: "
526 "The input matrix A is null. Please call setMatrix() with a nonnull "
527 "input matrix, then call compute(), before calling this method.");
531 "Ifpack2::Relaxation::apply: You must call compute() on this Ifpack2 "
532 "preconditioner instance before you may call apply(). You may call "
533 "isComputed() to find out if compute() has been called already.");
534 TEUCHOS_TEST_FOR_EXCEPTION(
535 X.getNumVectors() != Y.getNumVectors(),
537 "Ifpack2::Relaxation::apply: X and Y have different numbers of columns. "
538 "X has " << X.getNumVectors() <<
" columns, but Y has "
539 << Y.getNumVectors() <<
" columns.");
540 TEUCHOS_TEST_FOR_EXCEPTION(
541 beta != STS::zero (), std::logic_error,
542 "Ifpack2::Relaxation::apply: beta = " << beta <<
" != 0 case not "
545 const std::string timerName (
"Ifpack2::Relaxation::apply");
554 if (alpha == STS::zero ()) {
556 Y.putScalar (STS::zero ());
564 auto X_lcl_host = X.getLocalViewHost ();
565 auto Y_lcl_host = Y.getLocalViewHost ();
567 if (X_lcl_host.data () == Y_lcl_host.data ()) {
570 Xcopy = rcpFromRef (X);
578 case Ifpack2::Details::JACOBI:
579 ApplyInverseJacobi(*Xcopy,Y);
581 case Ifpack2::Details::GS:
582 ApplyInverseGS(*Xcopy,Y);
584 case Ifpack2::Details::SGS:
585 ApplyInverseSGS(*Xcopy,Y);
587 case Ifpack2::Details::MTSGS:
588 ApplyInverseMTSGS_CrsMatrix(*Xcopy,Y);
590 case Ifpack2::Details::MTGS:
591 ApplyInverseMTGS_CrsMatrix(*Xcopy,Y);
595 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::logic_error,
596 "Ifpack2::Relaxation::apply: Invalid preconditioner type enum value "
597 << PrecType_ <<
". Please report this bug to the Ifpack2 developers.");
599 if (alpha != STS::one ()) {
601 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
602 const double numVectors = as<double> (Y.getNumVectors ());
603 ApplyFlops_ += numGlobalRows * numVectors;
612 template<
class MatrixType>
615 applyMat (
const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
616 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
620 A_.is_null (), std::runtime_error,
"Ifpack2::Relaxation::applyMat: "
621 "The input matrix A is null. Please call setMatrix() with a nonnull "
622 "input matrix, then call compute(), before calling this method.");
624 ! isComputed (), std::runtime_error,
"Ifpack2::Relaxation::applyMat: "
625 "isComputed() must be true before you may call applyMat(). "
626 "Please call compute() before calling this method.");
627 TEUCHOS_TEST_FOR_EXCEPTION(
628 X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
629 "Ifpack2::Relaxation::applyMat: X.getNumVectors() = " << X.getNumVectors ()
630 <<
" != Y.getNumVectors() = " << Y.getNumVectors () <<
".");
631 A_->apply (X, Y, mode);
635 template<
class MatrixType>
638 const char methodName[] =
"Ifpack2::Relaxation::initialize";
641 (A_.is_null (), std::runtime_error, methodName <<
": The "
642 "input matrix A is null. Please call setMatrix() with "
643 "a nonnull input matrix before calling this method.");
650 isInitialized_ =
false;
653 auto rowMap = A_->getRowMap ();
654 auto comm = rowMap.is_null () ? Teuchos::null : rowMap->getComm ();
655 IsParallel_ = ! comm.is_null () && comm->getSize () > 1;
665 Importer_ = IsParallel_ ? A_->getGraph ()->getImporter () :
670 Teuchos::rcp_dynamic_cast<
const block_crs_matrix_type> (A_);
671 hasBlockCrsMatrix_ = ! A_bcrs.
is_null ();
674 if (PrecType_ == Details::MTGS || PrecType_ == Details::MTSGS) {
675 const crs_matrix_type* crsMat =
676 dynamic_cast<const crs_matrix_type*
> (A_.get());
678 (crsMat ==
nullptr, std::logic_error, methodName <<
": "
679 "Multithreaded Gauss-Seidel methods currently only work "
680 "when the input matrix is a Tpetra::CrsMatrix.");
685 if (ifpack2_dump_matrix_) {
686 static int sequence_number = 0;
687 const std::string file_name =
"Ifpack2_MT_GS_" +
688 std::to_string (sequence_number++) +
".mtx";
690 Teuchos::rcp_dynamic_cast<
const crs_matrix_type> (A_);
691 if (! rcp_crs_mat.
is_null ()) {
692 using writer_type = Tpetra::MatrixMarket::Writer<crs_matrix_type>;
693 writer_type::writeSparseFile (file_name, rcp_crs_mat);
697 this->mtKernelHandle_ =
Teuchos::rcp (
new mt_kernel_handle_type ());
698 if (mtKernelHandle_->get_gs_handle () ==
nullptr) {
699 mtKernelHandle_->create_gs_handle ();
701 local_matrix_type kcsr = crsMat->getLocalMatrix ();
703 bool is_symmetric = (PrecType_ == Ifpack2::Details::MTSGS);
704 is_symmetric = is_symmetric || is_matrix_structurally_symmetric_;
706 using KokkosSparse::Experimental::gauss_seidel_symbolic;
707 gauss_seidel_symbolic<mt_kernel_handle_type,
709 lno_nonzero_view_t> (mtKernelHandle_.getRawPtr (),
710 A_->getNodeNumRows (),
711 A_->getNodeNumCols (),
718 InitializeTime_ += timer->totalElapsedTime ();
720 isInitialized_ =
true;
724 template <
typename BlockDiagView>
725 struct InvertDiagBlocks {
726 typedef int value_type;
727 typedef typename BlockDiagView::size_type Size;
730 typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
731 template <
typename View>
732 using UnmanagedView = Kokkos::View<
typename View::data_type,
typename View::array_layout,
733 typename View::device_type, Unmanaged>;
735 typedef typename BlockDiagView::non_const_value_type Scalar;
736 typedef typename BlockDiagView::device_type Device;
737 typedef Kokkos::View<Scalar**, Kokkos::LayoutRight, Device> RWrk;
738 typedef Kokkos::View<int**, Kokkos::LayoutRight, Device> IWrk;
740 UnmanagedView<BlockDiagView> block_diag_;
744 UnmanagedView<RWrk> rwrk_;
746 UnmanagedView<IWrk> iwrk_;
749 InvertDiagBlocks (BlockDiagView& block_diag)
750 : block_diag_(block_diag)
752 const auto blksz = block_diag.extent(1);
753 Kokkos::resize(rwrk_buf_, block_diag_.extent(0), blksz);
755 Kokkos::resize(iwrk_buf_, block_diag_.extent(0), blksz);
759 KOKKOS_INLINE_FUNCTION
760 void operator() (
const Size i,
int& jinfo)
const {
761 auto D_cur = Kokkos::subview(block_diag_, i, Kokkos::ALL(), Kokkos::ALL());
762 auto ipiv = Kokkos::subview(iwrk_, i, Kokkos::ALL());
763 auto work = Kokkos::subview(rwrk_, i, Kokkos::ALL());
765 Tpetra::GETF2(D_cur, ipiv, info);
770 Tpetra::GETRI(D_cur, ipiv, work, info);
775 KOKKOS_INLINE_FUNCTION
776 void join (
volatile value_type& dst,
volatile value_type
const& src)
const { dst += src; }
780 template<
class MatrixType>
781 void Relaxation<MatrixType>::computeBlockCrs ()
794 using Teuchos::rcp_dynamic_cast;
796 typedef local_ordinal_type LO;
797 typedef typename node_type::device_type device_type;
799 const std::string timerName (
"Ifpack2::Relaxation::computeBlockCrs");
808 A_.is_null (), std::runtime_error,
"Ifpack2::Relaxation::"
809 "computeBlockCrs: The input matrix A is null. Please call setMatrix() "
810 "with a nonnull input matrix, then call initialize(), before calling "
812 const block_crs_matrix_type* blockCrsA =
813 dynamic_cast<const block_crs_matrix_type*
> (A_.getRawPtr ());
815 blockCrsA ==
nullptr, std::logic_error,
"Ifpack2::Relaxation::"
816 "computeBlockCrs: A_ is not a BlockCrsMatrix, but it should be if we "
817 "got this far. Please report this bug to the Ifpack2 developers.");
819 const scalar_type one = STS::one ();
824 const LO lclNumMeshRows =
825 blockCrsA->getCrsGraph ().getNodeNumRows ();
826 const LO blockSize = blockCrsA->getBlockSize ();
828 if (! savedDiagOffsets_) {
829 blockDiag_ = block_diag_type ();
830 blockDiag_ = block_diag_type (
"Ifpack2::Relaxation::blockDiag_",
831 lclNumMeshRows, blockSize, blockSize);
832 if (Teuchos::as<LO>(diagOffsets_.extent (0) ) < lclNumMeshRows) {
835 diagOffsets_ = Kokkos::View<size_t*, device_type> ();
836 diagOffsets_ = Kokkos::View<size_t*, device_type> (
"offsets", lclNumMeshRows);
838 blockCrsA->getCrsGraph ().getLocalDiagOffsets (diagOffsets_);
840 (static_cast<size_t> (diagOffsets_.extent (0)) !=
841 static_cast<size_t> (blockDiag_.extent (0)),
842 std::logic_error,
"diagOffsets_.extent(0) = " <<
843 diagOffsets_.extent (0) <<
" != blockDiag_.extent(0) = "
844 << blockDiag_.extent (0) <<
845 ". Please report this bug to the Ifpack2 developers.");
846 savedDiagOffsets_ =
true;
848 blockCrsA->getLocalDiagCopy (blockDiag_, diagOffsets_);
855 unmanaged_block_diag_type blockDiag = blockDiag_;
857 if (DoL1Method_ && IsParallel_) {
858 const scalar_type two = one + one;
859 const size_t maxLength = A_->getNodeMaxNumRowEntries ();
860 Array<LO> indices (maxLength);
861 Array<scalar_type> values (maxLength * blockSize * blockSize);
862 size_t numEntries = 0;
864 for (LO i = 0; i < lclNumMeshRows; ++i) {
866 blockCrsA->getLocalRowCopy (i, indices (), values (), numEntries);
868 auto diagBlock = Kokkos::subview (blockDiag, i, ALL (), ALL ());
869 for (LO subRow = 0; subRow < blockSize; ++subRow) {
870 magnitude_type diagonal_boost = STM::zero ();
871 for (
size_t k = 0 ; k < numEntries ; ++k) {
872 if (indices[k] > lclNumMeshRows) {
873 const size_t offset = blockSize*blockSize*k + subRow*blockSize;
874 for (LO subCol = 0; subCol < blockSize; ++subCol) {
875 diagonal_boost += STS::magnitude (values[offset+subCol] / two);
879 if (STS::magnitude (diagBlock(subRow, subRow)) < L1Eta_ * diagonal_boost) {
880 diagBlock(subRow, subRow) += diagonal_boost;
888 Impl::InvertDiagBlocks<unmanaged_block_diag_type> idb(blockDiag);
889 typedef typename unmanaged_block_diag_type::execution_space exec_space;
890 typedef Kokkos::RangePolicy<exec_space, LO> range_type;
892 Kokkos::parallel_reduce (range_type (0, lclNumMeshRows), idb, info);
898 (info > 0, std::runtime_error,
"GETF2 or GETRI failed on = " << info
899 <<
" diagonal blocks.");
904 #ifdef HAVE_IFPACK2_DEBUG
905 const int numResults = 2;
907 int lclResults[2], gblResults[2];
908 lclResults[0] = info;
909 lclResults[1] = -info;
912 reduceAll<int, int> (* (A_->getGraph ()->getComm ()), REDUCE_MIN,
913 numResults, lclResults, gblResults);
915 (gblResults[0] != 0 || gblResults[1] != 0, std::runtime_error,
916 "Ifpack2::Relaxation::compute: When processing the input "
917 "Tpetra::BlockCrsMatrix, one or more diagonal block LU factorizations "
918 "failed on one or more (MPI) processes.");
919 #endif // HAVE_IFPACK2_DEBUG
927 template<
class MatrixType>
930 using Tpetra::readWrite;
941 using Teuchos::rcp_dynamic_cast;
946 using device_type =
typename vector_type::device_type;
947 using IST =
typename vector_type::impl_scalar_type;
948 using KAT = Kokkos::ArithTraits<IST>;
950 const char methodName[] =
"Ifpack2::Relaxation::compute";
951 const scalar_type zero = STS::zero ();
952 const scalar_type one = STS::one ();
956 #ifdef HAVE_IFPACK2_DEBUG
957 constexpr
bool debug =
true;
959 constexpr
bool debug =
false;
960 #endif // HAVE_IFPACK2_DEBUG
963 (A_.is_null (), std::runtime_error, methodName <<
": "
964 "The input matrix A is null. Please call setMatrix() with a nonnull "
965 "input matrix, then call initialize(), before calling this method.");
968 if (! isInitialized ()) {
972 if (hasBlockCrsMatrix_) {
983 (NumSweeps_ < 0, std::logic_error, methodName
984 <<
": NumSweeps_ = " << NumSweeps_ <<
" < 0. "
985 "Please report this bug to the Ifpack2 developers.");
988 Diagonal_ = Teuchos::null;
991 Diagonal_ =
rcp (
new vector_type (A_->getRowMap (),
false));
1009 const crs_matrix_type* crsMat =
1010 dynamic_cast<const crs_matrix_type*
> (A_.getRawPtr ());
1011 if (crsMat ==
nullptr || ! crsMat->isStaticGraph ()) {
1012 A_->getLocalDiagCopy (*Diagonal_);
1015 if (! savedDiagOffsets_) {
1016 const size_t lclNumRows = A_->getRowMap ()->getNodeNumElements ();
1017 if (diagOffsets_.extent (0) < lclNumRows) {
1018 using Kokkos::view_alloc;
1019 using Kokkos::WithoutInitializing;
1020 using offsets_view_type = Kokkos::View<size_t*, device_type>;
1022 diagOffsets_ = offsets_view_type ();
1023 auto howAlloc = view_alloc (
"offsets", WithoutInitializing);
1024 diagOffsets_ = offsets_view_type (howAlloc, lclNumRows);
1026 crsMat->getCrsGraph ()->getLocalDiagOffsets (diagOffsets_);
1027 savedDiagOffsets_ =
true;
1029 crsMat->getLocalDiagCopy (*Diagonal_, diagOffsets_);
1033 vector_type D_copy (A_->getRowMap ());
1034 A_->getLocalDiagCopy (D_copy);
1035 D_copy.update (STS::one (), *Diagonal_, -STS::one ());
1040 (err != STM::zero(), std::logic_error, methodName <<
": "
1041 <<
"\"fast-path\" diagonal computation failed. "
1042 "\\|D1 - D2\\|_inf = " << err <<
".");
1049 std::unique_ptr<vector_type> origDiag;
1050 if (checkDiagEntries_) {
1051 origDiag = std::unique_ptr<vector_type>
1055 const LO numMyRows =
static_cast<LO
> (A_->getNodeNumRows ());
1069 if (DoL1Method_ && IsParallel_) {
1070 vector_type& gblDiag = *Diagonal_;
1072 decltype (readWrite (gblDiag).on (Kokkos::HostSpace ()));
1075 using lcl_vec_type =
1076 Tpetra::with_local_access_function_argument_type<rw_type>;
1079 Tpetra::withLocalAccess
1080 ([&A_row, L1_eta, numMyRows] (
const lcl_vec_type& diag) {
1082 const size_t maxLength = A_row.getNodeMaxNumRowEntries ();
1083 Array<local_ordinal_type> indices (maxLength);
1084 Array<scalar_type> values (maxLength);
1087 for (LO i = 0; i < numMyRows; ++i) {
1088 A_row.getLocalRowCopy (i, indices (), values (), numEntries);
1090 for (
size_t k = 0 ; k < numEntries; ++k) {
1091 if (indices[k] > numMyRows) {
1092 diagonal_boost += STS::magnitude (values[k] / two);
1095 if (KAT::magnitude (diag[i]) < L1_eta * diagonal_boost) {
1096 diag[i] += diagonal_boost;
1099 }, readWrite (gblDiag).on (Kokkos::HostSpace ()));
1113 const IST oneOverMinDiagVal = (MinDiagonalValue_ == zero) ?
1114 KAT::one () /
static_cast<IST
> (SmallTraits<scalar_type>::eps ()) :
1115 KAT::one () /
static_cast<IST
> (MinDiagonalValue_);
1117 const magnitude_type minDiagValMag = STS::magnitude (MinDiagonalValue_);
1119 if (checkDiagEntries_) {
1125 size_t numSmallDiagEntries = 0;
1126 size_t numZeroDiagEntries = 0;
1127 size_t numNegDiagEntries = 0;
1131 vector_type& gblDiag = *Diagonal_;
1135 decltype (readWrite (gblDiag).on (Kokkos::HostSpace ()));
1136 using lcl_vec_type =
1137 Tpetra::with_local_access_function_argument_type<rw_type>;
1138 Tpetra::withLocalAccess
1139 ([&] (
const lcl_vec_type& diag) {
1146 if (numMyRows != 0) {
1148 minMagDiagEntryMag = d_0_mag;
1149 maxMagDiagEntryMag = d_0_mag;
1158 for (LO i = 0; i < numMyRows; ++i) {
1159 const IST d_i = diag[i];
1163 const auto d_i_real = getRealValue (d_i);
1167 if (d_i_real < STM::zero ()) {
1168 ++numNegDiagEntries;
1170 if (d_i_mag < minMagDiagEntryMag) {
1171 minMagDiagEntryMag = d_i_mag;
1173 if (d_i_mag > maxMagDiagEntryMag) {
1174 maxMagDiagEntryMag = d_i_mag;
1177 if (fixTinyDiagEntries_) {
1179 if (d_i_mag <= minDiagValMag) {
1180 ++numSmallDiagEntries;
1181 if (d_i_mag == STM::zero ()) {
1182 ++numZeroDiagEntries;
1184 diag[i] = oneOverMinDiagVal;
1187 diag[i] = KAT::one () / d_i;
1192 if (d_i_mag <= minDiagValMag) {
1193 ++numSmallDiagEntries;
1194 if (d_i_mag == STM::zero ()) {
1195 ++numZeroDiagEntries;
1198 diag[i] = KAT::one () / d_i;
1201 }, readWrite (gblDiag).on (Kokkos::HostSpace ()));
1206 if (STS::isComplex) {
1207 ComputeFlops_ += 4.0 * numMyRows;
1210 ComputeFlops_ += numMyRows;
1227 const Comm<int>& comm = * (A_->getRowMap ()->getComm ());
1230 Array<magnitude_type> localVals (2);
1231 localVals[0] = minMagDiagEntryMag;
1233 localVals[1] = -maxMagDiagEntryMag;
1234 Array<magnitude_type> globalVals (2);
1235 reduceAll<int, magnitude_type> (comm, REDUCE_MIN, 2,
1236 localVals.getRawPtr (),
1237 globalVals.getRawPtr ());
1238 globalMinMagDiagEntryMag_ = globalVals[0];
1239 globalMaxMagDiagEntryMag_ = -globalVals[1];
1241 Array<size_t> localCounts (3);
1242 localCounts[0] = numSmallDiagEntries;
1243 localCounts[1] = numZeroDiagEntries;
1244 localCounts[2] = numNegDiagEntries;
1245 Array<size_t> globalCounts (3);
1246 reduceAll<int, size_t> (comm, REDUCE_SUM, 3,
1247 localCounts.getRawPtr (),
1248 globalCounts.getRawPtr ());
1249 globalNumSmallDiagEntries_ = globalCounts[0];
1250 globalNumZeroDiagEntries_ = globalCounts[1];
1251 globalNumNegDiagEntries_ = globalCounts[2];
1255 vector_type diff (A_->getRowMap ());
1256 diff.reciprocal (*origDiag);
1257 if (Diagonal_->need_sync_device ()) {
1258 Diagonal_->sync_device ();
1260 diff.update (-one, *Diagonal_, one);
1261 globalDiagNormDiff_ = diff.norm2 ();
1264 if (fixTinyDiagEntries_) {
1268 vector_type& gblDiag = *Diagonal_;
1270 (
"Ifpack2::Relaxation::compute: Invert & fix diagonal",
1272 KOKKOS_LAMBDA (
const IST& d_i) {
1276 if (d_i_mag <= minDiagValMag) {
1277 return oneOverMinDiagVal;
1282 return IST (KAT::one () / d_i);
1287 if (Diagonal_->need_sync_device ()) {
1288 Diagonal_->sync_device ();
1290 Diagonal_->reciprocal (*Diagonal_);
1293 if (STS::isComplex) {
1294 ComputeFlops_ += 4.0 * numMyRows;
1297 ComputeFlops_ += numMyRows;
1301 if (Diagonal_->need_sync_device ()) {
1302 Diagonal_->sync_device ();
1305 if (PrecType_ == Ifpack2::Details::MTGS ||
1306 PrecType_ == Ifpack2::Details::MTSGS) {
1309 const crs_matrix_type* crsMat =
1310 dynamic_cast<const crs_matrix_type*
> (A_.get());
1312 (crsMat ==
nullptr, std::logic_error, methodName <<
": "
1313 "Multithreaded Gauss-Seidel methods currently only work "
1314 "when the input matrix is a Tpetra::CrsMatrix.");
1315 local_matrix_type kcsr = crsMat->getLocalMatrix ();
1317 const bool is_symmetric = (PrecType_ == Ifpack2::Details::MTSGS);
1318 auto diagView_2d = Diagonal_->getLocalViewDevice ();
1319 auto diagView_1d = Kokkos::subview (diagView_2d, Kokkos::ALL (), 0);
1320 using KokkosSparse::Experimental::gauss_seidel_numeric;
1321 gauss_seidel_numeric<mt_kernel_handle_type,
1324 scalar_nonzero_view_t> (mtKernelHandle_.getRawPtr (),
1325 A_->getNodeNumRows (),
1326 A_->getNodeNumCols (),
1341 template<
class MatrixType>
1344 ApplyInverseJacobi (
const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1345 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
const
1348 if (hasBlockCrsMatrix_) {
1349 ApplyInverseJacobi_BlockCrsMatrix (X, Y);
1353 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1354 const double numVectors = as<double> (X.getNumVectors ());
1355 if (ZeroStartingSolution_) {
1360 Y.elementWiseMultiply (DampingFactor_, *Diagonal_, X, STS::zero ());
1366 double flopUpdate = 0.0;
1367 if (DampingFactor_ == STS::one ()) {
1369 flopUpdate = numGlobalRows * numVectors;
1373 flopUpdate = 2.0 * numGlobalRows * numVectors;
1375 ApplyFlops_ += flopUpdate;
1376 if (NumSweeps_ == 1) {
1382 const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1385 updateCachedMultiVector(Y.getMap(),as<size_t>(numVectors));
1387 for (
int j = startSweep; j < NumSweeps_; ++j) {
1389 applyMat (Y, *cachedMV_);
1390 cachedMV_->update (STS::one (), X, -STS::one ());
1391 Y.elementWiseMultiply (DampingFactor_, *Diagonal_, *cachedMV_, STS::one ());
1405 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1406 const double dampingFlops = (DampingFactor_ == STS::one ()) ? 0.0 : 1.0;
1407 ApplyFlops_ += as<double> (NumSweeps_ - startSweep) * numVectors *
1408 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1411 template<
class MatrixType>
1413 Relaxation<MatrixType>::
1414 ApplyInverseJacobi_BlockCrsMatrix (
const Tpetra::MultiVector<scalar_type,
1416 global_ordinal_type,
1418 Tpetra::MultiVector<scalar_type,
1420 global_ordinal_type,
1421 node_type>& Y)
const
1423 using Tpetra::BlockMultiVector;
1424 using BMV = BlockMultiVector<scalar_type, local_ordinal_type,
1425 global_ordinal_type, node_type>;
1427 const block_crs_matrix_type* blockMatConst =
1428 dynamic_cast<const block_crs_matrix_type*
> (A_.getRawPtr ());
1430 (blockMatConst ==
nullptr, std::logic_error,
"This method should "
1431 "never be called if the matrix A_ is not a BlockCrsMatrix. "
1432 "Please report this bug to the Ifpack2 developers.");
1437 block_crs_matrix_type* blockMat =
1438 const_cast<block_crs_matrix_type*
> (blockMatConst);
1440 auto meshRowMap = blockMat->getRowMap ();
1441 auto meshColMap = blockMat->getColMap ();
1442 const local_ordinal_type blockSize = blockMat->getBlockSize ();
1444 BMV X_blk (X, *meshColMap, blockSize);
1445 BMV Y_blk (Y, *meshRowMap, blockSize);
1447 if (ZeroStartingSolution_) {
1451 Y_blk.blockWiseMultiply (DampingFactor_, blockDiag_, X_blk);
1452 if (NumSweeps_ == 1) {
1457 auto pointRowMap = Y.getMap ();
1458 const size_t numVecs = X.getNumVectors ();
1462 BMV A_times_Y (*meshRowMap, *pointRowMap, blockSize, numVecs);
1466 const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1468 for (
int j = startSweep; j < NumSweeps_; ++j) {
1469 blockMat->applyBlock (Y_blk, A_times_Y);
1471 Y_blk.blockJacobiUpdate (DampingFactor_, blockDiag_,
1472 X_blk, A_times_Y, STS::one ());
1476 template<
class MatrixType>
1478 Relaxation<MatrixType>::
1479 ApplyInverseGS (
const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1480 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
const
1482 using this_type = Relaxation<MatrixType>;
1492 const block_crs_matrix_type* blockCrsMat =
1493 dynamic_cast<const block_crs_matrix_type*
> (A_.getRawPtr ());
1494 const crs_matrix_type* crsMat =
1495 dynamic_cast<const crs_matrix_type*
> (A_.getRawPtr ());
1496 if (blockCrsMat !=
nullptr) {
1497 const_cast<this_type*
> (
this)->ApplyInverseGS_BlockCrsMatrix (*blockCrsMat, X, Y);
1499 else if (crsMat !=
nullptr) {
1500 ApplyInverseGS_CrsMatrix (*crsMat, X, Y);
1503 ApplyInverseGS_RowMatrix (X, Y);
1508 template<
class MatrixType>
1510 Relaxation<MatrixType>::
1511 ApplyInverseGS_RowMatrix (
const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1512 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
const
1520 using Teuchos::rcpFromRef;
1521 typedef Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> MV;
1526 if (ZeroStartingSolution_) {
1527 Y.putScalar (STS::zero ());
1530 const size_t NumVectors = X.getNumVectors ();
1531 const size_t maxLength = A_->getNodeMaxNumRowEntries ();
1532 Array<local_ordinal_type> Indices (maxLength);
1533 Array<scalar_type> Values (maxLength);
1536 const size_t numMyRows = A_->getNodeNumRows();
1537 const local_ordinal_type* rowInd = 0;
1538 size_t numActive = numMyRows;
1539 bool do_local = ! localSmoothingIndices_.is_null ();
1541 rowInd = localSmoothingIndices_.getRawPtr ();
1542 numActive = localSmoothingIndices_.size ();
1547 if (Importer_.is_null ()) {
1548 updateCachedMultiVector (Y.getMap (), NumVectors);
1551 updateCachedMultiVector (Importer_->getTargetMap (), NumVectors);
1556 Y2 = rcpFromRef (Y);
1560 ArrayRCP<const scalar_type> d_rcp = Diagonal_->get1dView ();
1561 ArrayView<const scalar_type> d_ptr = d_rcp();
1564 bool constant_stride = X.isConstantStride() && Y2->isConstantStride();
1566 if (constant_stride) {
1568 size_t x_stride = X.getStride();
1569 size_t y2_stride = Y2->getStride();
1570 ArrayRCP<scalar_type> y2_rcp = Y2->get1dViewNonConst();
1571 ArrayRCP<const scalar_type> x_rcp = X.get1dView();
1572 ArrayView<scalar_type> y2_ptr = y2_rcp();
1573 ArrayView<const scalar_type> x_ptr = x_rcp();
1574 Array<scalar_type> dtemp(NumVectors,STS::zero());
1576 for (
int j = 0; j < NumSweeps_; ++j) {
1579 if (Importer_.is_null ()) {
1585 Y2->doImport (Y, *Importer_, Tpetra::INSERT);
1589 if (! DoBackwardGS_) {
1590 for (
size_t ii = 0; ii < numActive; ++ii) {
1591 local_ordinal_type i = as<local_ordinal_type> (do_local ? rowInd[ii] : ii);
1593 A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
1594 dtemp.assign(NumVectors,STS::zero());
1596 for (
size_t k = 0; k < NumEntries; ++k) {
1597 const local_ordinal_type col = Indices[k];
1598 for (
size_t m = 0; m < NumVectors; ++m) {
1599 dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
1603 for (
size_t m = 0; m < NumVectors; ++m) {
1604 y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
1611 for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
1612 local_ordinal_type i = as<local_ordinal_type> (do_local ? rowInd[ii] : ii);
1614 A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
1615 dtemp.assign (NumVectors, STS::zero ());
1617 for (
size_t k = 0; k < NumEntries; ++k) {
1618 const local_ordinal_type col = Indices[k];
1619 for (
size_t m = 0; m < NumVectors; ++m) {
1620 dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
1624 for (
size_t m = 0; m < NumVectors; ++m) {
1625 y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
1631 Tpetra::deep_copy (Y, *Y2);
1637 ArrayRCP<ArrayRCP<scalar_type> > y2_ptr = Y2->get2dViewNonConst ();
1638 ArrayRCP<ArrayRCP<const scalar_type> > x_ptr = X.get2dView ();
1640 for (
int j = 0; j < NumSweeps_; ++j) {
1643 if (Importer_.is_null ()) {
1646 Y2->doImport (Y, *Importer_, Tpetra::INSERT);
1650 if (! DoBackwardGS_) {
1651 for (
size_t ii = 0; ii < numActive; ++ii) {
1652 local_ordinal_type i = as<local_ordinal_type> (do_local ? rowInd[ii] : ii);
1654 A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
1656 for (
size_t m = 0; m < NumVectors; ++m) {
1657 scalar_type dtemp = STS::zero ();
1658 ArrayView<const scalar_type> x_local = (x_ptr())[m]();
1659 ArrayView<scalar_type> y2_local = (y2_ptr())[m]();
1661 for (
size_t k = 0; k < NumEntries; ++k) {
1662 const local_ordinal_type col = Indices[k];
1663 dtemp += Values[k] * y2_local[col];
1665 y2_local[i] += DampingFactor_ * d_ptr[i] * (x_local[i] - dtemp);
1672 for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
1673 local_ordinal_type i = as<local_ordinal_type> (do_local ? rowInd[ii] : ii);
1676 A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
1678 for (
size_t m = 0; m < NumVectors; ++m) {
1679 scalar_type dtemp = STS::zero ();
1680 ArrayView<const scalar_type> x_local = (x_ptr())[m]();
1681 ArrayView<scalar_type> y2_local = (y2_ptr())[m]();
1683 for (
size_t k = 0; k < NumEntries; ++k) {
1684 const local_ordinal_type col = Indices[k];
1685 dtemp += Values[k] * y2_local[col];
1687 y2_local[i] += DampingFactor_ * d_ptr[i] * (x_local[i] - dtemp);
1694 Tpetra::deep_copy (Y, *Y2);
1701 const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1702 const double numVectors = as<double> (X.getNumVectors ());
1703 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1704 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1705 ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
1706 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1710 template<
class MatrixType>
1712 Relaxation<MatrixType>::
1713 ApplyInverseGS_CrsMatrix (
const crs_matrix_type& A,
1714 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1715 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
const
1718 const Tpetra::ESweepDirection direction =
1719 DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward;
1720 if (localSmoothingIndices_.is_null ()) {
1721 A.gaussSeidelCopy (Y, X, *Diagonal_, DampingFactor_, direction,
1722 NumSweeps_, ZeroStartingSolution_);
1725 A.reorderedGaussSeidelCopy (Y, X, *Diagonal_, localSmoothingIndices_ (),
1726 DampingFactor_, direction,
1727 NumSweeps_, ZeroStartingSolution_);
1741 const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1742 const double numVectors = as<double> (X.getNumVectors ());
1743 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1744 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1745 ApplyFlops_ += NumSweeps_ * numVectors *
1746 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1749 template<
class MatrixType>
1751 Relaxation<MatrixType>::
1752 ApplyInverseGS_BlockCrsMatrix (
const block_crs_matrix_type& A,
1753 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1754 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
1758 using Teuchos::rcpFromRef;
1759 typedef Tpetra::BlockMultiVector<scalar_type,
1760 local_ordinal_type, global_ordinal_type, node_type> BMV;
1761 typedef Tpetra::MultiVector<scalar_type,
1762 local_ordinal_type, global_ordinal_type, node_type> MV;
1771 BMV yBlock (Y, * (A.getGraph ()->getDomainMap ()), A.getBlockSize ());
1772 const BMV xBlock (X, * (A.getColMap ()), A.getBlockSize ());
1774 bool performImport =
false;
1776 if (Importer_.is_null ()) {
1777 yBlockCol = rcpFromRef (yBlock);
1780 if (yBlockColumnPointMap_.is_null () ||
1781 yBlockColumnPointMap_->getNumVectors () != yBlock.getNumVectors () ||
1782 yBlockColumnPointMap_->getBlockSize () != yBlock.getBlockSize ()) {
1783 yBlockColumnPointMap_ =
1784 rcp (
new BMV (* (A.getColMap ()), A.getBlockSize (),
1785 static_cast<local_ordinal_type
> (yBlock.getNumVectors ())));
1787 yBlockCol = yBlockColumnPointMap_;
1788 performImport =
true;
1791 if (ZeroStartingSolution_) {
1792 yBlockCol->putScalar (STS::zero ());
1794 else if (performImport) {
1795 yBlockCol->doImport (yBlock, *Importer_, Tpetra::INSERT);
1798 const Tpetra::ESweepDirection direction =
1799 DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward;
1801 for (
int sweep = 0; sweep < NumSweeps_; ++sweep) {
1802 if (performImport && sweep > 0) {
1803 yBlockCol->doImport(yBlock, *Importer_, Tpetra::INSERT);
1805 A.localGaussSeidel (xBlock, *yBlockCol, blockDiag_,
1806 DampingFactor_, direction);
1807 if (performImport) {
1808 RCP<const MV> yBlockColPointDomain =
1809 yBlockCol->getMultiVectorView ().offsetView (A.getDomainMap (), 0);
1810 Tpetra::deep_copy (Y, *yBlockColPointDomain);
1815 template<
class MatrixType>
1817 Relaxation<MatrixType>::
1818 MTGaussSeidel (
const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& B,
1819 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1820 const Tpetra::ESweepDirection direction)
const
1822 using Teuchos::null;
1825 using Teuchos::rcpFromRef;
1826 using Teuchos::rcp_const_cast;
1829 typedef scalar_type Scalar;
1830 typedef local_ordinal_type LocalOrdinal;
1831 typedef global_ordinal_type GlobalOrdinal;
1832 typedef node_type Node;
1834 const char prefix[] =
"Ifpack2::Relaxation::(reordered)MTGaussSeidel: ";
1837 const crs_matrix_type* crsMat =
dynamic_cast<const crs_matrix_type*
> (A_.get());
1839 (crsMat ==
nullptr, std::logic_error,
"Ifpack2::Relaxation::apply: "
1840 "Multithreaded Gauss-Seidel methods currently only work when the "
1841 "input matrix is a Tpetra::CrsMatrix.");
1845 (! localSmoothingIndices_.is_null (), std::logic_error,
1846 "Ifpack2's implementation of Multithreaded Gauss-Seidel does not "
1847 "implement the case where the user supplies an iteration order. "
1848 "This error used to appear as \"MT GaussSeidel ignores the given "
1850 "I tried to add more explanation, but I didn't implement \"MT "
1851 "GaussSeidel\" [sic]. "
1852 "You'll have to ask the person who did.");
1855 (crsMat ==
nullptr, std::logic_error, prefix <<
"The matrix is null."
1856 " This should never happen. Please report this bug to the Ifpack2 "
1859 (! crsMat->isFillComplete (), std::runtime_error, prefix <<
"The "
1860 "input CrsMatrix is not fill complete. Please call fillComplete "
1861 "on the matrix, then do setup again, before calling apply(). "
1862 "\"Do setup\" means that if only the matrix's values have changed "
1863 "since last setup, you need only call compute(). If the matrix's "
1864 "structure may also have changed, you must first call initialize(), "
1865 "then call compute(). If you have not set up this preconditioner "
1866 "for this matrix before, you must first call initialize(), then "
1869 (NumSweeps_ < 0, std::logic_error, prefix <<
": NumSweeps_ = "
1870 << NumSweeps_ <<
" < 0. Please report this bug to the Ifpack2 "
1872 if (NumSweeps_ == 0) {
1876 typedef typename Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> MV;
1877 typedef typename crs_matrix_type::import_type import_type;
1878 typedef typename crs_matrix_type::export_type export_type;
1879 typedef typename crs_matrix_type::map_type map_type;
1881 RCP<const import_type> importer = crsMat->getGraph ()->getImporter ();
1882 RCP<const export_type> exporter = crsMat->getGraph ()->getExporter ();
1884 ! exporter.is_null (), std::runtime_error,
1885 "This method's implementation currently requires that the matrix's row, "
1886 "domain, and range Maps be the same. This cannot be the case, because "
1887 "the matrix has a nontrivial Export object.");
1889 RCP<const map_type> domainMap = crsMat->getDomainMap ();
1890 RCP<const map_type> rangeMap = crsMat->getRangeMap ();
1891 RCP<const map_type> rowMap = crsMat->getGraph ()->getRowMap ();
1892 RCP<const map_type> colMap = crsMat->getGraph ()->getColMap ();
1894 #ifdef HAVE_IFPACK2_DEBUG
1900 ! X.getMap ()->isSameAs (*domainMap), std::runtime_error,
1901 "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
1902 "multivector X be in the domain Map of the matrix.");
1904 ! B.getMap ()->isSameAs (*rangeMap), std::runtime_error,
1905 "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
1906 "B be in the range Map of the matrix.");
1908 ! D.getMap ()->isSameAs (*rowMap), std::runtime_error,
1909 "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
1910 "D be in the row Map of the matrix.");
1912 ! rowMap->isSameAs (*rangeMap), std::runtime_error,
1913 "Ifpack2::Relaxation::MTGaussSeidel requires that the row Map and the "
1914 "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
1916 ! domainMap->isSameAs (*rangeMap), std::runtime_error,
1917 "Ifpack2::Relaxation::MTGaussSeidel requires that the domain Map and "
1918 "the range Map of the matrix be the same.");
1924 #endif // HAVE_IFPACK2_DEBUG
1935 RCP<MV> X_domainMap;
1936 bool copyBackOutput =
false;
1937 if (importer.is_null ()) {
1938 if (X.isConstantStride ()) {
1939 X_colMap = rcpFromRef (X);
1940 X_domainMap = rcpFromRef (X);
1947 if (ZeroStartingSolution_) {
1948 X_colMap->putScalar (ZERO);
1957 updateCachedMultiVector(colMap,as<size_t>(X.getNumVectors()));
1958 X_colMap = cachedMV_;
1962 X_domainMap = X_colMap;
1963 if (! ZeroStartingSolution_) {
1965 deep_copy (*X_domainMap , X);
1966 }
catch (std::exception& e) {
1967 std::ostringstream os;
1968 os <<
"Ifpack2::Relaxation::MTGaussSeidel: "
1969 "deep_copy(*X_domainMap, X) threw an exception: "
1970 << e.what () <<
".";
1974 copyBackOutput =
true;
1988 updateCachedMultiVector(colMap,as<size_t>(X.getNumVectors()));
1989 X_colMap = cachedMV_;
1991 X_domainMap = X_colMap->offsetViewNonConst (domainMap, 0);
1993 #ifdef HAVE_IFPACK2_DEBUG
1994 auto X_colMap_host_view = X_colMap->template getLocalView<Kokkos::HostSpace> ();
1995 auto X_domainMap_host_view = X_domainMap->template getLocalView<Kokkos::HostSpace> ();
1997 if (X_colMap->getLocalLength () != 0 && X_domainMap->getLocalLength ()) {
1999 X_colMap_host_view.data () != X_domainMap_host_view.data (),
2000 std::logic_error,
"Ifpack2::Relaxation::MTGaussSeidel: "
2001 "Pointer to start of column Map view of X is not equal to pointer to "
2002 "start of (domain Map view of) X. This may mean that "
2003 "Tpetra::MultiVector::offsetViewNonConst is broken. "
2004 "Please report this bug to the Tpetra developers.");
2008 X_colMap_host_view.extent (0) < X_domainMap_host_view.extent (0) ||
2009 X_colMap->getLocalLength () < X_domainMap->getLocalLength (),
2010 std::logic_error,
"Ifpack2::Relaxation::MTGaussSeidel: "
2011 "X_colMap has fewer local rows than X_domainMap. "
2012 "X_colMap_host_view.extent(0) = " << X_colMap_host_view.extent (0)
2013 <<
", X_domainMap_host_view.extent(0) = "
2014 << X_domainMap_host_view.extent (0)
2015 <<
", X_colMap->getLocalLength() = " << X_colMap->getLocalLength ()
2016 <<
", and X_domainMap->getLocalLength() = "
2017 << X_domainMap->getLocalLength ()
2018 <<
". This means that Tpetra::MultiVector::offsetViewNonConst "
2019 "is broken. Please report this bug to the Tpetra developers.");
2022 X_colMap->getNumVectors () != X_domainMap->getNumVectors (),
2023 std::logic_error,
"Ifpack2::Relaxation::MTGaussSeidel: "
2024 "X_colMap has a different number of columns than X_domainMap. "
2025 "X_colMap->getNumVectors() = " << X_colMap->getNumVectors ()
2026 <<
" != X_domainMap->getNumVectors() = "
2027 << X_domainMap->getNumVectors ()
2028 <<
". This means that Tpetra::MultiVector::offsetViewNonConst "
2029 "is broken. Please report this bug to the Tpetra developers.");
2030 #endif // HAVE_IFPACK2_DEBUG
2032 if (ZeroStartingSolution_) {
2034 X_colMap->putScalar (ZERO);
2044 X_colMap->doImport (X, *importer, Tpetra::CombineMode::INSERT);
2046 copyBackOutput =
true;
2053 if (B.isConstantStride ()) {
2054 B_in = rcpFromRef (B);
2061 RCP<MV> B_in_nonconst =
rcp (
new MV (rowMap, B.getNumVectors()));
2063 deep_copy (*B_in_nonconst, B);
2064 }
catch (std::exception& e) {
2065 std::ostringstream os;
2066 os <<
"Ifpack2::Relaxation::MTGaussSeidel: "
2067 "deep_copy(*B_in_nonconst, B) threw an exception: "
2068 << e.what () <<
".";
2071 B_in = rcp_const_cast<
const MV> (B_in_nonconst);
2085 local_matrix_type kcsr = crsMat->getLocalMatrix ();
2086 const size_t NumVectors = X.getNumVectors ();
2088 bool update_y_vector =
true;
2090 bool zero_x_vector =
false;
2092 for (
int sweep = 0; sweep < NumSweeps_; ++sweep) {
2093 if (! importer.is_null () && sweep > 0) {
2096 X_colMap->doImport (*X_domainMap, *importer, Tpetra::CombineMode::INSERT);
2099 for (
size_t indVec = 0; indVec < NumVectors; ++indVec) {
2100 if (direction == Tpetra::Symmetric) {
2101 KokkosSparse::Experimental::symmetric_gauss_seidel_apply
2102 (mtKernelHandle_.getRawPtr(), A_->getNodeNumRows(), A_->getNodeNumCols(),
2103 kcsr.graph.row_map, kcsr.graph.entries, kcsr.values,
2104 Kokkos::subview(X_colMap->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec),
2105 Kokkos::subview(B_in->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec),
2106 zero_x_vector, update_y_vector, DampingFactor_);
2108 else if (direction == Tpetra::Forward) {
2109 KokkosSparse::Experimental::forward_sweep_gauss_seidel_apply
2110 (mtKernelHandle_.getRawPtr(), A_->getNodeNumRows(), A_->getNodeNumCols(),
2111 kcsr.graph.row_map,kcsr.graph.entries, kcsr.values,
2112 Kokkos::subview(X_colMap->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec ),
2113 Kokkos::subview(B_in->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec),
2114 zero_x_vector, update_y_vector, DampingFactor_);
2116 else if (direction == Tpetra::Backward) {
2117 KokkosSparse::Experimental::backward_sweep_gauss_seidel_apply
2118 (mtKernelHandle_.getRawPtr(), A_->getNodeNumRows(), A_->getNodeNumCols(),
2119 kcsr.graph.row_map,kcsr.graph.entries, kcsr.values,
2120 Kokkos::subview(X_colMap->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec ),
2121 Kokkos::subview(B_in->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec),
2122 zero_x_vector, update_y_vector, DampingFactor_);
2126 true, std::invalid_argument,
2127 prefix <<
"The 'direction' enum does not have any of its valid "
2128 "values: Forward, Backward, or Symmetric.");
2132 if (NumVectors > 1){
2133 update_y_vector =
true;
2136 update_y_vector =
false;
2140 if (copyBackOutput) {
2142 deep_copy (X , *X_domainMap);
2143 }
catch (std::exception& e) {
2145 true, std::runtime_error, prefix <<
"deep_copy(X, *X_domainMap) "
2146 "threw an exception: " << e.what ());
2150 const double dampingFlops = (DampingFactor_ == STS::one ()) ? 0.0 : 1.0;
2151 const double numVectors = as<double> (X.getNumVectors ());
2152 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
2153 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
2154 double ApplyFlops = NumSweeps_ * numVectors *
2155 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
2156 if (direction == Tpetra::Symmetric)
2158 ApplyFlops_ += ApplyFlops;
2162 template<
class MatrixType>
2164 Relaxation<MatrixType>::
2165 ApplyInverseMTSGS_CrsMatrix (
const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& B,
2166 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X)
const
2168 const Tpetra::ESweepDirection direction = Tpetra::Symmetric;
2169 this->MTGaussSeidel (B, X, direction);
2173 template<
class MatrixType>
2174 void Relaxation<MatrixType>::ApplyInverseMTGS_CrsMatrix (
2175 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& B,
2176 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X)
const {
2178 const Tpetra::ESweepDirection direction =
2179 DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward;
2180 this->MTGaussSeidel (B, X, direction);
2183 template<
class MatrixType>
2185 Relaxation<MatrixType>::
2186 ApplyInverseSGS (
const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
2187 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
const
2189 using this_type = Relaxation<MatrixType>;
2199 const block_crs_matrix_type* blockCrsMat =
2200 dynamic_cast<const block_crs_matrix_type*
> (A_.getRawPtr ());
2201 const crs_matrix_type* crsMat =
2202 dynamic_cast<const crs_matrix_type*
> (A_.getRawPtr ());
2203 if (blockCrsMat !=
nullptr) {
2204 const_cast<this_type*
> (
this)->ApplyInverseSGS_BlockCrsMatrix(*blockCrsMat, X, Y);
2206 else if (crsMat !=
nullptr) {
2207 ApplyInverseSGS_CrsMatrix (*crsMat, X, Y);
2210 ApplyInverseSGS_RowMatrix (X, Y);
2215 template<
class MatrixType>
2217 Relaxation<MatrixType>::
2218 ApplyInverseSGS_RowMatrix (
const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
2219 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
const
2227 using Teuchos::rcpFromRef;
2228 using MV = Tpetra::MultiVector<scalar_type, local_ordinal_type,
2229 global_ordinal_type, node_type>;
2234 if (ZeroStartingSolution_) {
2235 Y.putScalar (STS::zero ());
2238 const size_t NumVectors = X.getNumVectors ();
2239 const size_t maxLength = A_->getNodeMaxNumRowEntries ();
2240 Array<local_ordinal_type> Indices (maxLength);
2241 Array<scalar_type> Values (maxLength);
2244 const size_t numMyRows = A_->getNodeNumRows();
2245 const local_ordinal_type * rowInd = 0;
2246 size_t numActive = numMyRows;
2247 bool do_local = !localSmoothingIndices_.is_null();
2249 rowInd = localSmoothingIndices_.getRawPtr();
2250 numActive = localSmoothingIndices_.size();
2256 if (Importer_.is_null ()) {
2257 updateCachedMultiVector (Y.getMap (), NumVectors);
2260 updateCachedMultiVector (Importer_->getTargetMap (), NumVectors);
2265 Y2 = rcpFromRef (Y);
2269 ArrayRCP<const scalar_type> d_rcp = Diagonal_->get1dView();
2270 ArrayView<const scalar_type> d_ptr = d_rcp();
2273 bool constant_stride = X.isConstantStride() && Y2->isConstantStride();
2275 if(constant_stride) {
2277 size_t x_stride = X.getStride();
2278 size_t y2_stride = Y2->getStride();
2279 ArrayRCP<scalar_type> y2_rcp = Y2->get1dViewNonConst();
2280 ArrayRCP<const scalar_type> x_rcp = X.get1dView();
2281 ArrayView<scalar_type> y2_ptr = y2_rcp();
2282 ArrayView<const scalar_type> x_ptr = x_rcp();
2283 Array<scalar_type> dtemp(NumVectors,STS::zero());
2285 for (
int j = 0; j < NumSweeps_; j++) {
2288 if (Importer_.is_null ()) {
2290 Tpetra::deep_copy (*Y2, Y);
2292 Y2->doImport (Y, *Importer_, Tpetra::INSERT);
2295 for (
size_t ii = 0; ii < numActive; ++ii) {
2296 local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
2298 A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
2299 dtemp.assign(NumVectors,STS::zero());
2301 for (
size_t k = 0; k < NumEntries; ++k) {
2302 const local_ordinal_type col = Indices[k];
2303 for (
size_t m = 0; m < NumVectors; ++m) {
2304 dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
2308 for (
size_t m = 0; m < NumVectors; ++m) {
2309 y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
2315 for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
2316 local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
2318 A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
2319 dtemp.assign(NumVectors,STS::zero());
2321 for (
size_t k = 0; k < NumEntries; ++k) {
2322 const local_ordinal_type col = Indices[k];
2323 for (
size_t m = 0; m < NumVectors; ++m) {
2324 dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
2328 for (
size_t m = 0; m < NumVectors; ++m) {
2329 y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
2335 Tpetra::deep_copy (Y, *Y2);
2341 ArrayRCP<ArrayRCP<scalar_type> > y2_ptr = Y2->get2dViewNonConst ();
2342 ArrayRCP<ArrayRCP<const scalar_type> > x_ptr = X.get2dView ();
2344 for (
int iter = 0; iter < NumSweeps_; ++iter) {
2347 if (Importer_.is_null ()) {
2349 Tpetra::deep_copy (*Y2, Y);
2351 Y2->doImport (Y, *Importer_, Tpetra::INSERT);
2355 for (
size_t ii = 0; ii < numActive; ++ii) {
2356 local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
2357 const scalar_type diag = d_ptr[i];
2359 A_->getLocalRowCopy (as<local_ordinal_type> (i), Indices (), Values (), NumEntries);
2361 for (
size_t m = 0; m < NumVectors; ++m) {
2362 scalar_type dtemp = STS::zero ();
2363 ArrayView<const scalar_type> x_local = (x_ptr())[m]();
2364 ArrayView<scalar_type> y2_local = (y2_ptr())[m]();
2366 for (
size_t k = 0; k < NumEntries; ++k) {
2367 const local_ordinal_type col = Indices[k];
2368 dtemp += Values[k] * y2_local[col];
2370 y2_local[i] += DampingFactor_ * (x_local[i] - dtemp) * diag;
2376 for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
2377 local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
2378 const scalar_type diag = d_ptr[i];
2380 A_->getLocalRowCopy (as<local_ordinal_type> (i), Indices (), Values (), NumEntries);
2382 for (
size_t m = 0; m < NumVectors; ++m) {
2383 scalar_type dtemp = STS::zero ();
2384 ArrayView<const scalar_type> x_local = (x_ptr())[m]();
2385 ArrayView<scalar_type> y2_local = (y2_ptr())[m]();
2387 for (
size_t k = 0; k < NumEntries; ++k) {
2388 const local_ordinal_type col = Indices[k];
2389 dtemp += Values[k] * y2_local[col];
2391 y2_local[i] += DampingFactor_ * (x_local[i] - dtemp) * diag;
2397 Tpetra::deep_copy (Y, *Y2);
2403 const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
2404 const double numVectors = as<double> (X.getNumVectors ());
2405 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
2406 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
2407 ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
2408 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
2412 template<
class MatrixType>
2414 Relaxation<MatrixType>::
2415 ApplyInverseSGS_CrsMatrix (
const crs_matrix_type& A,
2416 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
2417 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
const
2420 const Tpetra::ESweepDirection direction = Tpetra::Symmetric;
2421 if (localSmoothingIndices_.is_null ()) {
2422 A.gaussSeidelCopy (Y, X, *Diagonal_, DampingFactor_, direction,
2423 NumSweeps_, ZeroStartingSolution_);
2426 A.reorderedGaussSeidelCopy (Y, X, *Diagonal_, localSmoothingIndices_ (),
2427 DampingFactor_, direction,
2428 NumSweeps_, ZeroStartingSolution_);
2445 const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
2446 const double numVectors = as<double> (X.getNumVectors ());
2447 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
2448 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
2449 ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
2450 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
2453 template<
class MatrixType>
2455 Relaxation<MatrixType>::
2456 ApplyInverseSGS_BlockCrsMatrix (
const block_crs_matrix_type& A,
2457 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
2458 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
2462 using Teuchos::rcpFromRef;
2463 typedef Tpetra::BlockMultiVector<scalar_type,
2464 local_ordinal_type, global_ordinal_type, node_type> BMV;
2465 typedef Tpetra::MultiVector<scalar_type,
2466 local_ordinal_type, global_ordinal_type, node_type> MV;
2475 BMV yBlock (Y, * (A.getGraph ()->getDomainMap ()), A.getBlockSize ());
2476 const BMV xBlock (X, * (A.getColMap ()), A.getBlockSize ());
2478 bool performImport =
false;
2480 if (Importer_.is_null ()) {
2481 yBlockCol = Teuchos::rcpFromRef (yBlock);
2484 if (yBlockColumnPointMap_.is_null () ||
2485 yBlockColumnPointMap_->getNumVectors () != yBlock.getNumVectors () ||
2486 yBlockColumnPointMap_->getBlockSize () != yBlock.getBlockSize ()) {
2487 yBlockColumnPointMap_ =
2488 rcp (
new BMV (* (A.getColMap ()), A.getBlockSize (),
2489 static_cast<local_ordinal_type
> (yBlock.getNumVectors ())));
2491 yBlockCol = yBlockColumnPointMap_;
2492 performImport =
true;
2495 if (ZeroStartingSolution_) {
2496 yBlockCol->putScalar (STS::zero ());
2498 else if (performImport) {
2499 yBlockCol->doImport (yBlock, *Importer_, Tpetra::INSERT);
2503 const Tpetra::ESweepDirection direction = Tpetra::Symmetric;
2505 for (
int sweep = 0; sweep < NumSweeps_; ++sweep) {
2506 if (performImport && sweep > 0) {
2507 yBlockCol->doImport (yBlock, *Importer_, Tpetra::INSERT);
2509 A.localGaussSeidel (xBlock, *yBlockCol, blockDiag_,
2510 DampingFactor_, direction);
2511 if (performImport) {
2512 RCP<const MV> yBlockColPointDomain =
2513 yBlockCol->getMultiVectorView ().offsetView (A.getDomainMap (), 0);
2514 MV yBlockView = yBlock.getMultiVectorView ();
2515 Tpetra::deep_copy (yBlockView, *yBlockColPointDomain);
2521 template<
class MatrixType>
2524 std::ostringstream os;
2529 os <<
"\"Ifpack2::Relaxation\": {";
2531 os <<
"Initialized: " << (isInitialized () ?
"true" :
"false") <<
", "
2532 <<
"Computed: " << (isComputed () ?
"true" :
"false") <<
", ";
2538 if (PrecType_ == Ifpack2::Details::JACOBI) {
2540 }
else if (PrecType_ == Ifpack2::Details::GS) {
2541 os <<
"Gauss-Seidel";
2542 }
else if (PrecType_ == Ifpack2::Details::SGS) {
2543 os <<
"Symmetric Gauss-Seidel";
2544 }
else if (PrecType_ == Ifpack2::Details::MTGS) {
2545 os <<
"MT Gauss-Seidel";
2546 }
else if (PrecType_ == Ifpack2::Details::MTSGS) {
2547 os <<
"MT Symmetric Gauss-Seidel";
2553 os <<
", " <<
"sweeps: " << NumSweeps_ <<
", "
2554 <<
"damping factor: " << DampingFactor_ <<
", ";
2556 os <<
"use l1: " << DoL1Method_ <<
", "
2557 <<
"l1 eta: " << L1Eta_ <<
", ";
2560 if (A_.is_null ()) {
2561 os <<
"Matrix: null";
2564 os <<
"Global matrix dimensions: ["
2565 << A_->getGlobalNumRows () <<
", " << A_->getGlobalNumCols () <<
"]"
2566 <<
", Global nnz: " << A_->getGlobalNumEntries();
2574 template<
class MatrixType>
2593 const int myRank = this->getComm ()->getRank ();
2605 out <<
"\"Ifpack2::Relaxation\":" << endl;
2607 out <<
"MatrixType: \"" << TypeNameTraits<MatrixType>::name () <<
"\""
2609 if (this->getObjectLabel () !=
"") {
2610 out <<
"Label: " << this->getObjectLabel () << endl;
2612 out <<
"Initialized: " << (isInitialized () ?
"true" :
"false") << endl
2613 <<
"Computed: " << (isComputed () ?
"true" :
"false") << endl
2614 <<
"Parameters: " << endl;
2617 out <<
"\"relaxation: type\": ";
2618 if (PrecType_ == Ifpack2::Details::JACOBI) {
2620 }
else if (PrecType_ == Ifpack2::Details::GS) {
2621 out <<
"Gauss-Seidel";
2622 }
else if (PrecType_ == Ifpack2::Details::SGS) {
2623 out <<
"Symmetric Gauss-Seidel";
2624 }
else if (PrecType_ == Ifpack2::Details::MTGS) {
2625 out <<
"MT Gauss-Seidel";
2626 }
else if (PrecType_ == Ifpack2::Details::MTSGS) {
2627 out <<
"MT Symmetric Gauss-Seidel";
2634 <<
"\"relaxation: sweeps\": " << NumSweeps_ << endl
2635 <<
"\"relaxation: damping factor\": " << DampingFactor_ << endl
2636 <<
"\"relaxation: min diagonal value\": " << MinDiagonalValue_ << endl
2637 <<
"\"relaxation: zero starting solution\": " << ZeroStartingSolution_ << endl
2638 <<
"\"relaxation: backward mode\": " << DoBackwardGS_ << endl
2639 <<
"\"relaxation: use l1\": " << DoL1Method_ << endl
2640 <<
"\"relaxation: l1 eta\": " << L1Eta_ << endl;
2642 out <<
"Computed quantities:" << endl;
2645 out <<
"Global number of rows: " << A_->getGlobalNumRows () << endl
2646 <<
"Global number of columns: " << A_->getGlobalNumCols () << endl;
2648 if (checkDiagEntries_ && isComputed ()) {
2649 out <<
"Properties of input diagonal entries:" << endl;
2652 out <<
"Magnitude of minimum-magnitude diagonal entry: "
2653 << globalMinMagDiagEntryMag_ << endl
2654 <<
"Magnitude of maximum-magnitude diagonal entry: "
2655 << globalMaxMagDiagEntryMag_ << endl
2656 <<
"Number of diagonal entries with small magnitude: "
2657 << globalNumSmallDiagEntries_ << endl
2658 <<
"Number of zero diagonal entries: "
2659 << globalNumZeroDiagEntries_ << endl
2660 <<
"Number of diagonal entries with negative real part: "
2661 << globalNumNegDiagEntries_ << endl
2662 <<
"Abs 2-norm diff between computed and actual inverse "
2663 <<
"diagonal: " << globalDiagNormDiff_ << endl;
2666 if (isComputed ()) {
2667 out <<
"Saved diagonal offsets: "
2668 << (savedDiagOffsets_ ?
"true" :
"false") << endl;
2670 out <<
"Call counts and total times (in seconds): " << endl;
2673 out <<
"initialize: " << endl;
2676 out <<
"Call count: " << NumInitialize_ << endl;
2677 out <<
"Total time: " << InitializeTime_ << endl;
2679 out <<
"compute: " << endl;
2682 out <<
"Call count: " << NumCompute_ << endl;
2683 out <<
"Total time: " << ComputeTime_ << endl;
2685 out <<
"apply: " << endl;
2688 out <<
"Call count: " << NumApply_ << endl;
2689 out <<
"Total time: " << ApplyTime_ << endl;
2697 #define IFPACK2_RELAXATION_INSTANT(S,LO,GO,N) \
2698 template class Ifpack2::Relaxation< Tpetra::RowMatrix<S, LO, GO, N> >;
2700 #endif // IFPACK2_RELAXATION_DEF_HPP
bool hasTransposeApply() const
Whether apply() and applyMat() let you apply the transpose or conjugate transpose.
Definition: Ifpack2_Relaxation_def.hpp:444
double getComputeFlops() const
Total number of floating-point operations over all calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:486
double getApplyFlops() const
Total number of floating-point operations over all calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:492
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object's attributes to the given output stream.
Definition: Ifpack2_Relaxation_def.hpp:2577
bool is_null(const boost::shared_ptr< T > &p)
basic_OSTab< char > OSTab
static magnitudeType eps()
Relaxation(const Teuchos::RCP< const row_matrix_type > &A)
Constructor.
Definition: Ifpack2_Relaxation_def.hpp:216
T & get(const std::string &name, T def_value)
void compute()
Compute the preconditioner ("numeric setup");.
Definition: Ifpack2_Relaxation_def.hpp:928
ParameterList & set(std::string const &name, T const &value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
virtual void printDoc(std::string const &docString, std::ostream &out) const =0
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
size_t getNodeSmootherComplexity() const
Get a rough estimate of cost per iteration.
Definition: Ifpack2_Relaxation_def.hpp:499
virtual const std::string getXMLTypeName() const =0
static std::ostream & printLines(std::ostream &os, const std::string &linePrefix, const std::string &lines)
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
The communicator over which the matrix and vectors are distributed.
Definition: Ifpack2_Relaxation_def.hpp:401
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Change the matrix to be preconditioned.
Definition: Ifpack2_Relaxation_def.hpp:196
int getNumApply() const
Total number of calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:462
bool remove(std::string const &name, bool throwIfNotExists=true)
double getComputeTime() const
Total time in seconds spent in all calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:474
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
MatrixType::node_type node_type
The Node type used by the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:258
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getDomainMap() const
Returns the Tpetra::Map object associated with the domain of this operator.
Definition: Ifpack2_Relaxation_def.hpp:421
void setParameters(const Teuchos::ParameterList ¶ms)
Set the relaxation / preconditioner parameters.
Definition: Ifpack2_Relaxation_def.hpp:391
File for utility functions.
std::string description() const
A simple one-line description of this object.
Definition: Ifpack2_Relaxation_def.hpp:2522
int getNumInitialize() const
Total number of calls to initialize().
Definition: Ifpack2_Relaxation_def.hpp:450
void validateParametersAndSetDefaults(ParameterList const &validParamList, int const depth=1000)
TEUCHOS_DEPRECATED void reduceAll(const Comm< Ordinal > &comm, const EReductionType reductType, const Packet &send, Packet *globalReduct)
virtual ValidStringsList validStringValues() const =0
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getRangeMap() const
Returns the Tpetra::Map object associated with the range of this operator.
Definition: Ifpack2_Relaxation_def.hpp:434
MatrixType::global_ordinal_type global_ordinal_type
The type of global indices in the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:255
double getInitializeTime() const
Total time in seconds spent in all calls to initialize().
Definition: Ifpack2_Relaxation_def.hpp:468
any & getAny(bool activeQry=true)
void applyMat(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS) const
Apply the input matrix to X, returning the result in Y.
Definition: Ifpack2_Relaxation_def.hpp:615
Teuchos::RCP< const row_matrix_type > getMatrix() const
The matrix to be preconditioned.
Definition: Ifpack2_Relaxation_def.hpp:412
TypeTo as(const TypeFrom &t)
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:252
bool isType(const std::string &name) const
MatrixType::scalar_type scalar_type
The type of the entries of the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:249
double totalElapsedTime(bool readCurrentTime=false) const
std::string typeName() const
const std::type_info & type() const
int getNumCompute() const
Total number of calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:456
Relaxation preconditioners for Tpetra::RowMatrix and Tpetra::CrsMatrix sparse matrices.
Definition: Ifpack2_Relaxation_decl.hpp:232
double getApplyTime() const
Total time in seconds spent in all calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:480
void getValidParameters(Teuchos::ParameterList ¶ms)
Fills a list which contains all the parameters possibly used by Ifpack2.
Definition: Ifpack2_Parameters.cpp:50
void initialize()
Initialize the preconditioner ("symbolic setup").
Definition: Ifpack2_Relaxation_def.hpp:636
Tpetra::RowMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > row_matrix_type
Tpetra::RowMatrix specialization used by this class.
Definition: Ifpack2_Relaxation_decl.hpp:265
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the preconditioner to X, returning the result in Y.
Definition: Ifpack2_Relaxation_def.hpp:512
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const
Return a list of all the parameters that this class accepts.
Definition: Ifpack2_Relaxation_def.hpp:258
Teuchos::ScalarTraits< scalar_type >::magnitudeType magnitude_type
The type of the magnitude (absolute value) of a matrix entry.
Definition: Ifpack2_Relaxation_decl.hpp:261
virtual void validate(ParameterEntry const &entry, std::string const ¶mName, std::string const &sublistName) const =0