41 #ifndef IFPACK2_RELAXATION_DEF_HPP
42 #define IFPACK2_RELAXATION_DEF_HPP
44 #include "Teuchos_StandardParameterEntryValidators.hpp"
46 #include "Tpetra_CrsMatrix.hpp"
47 #include "Tpetra_BlockCrsMatrix.hpp"
48 #include "Tpetra_BlockView.hpp"
50 #include "MatrixMarket_Tpetra.hpp"
51 #include "Tpetra_transform_MultiVector.hpp"
52 #include "Tpetra_withLocalAccess_MultiVector.hpp"
53 #include "Tpetra_Details_residual.hpp"
57 #include "KokkosSparse_gauss_seidel.hpp"
64 NonnegativeIntValidator () {}
74 const std::string& paramName,
75 const std::string& sublistName)
const
82 anyVal.
type () !=
typeid (int),
84 "Parameter \"" << paramName <<
"\" in sublist \"" << sublistName
85 <<
"\" has the wrong type." << endl <<
"Parameter: " << paramName
86 << endl <<
"Type specified: " << entryName << endl
87 <<
"Type required: int" << endl);
89 const int val = Teuchos::any_cast<
int> (anyVal);
92 "Parameter \"" << paramName <<
"\" in sublist \"" << sublistName
93 <<
"\" is negative." << endl <<
"Parameter: " << paramName
94 << endl <<
"Value specified: " << val << endl
95 <<
"Required range: [0, INT_MAX]" << endl);
100 return "NonnegativeIntValidator";
105 printDoc (
const std::string& docString,
106 std::ostream &out)
const
109 out <<
"#\tValidator Used: " << std::endl;
110 out <<
"#\t\tNonnegativeIntValidator" << std::endl;
117 template<class Scalar, const bool isOrdinal=Teuchos::ScalarTraits<Scalar>::isOrdinal>
121 static const Scalar eps ();
125 template<
class Scalar>
126 class SmallTraits<Scalar, true> {
128 static const Scalar eps () {
134 template<
class Scalar>
135 class SmallTraits<Scalar, false> {
137 static const Scalar eps () {
143 template<
class Scalar,
145 struct RealTraits {};
147 template<
class Scalar>
148 struct RealTraits<Scalar, false> {
149 using val_type = Scalar;
150 using mag_type = Scalar;
151 static KOKKOS_INLINE_FUNCTION mag_type real (
const val_type& z) {
156 template<
class Scalar>
157 struct RealTraits<Scalar, true> {
158 using val_type = Scalar;
160 static KOKKOS_INLINE_FUNCTION mag_type real (
const val_type& z) {
161 return Kokkos::ArithTraits<val_type>::real (z);
165 template<
class Scalar>
166 KOKKOS_INLINE_FUNCTION
typename RealTraits<Scalar>::mag_type
167 getRealValue (
const Scalar& z) {
168 return RealTraits<Scalar>::real (z);
175 template<
class MatrixType>
177 Relaxation<MatrixType>::
178 updateCachedMultiVector (
const Teuchos::RCP<
const Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type>>& map,
179 size_t numVecs)
const
183 if (cachedMV_.is_null () ||
184 map.get () != cachedMV_->getMap ().get () ||
185 cachedMV_->getNumVectors () != numVecs) {
186 using MV = Tpetra::MultiVector<scalar_type, local_ordinal_type,
187 global_ordinal_type, node_type>;
188 cachedMV_ =
Teuchos::rcp (
new MV (map, numVecs,
false));
192 template<
class MatrixType>
197 Importer_ = Teuchos::null;
198 pointImporter_ = Teuchos::null;
199 Diagonal_ = Teuchos::null;
200 isInitialized_ =
false;
202 using device_type =
typename node_type::device_type;
203 diagOffsets_ = Kokkos::View<size_t*, device_type>();
204 savedDiagOffsets_ =
false;
205 hasBlockCrsMatrix_ =
false;
207 IsParallel_ = (A->getRowMap ()->getComm ()->getSize () > 1);
213 template<
class MatrixType>
217 IsParallel_ ((A.
is_null () || A->getRowMap ().
is_null () || A->getRowMap ()->getComm ().
is_null ()) ?
219 A->getRowMap ()->getComm ()->getSize () > 1)
221 this->setObjectLabel (
"Ifpack2::Relaxation");
225 template<
class MatrixType>
231 using Teuchos::parameterList;
234 using Teuchos::rcp_const_cast;
235 using Teuchos::rcp_implicit_cast;
236 using Teuchos::setStringToIntegralParameter;
239 if (validParams_.is_null ()) {
240 RCP<ParameterList> pl = parameterList (
"Ifpack2::Relaxation");
244 Array<std::string> precTypes (8);
245 precTypes[0] =
"Jacobi";
246 precTypes[1] =
"Gauss-Seidel";
247 precTypes[2] =
"Symmetric Gauss-Seidel";
248 precTypes[3] =
"MT Gauss-Seidel";
249 precTypes[4] =
"MT Symmetric Gauss-Seidel";
250 precTypes[5] =
"Richardson";
251 precTypes[6] =
"Two-stage Gauss-Seidel";
252 precTypes[7] =
"Two-stage Symmetric Gauss-Seidel";
253 Array<Details::RelaxationType> precTypeEnums (8);
254 precTypeEnums[0] = Details::JACOBI;
255 precTypeEnums[1] = Details::GS;
256 precTypeEnums[2] = Details::SGS;
257 precTypeEnums[3] = Details::MTGS;
258 precTypeEnums[4] = Details::MTSGS;
259 precTypeEnums[5] = Details::RICHARDSON;
260 precTypeEnums[6] = Details::GS2;
261 precTypeEnums[7] = Details::SGS2;
262 const std::string defaultPrecType (
"Jacobi");
263 setStringToIntegralParameter<Details::RelaxationType> (
"relaxation: type",
264 defaultPrecType,
"Relaxation method", precTypes (), precTypeEnums (),
267 const int numSweeps = 1;
268 RCP<PEV> numSweepsValidator =
269 rcp_implicit_cast<PEV> (
rcp (
new NonnegativeIntValidator));
270 pl->set (
"relaxation: sweeps", numSweeps,
"Number of relaxation sweeps",
271 rcp_const_cast<const PEV> (numSweepsValidator));
274 const int numInnerSweeps = 1;
275 RCP<PEV> numInnerSweepsValidator =
276 rcp_implicit_cast<PEV> (
rcp (
new NonnegativeIntValidator));
277 pl->set (
"relaxation: inner sweeps", numInnerSweeps,
"Number of inner relaxation sweeps",
278 rcp_const_cast<const PEV> (numInnerSweepsValidator));
280 const bool innerSpTrsv =
false;
281 pl->set (
"relaxation: inner sparse-triangular solve", innerSpTrsv);
284 pl->set (
"relaxation: damping factor", dampingFactor);
286 const bool zeroStartingSolution =
true;
287 pl->set (
"relaxation: zero starting solution", zeroStartingSolution);
289 const bool doBackwardGS =
false;
290 pl->set (
"relaxation: backward mode", doBackwardGS);
292 const bool doL1Method =
false;
293 pl->set (
"relaxation: use l1", doL1Method);
295 const magnitude_type l1eta = (STM::one() + STM::one() + STM::one()) /
296 (STM::one() + STM::one());
297 pl->set (
"relaxation: l1 eta", l1eta);
300 pl->set (
"relaxation: min diagonal value", minDiagonalValue);
302 const bool fixTinyDiagEntries =
false;
303 pl->set (
"relaxation: fix tiny diagonal entries", fixTinyDiagEntries);
305 const bool checkDiagEntries =
false;
306 pl->set (
"relaxation: check diagonal entries", checkDiagEntries);
309 pl->set(
"relaxation: local smoothing indices", localSmoothingIndices);
311 const bool is_matrix_structurally_symmetric =
false;
312 pl->set(
"relaxation: symmetric matrix structure", is_matrix_structurally_symmetric);
314 const bool ifpack2_dump_matrix =
false;
315 pl->set(
"relaxation: ifpack2 dump matrix", ifpack2_dump_matrix);
317 const int cluster_size = 1;
318 pl->set(
"relaxation: mtgs cluster size", cluster_size);
320 validParams_ = rcp_const_cast<
const ParameterList> (pl);
326 template<
class MatrixType>
329 using Teuchos::getIntegralValue;
332 typedef scalar_type ST;
334 if (pl.
isType<
double>(
"relaxation: damping factor")) {
337 ST df = pl.
get<
double>(
"relaxation: damping factor");
338 pl.
remove(
"relaxation: damping factor");
339 pl.
set(
"relaxation: damping factor",df);
344 const Details::RelaxationType precType =
345 getIntegralValue<Details::RelaxationType> (pl,
"relaxation: type");
346 const int numSweeps = pl.get<
int> (
"relaxation: sweeps");
347 const int numInnerSweeps = pl.get<
int> (
"relaxation: inner sweeps");
348 const bool innerSpTrsv = pl.get<
bool> (
"relaxation: inner sparse-triangular solve");
349 const ST dampingFactor = pl.get<ST> (
"relaxation: damping factor");
350 const bool zeroStartSol = pl.get<
bool> (
"relaxation: zero starting solution");
351 const bool doBackwardGS = pl.get<
bool> (
"relaxation: backward mode");
352 const bool doL1Method = pl.get<
bool> (
"relaxation: use l1");
353 const magnitude_type l1Eta = pl.get<magnitude_type> (
"relaxation: l1 eta");
354 const ST minDiagonalValue = pl.get<ST> (
"relaxation: min diagonal value");
355 const bool fixTinyDiagEntries = pl.get<
bool> (
"relaxation: fix tiny diagonal entries");
356 const bool checkDiagEntries = pl.get<
bool> (
"relaxation: check diagonal entries");
357 const bool is_matrix_structurally_symmetric = pl.get<
bool> (
"relaxation: symmetric matrix structure");
358 const bool ifpack2_dump_matrix = pl.get<
bool> (
"relaxation: ifpack2 dump matrix");
359 int cluster_size = 1;
360 if(pl.isParameter (
"relaxation: mtgs cluster size"))
361 cluster_size = pl.get<
int> (
"relaxation: mtgs cluster size");
367 PrecType_ = precType;
368 NumSweeps_ = numSweeps;
369 NumInnerSweeps_ = numInnerSweeps;
370 InnerSpTrsv_ = innerSpTrsv;
371 DampingFactor_ = dampingFactor;
372 ZeroStartingSolution_ = zeroStartSol;
373 DoBackwardGS_ = doBackwardGS;
374 DoL1Method_ = doL1Method;
376 MinDiagonalValue_ = minDiagonalValue;
377 fixTinyDiagEntries_ = fixTinyDiagEntries;
378 checkDiagEntries_ = checkDiagEntries;
379 clusterSize_ = cluster_size;
380 is_matrix_structurally_symmetric_ = is_matrix_structurally_symmetric;
381 ifpack2_dump_matrix_ = ifpack2_dump_matrix;
382 localSmoothingIndices_ = localSmoothingIndices;
386 template<
class MatrixType>
391 this->setParametersImpl(const_cast<Teuchos::ParameterList&>(pl));
395 template<
class MatrixType>
399 A_.is_null (), std::runtime_error,
"Ifpack2::Relaxation::getComm: "
400 "The input matrix A is null. Please call setMatrix() with a nonnull "
401 "input matrix before calling this method.");
402 return A_->getRowMap ()->getComm ();
406 template<
class MatrixType>
413 template<
class MatrixType>
414 Teuchos::RCP<
const Tpetra::Map<
typename MatrixType::local_ordinal_type,
415 typename MatrixType::global_ordinal_type,
416 typename MatrixType::node_type> >
419 A_.is_null (), std::runtime_error,
"Ifpack2::Relaxation::getDomainMap: "
420 "The input matrix A is null. Please call setMatrix() with a nonnull "
421 "input matrix before calling this method.");
422 return A_->getDomainMap ();
426 template<
class MatrixType>
427 Teuchos::RCP<
const Tpetra::Map<
typename MatrixType::local_ordinal_type,
428 typename MatrixType::global_ordinal_type,
429 typename MatrixType::node_type> >
432 A_.is_null (), std::runtime_error,
"Ifpack2::Relaxation::getRangeMap: "
433 "The input matrix A is null. Please call setMatrix() with a nonnull "
434 "input matrix before calling this method.");
435 return A_->getRangeMap ();
439 template<
class MatrixType>
445 template<
class MatrixType>
447 return(NumInitialize_);
451 template<
class MatrixType>
457 template<
class MatrixType>
463 template<
class MatrixType>
465 return(InitializeTime_);
469 template<
class MatrixType>
471 return(ComputeTime_);
475 template<
class MatrixType>
481 template<
class MatrixType>
483 return(ComputeFlops_);
487 template<
class MatrixType>
494 template<
class MatrixType>
497 A_.is_null (), std::runtime_error,
"Ifpack2::Relaxation::getNodeSmootherComplexity: "
498 "The input matrix A is null. Please call setMatrix() with a nonnull "
499 "input matrix, then call compute(), before calling this method.");
501 return A_->getNodeNumRows() + A_->getNodeNumEntries();
505 template<
class MatrixType>
508 apply (
const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
509 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
517 using Teuchos::rcpFromRef;
521 A_.is_null (), std::runtime_error,
"Ifpack2::Relaxation::apply: "
522 "The input matrix A is null. Please call setMatrix() with a nonnull "
523 "input matrix, then call compute(), before calling this method.");
527 "Ifpack2::Relaxation::apply: You must call compute() on this Ifpack2 "
528 "preconditioner instance before you may call apply(). You may call "
529 "isComputed() to find out if compute() has been called already.");
530 TEUCHOS_TEST_FOR_EXCEPTION(
531 X.getNumVectors() != Y.getNumVectors(),
533 "Ifpack2::Relaxation::apply: X and Y have different numbers of columns. "
534 "X has " << X.getNumVectors() <<
" columns, but Y has "
535 << Y.getNumVectors() <<
" columns.");
536 TEUCHOS_TEST_FOR_EXCEPTION(
537 beta != STS::zero (), std::logic_error,
538 "Ifpack2::Relaxation::apply: beta = " << beta <<
" != 0 case not "
541 const std::string timerName (
"Ifpack2::Relaxation::apply");
550 if (alpha == STS::zero ()) {
552 Y.putScalar (STS::zero ());
560 auto X_lcl_host = X.getLocalViewHost ();
561 auto Y_lcl_host = Y.getLocalViewHost ();
563 if (X_lcl_host.data () == Y_lcl_host.data ()) {
566 Xcopy = rcpFromRef (X);
574 case Ifpack2::Details::JACOBI:
575 ApplyInverseJacobi(*Xcopy,Y);
577 case Ifpack2::Details::GS:
578 ApplyInverseGS(*Xcopy,Y);
580 case Ifpack2::Details::SGS:
581 ApplyInverseSGS(*Xcopy,Y);
583 case Ifpack2::Details::MTSGS:
584 case Ifpack2::Details::SGS2:
585 ApplyInverseMTSGS_CrsMatrix(*Xcopy,Y);
587 case Ifpack2::Details::MTGS:
588 case Ifpack2::Details::GS2:
589 ApplyInverseMTGS_CrsMatrix(*Xcopy,Y);
591 case Ifpack2::Details::RICHARDSON:
592 ApplyInverseRichardson(*Xcopy,Y);
596 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::logic_error,
597 "Ifpack2::Relaxation::apply: Invalid preconditioner type enum value "
598 << PrecType_ <<
". Please report this bug to the Ifpack2 developers.");
600 if (alpha != STS::one ()) {
602 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
603 const double numVectors = as<double> (Y.getNumVectors ());
604 ApplyFlops_ += numGlobalRows * numVectors;
613 template<
class MatrixType>
616 applyMat (
const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
617 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
621 A_.is_null (), std::runtime_error,
"Ifpack2::Relaxation::applyMat: "
622 "The input matrix A is null. Please call setMatrix() with a nonnull "
623 "input matrix, then call compute(), before calling this method.");
625 ! isComputed (), std::runtime_error,
"Ifpack2::Relaxation::applyMat: "
626 "isComputed() must be true before you may call applyMat(). "
627 "Please call compute() before calling this method.");
628 TEUCHOS_TEST_FOR_EXCEPTION(
629 X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
630 "Ifpack2::Relaxation::applyMat: X.getNumVectors() = " << X.getNumVectors ()
631 <<
" != Y.getNumVectors() = " << Y.getNumVectors () <<
".");
632 A_->apply (X, Y, mode);
636 template<
class MatrixType>
639 const char methodName[] =
"Ifpack2::Relaxation::initialize";
642 (A_.is_null (), std::runtime_error, methodName <<
": The "
643 "input matrix A is null. Please call setMatrix() with "
644 "a nonnull input matrix before calling this method.");
651 isInitialized_ =
false;
654 auto rowMap = A_->getRowMap ();
655 auto comm = rowMap.is_null () ? Teuchos::null : rowMap->getComm ();
656 IsParallel_ = ! comm.is_null () && comm->getSize () > 1;
666 Importer_ = IsParallel_ ? A_->getGraph ()->getImporter () :
671 Teuchos::rcp_dynamic_cast<
const block_crs_matrix_type> (A_);
672 hasBlockCrsMatrix_ = ! A_bcrs.
is_null ();
675 if (PrecType_ == Details::MTGS || PrecType_ == Details::MTSGS ||
676 PrecType_ == Details::GS2 || PrecType_ == Details::SGS2) {
677 const crs_matrix_type* crsMat =
678 dynamic_cast<const crs_matrix_type*
> (A_.get());
680 (crsMat ==
nullptr, std::logic_error, methodName <<
": "
681 "Multithreaded Gauss-Seidel methods currently only work "
682 "when the input matrix is a Tpetra::CrsMatrix.");
687 if (ifpack2_dump_matrix_) {
688 static int sequence_number = 0;
689 const std::string file_name =
"Ifpack2_MT_GS_" +
690 std::to_string (sequence_number++) +
".mtx";
692 Teuchos::rcp_dynamic_cast<
const crs_matrix_type> (A_);
693 if (! rcp_crs_mat.
is_null ()) {
694 using writer_type = Tpetra::MatrixMarket::Writer<crs_matrix_type>;
695 writer_type::writeSparseFile (file_name, rcp_crs_mat);
699 this->mtKernelHandle_ =
Teuchos::rcp (
new mt_kernel_handle_type ());
700 if (mtKernelHandle_->get_gs_handle () ==
nullptr) {
701 if (PrecType_ == Details::GS2 || PrecType_ == Details::SGS2)
702 mtKernelHandle_->create_gs_handle (KokkosSparse::GS_TWOSTAGE);
703 else if(this->clusterSize_ == 1)
704 mtKernelHandle_->create_gs_handle ();
706 mtKernelHandle_->create_gs_handle (KokkosSparse::CLUSTER_DEFAULT, this->clusterSize_);
708 local_matrix_type kcsr = crsMat->getLocalMatrix ();
709 if (PrecType_ == Details::GS2 || PrecType_ == Details::SGS2) {
711 mtKernelHandle_->set_gs_set_num_inner_sweeps (NumInnerSweeps_);
712 mtKernelHandle_->set_gs_twostage (!InnerSpTrsv_, A_->getNodeNumRows ());
715 using KokkosSparse::Experimental::gauss_seidel_symbolic;
716 gauss_seidel_symbolic<mt_kernel_handle_type,
718 lno_nonzero_view_t> (mtKernelHandle_.getRawPtr (),
719 A_->getNodeNumRows (),
720 A_->getNodeNumCols (),
723 is_matrix_structurally_symmetric_);
727 InitializeTime_ += timer->totalElapsedTime ();
729 isInitialized_ =
true;
733 template <
typename BlockDiagView>
734 struct InvertDiagBlocks {
735 typedef int value_type;
736 typedef typename BlockDiagView::size_type Size;
739 typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
740 template <
typename View>
741 using UnmanagedView = Kokkos::View<
typename View::data_type,
typename View::array_layout,
742 typename View::device_type, Unmanaged>;
744 typedef typename BlockDiagView::non_const_value_type Scalar;
745 typedef typename BlockDiagView::device_type Device;
746 typedef Kokkos::View<Scalar**, Kokkos::LayoutRight, Device> RWrk;
747 typedef Kokkos::View<int**, Kokkos::LayoutRight, Device> IWrk;
749 UnmanagedView<BlockDiagView> block_diag_;
753 UnmanagedView<RWrk> rwrk_;
755 UnmanagedView<IWrk> iwrk_;
758 InvertDiagBlocks (BlockDiagView& block_diag)
759 : block_diag_(block_diag)
761 const auto blksz = block_diag.extent(1);
762 Kokkos::resize(rwrk_buf_, block_diag_.extent(0), blksz);
764 Kokkos::resize(iwrk_buf_, block_diag_.extent(0), blksz);
768 KOKKOS_INLINE_FUNCTION
769 void operator() (
const Size i,
int& jinfo)
const {
770 auto D_cur = Kokkos::subview(block_diag_, i, Kokkos::ALL(), Kokkos::ALL());
771 auto ipiv = Kokkos::subview(iwrk_, i, Kokkos::ALL());
772 auto work = Kokkos::subview(rwrk_, i, Kokkos::ALL());
774 Tpetra::GETF2(D_cur, ipiv, info);
779 Tpetra::GETRI(D_cur, ipiv, work, info);
784 KOKKOS_INLINE_FUNCTION
785 void join (
volatile value_type& dst,
volatile value_type
const& src)
const { dst += src; }
789 template<
class MatrixType>
790 void Relaxation<MatrixType>::computeBlockCrs ()
803 using Teuchos::rcp_dynamic_cast;
804 using Teuchos::reduceAll;
805 typedef local_ordinal_type LO;
806 typedef typename node_type::device_type device_type;
808 const std::string timerName (
"Ifpack2::Relaxation::computeBlockCrs");
817 A_.is_null (), std::runtime_error,
"Ifpack2::Relaxation::"
818 "computeBlockCrs: The input matrix A is null. Please call setMatrix() "
819 "with a nonnull input matrix, then call initialize(), before calling "
821 const block_crs_matrix_type* blockCrsA =
822 dynamic_cast<const block_crs_matrix_type*
> (A_.getRawPtr ());
824 blockCrsA ==
nullptr, std::logic_error,
"Ifpack2::Relaxation::"
825 "computeBlockCrs: A_ is not a BlockCrsMatrix, but it should be if we "
826 "got this far. Please report this bug to the Ifpack2 developers.");
828 const scalar_type one = STS::one ();
833 const LO lclNumMeshRows =
834 blockCrsA->getCrsGraph ().getNodeNumRows ();
835 const LO blockSize = blockCrsA->getBlockSize ();
837 if (! savedDiagOffsets_) {
838 blockDiag_ = block_diag_type ();
839 blockDiag_ = block_diag_type (
"Ifpack2::Relaxation::blockDiag_",
840 lclNumMeshRows, blockSize, blockSize);
841 if (Teuchos::as<LO>(diagOffsets_.extent (0) ) < lclNumMeshRows) {
844 diagOffsets_ = Kokkos::View<size_t*, device_type> ();
845 diagOffsets_ = Kokkos::View<size_t*, device_type> (
"offsets", lclNumMeshRows);
847 blockCrsA->getCrsGraph ().getLocalDiagOffsets (diagOffsets_);
849 (static_cast<size_t> (diagOffsets_.extent (0)) !=
850 static_cast<size_t> (blockDiag_.extent (0)),
851 std::logic_error,
"diagOffsets_.extent(0) = " <<
852 diagOffsets_.extent (0) <<
" != blockDiag_.extent(0) = "
853 << blockDiag_.extent (0) <<
854 ". Please report this bug to the Ifpack2 developers.");
855 savedDiagOffsets_ =
true;
857 blockCrsA->getLocalDiagCopy (blockDiag_, diagOffsets_);
864 unmanaged_block_diag_type blockDiag = blockDiag_;
866 if (DoL1Method_ && IsParallel_) {
867 const scalar_type two = one + one;
868 const size_t maxLength = A_->getNodeMaxNumRowEntries ();
869 Array<LO> indices (maxLength);
870 Array<scalar_type> values (maxLength * blockSize * blockSize);
871 size_t numEntries = 0;
873 for (LO i = 0; i < lclNumMeshRows; ++i) {
875 blockCrsA->getLocalRowCopy (i, indices (), values (), numEntries);
877 auto diagBlock = Kokkos::subview (blockDiag, i, ALL (), ALL ());
878 for (LO subRow = 0; subRow < blockSize; ++subRow) {
879 magnitude_type diagonal_boost = STM::zero ();
880 for (
size_t k = 0 ; k < numEntries ; ++k) {
881 if (indices[k] > lclNumMeshRows) {
882 const size_t offset = blockSize*blockSize*k + subRow*blockSize;
883 for (LO subCol = 0; subCol < blockSize; ++subCol) {
884 diagonal_boost += STS::magnitude (values[offset+subCol] / two);
888 if (STS::magnitude (diagBlock(subRow, subRow)) < L1Eta_ * diagonal_boost) {
889 diagBlock(subRow, subRow) += diagonal_boost;
897 Impl::InvertDiagBlocks<unmanaged_block_diag_type> idb(blockDiag);
898 typedef typename unmanaged_block_diag_type::execution_space exec_space;
899 typedef Kokkos::RangePolicy<exec_space, LO> range_type;
901 Kokkos::parallel_reduce (range_type (0, lclNumMeshRows), idb, info);
907 (info > 0, std::runtime_error,
"GETF2 or GETRI failed on = " << info
908 <<
" diagonal blocks.");
913 #ifdef HAVE_IFPACK2_DEBUG
914 const int numResults = 2;
916 int lclResults[2], gblResults[2];
917 lclResults[0] = info;
918 lclResults[1] = -info;
921 reduceAll<int, int> (* (A_->getGraph ()->getComm ()), REDUCE_MIN,
922 numResults, lclResults, gblResults);
924 (gblResults[0] != 0 || gblResults[1] != 0, std::runtime_error,
925 "Ifpack2::Relaxation::compute: When processing the input "
926 "Tpetra::BlockCrsMatrix, one or more diagonal block LU factorizations "
927 "failed on one or more (MPI) processes.");
928 #endif // HAVE_IFPACK2_DEBUG
936 template<
class MatrixType>
939 using Tpetra::readWrite;
950 using Teuchos::rcp_dynamic_cast;
951 using Teuchos::reduceAll;
955 using device_type =
typename vector_type::device_type;
956 using IST =
typename vector_type::impl_scalar_type;
957 using KAT = Kokkos::ArithTraits<IST>;
959 const char methodName[] =
"Ifpack2::Relaxation::compute";
960 const scalar_type zero = STS::zero ();
961 const scalar_type one = STS::one ();
965 #ifdef HAVE_IFPACK2_DEBUG
966 constexpr
bool debug =
true;
968 constexpr
bool debug =
false;
969 #endif // HAVE_IFPACK2_DEBUG
972 (A_.is_null (), std::runtime_error, methodName <<
": "
973 "The input matrix A is null. Please call setMatrix() with a nonnull "
974 "input matrix, then call initialize(), before calling this method.");
977 if (! isInitialized ()) {
981 if (hasBlockCrsMatrix_) {
992 (NumSweeps_ < 0, std::logic_error, methodName
993 <<
": NumSweeps_ = " << NumSweeps_ <<
" < 0. "
994 "Please report this bug to the Ifpack2 developers.");
997 Diagonal_ = Teuchos::null;
1000 Diagonal_ =
rcp (
new vector_type (A_->getRowMap (),
false));
1018 const crs_matrix_type* crsMat =
1019 dynamic_cast<const crs_matrix_type*
> (A_.getRawPtr ());
1020 if (crsMat ==
nullptr || ! crsMat->isStaticGraph ()) {
1021 A_->getLocalDiagCopy (*Diagonal_);
1024 if (! savedDiagOffsets_) {
1025 const size_t lclNumRows = A_->getRowMap ()->getNodeNumElements ();
1026 if (diagOffsets_.extent (0) < lclNumRows) {
1027 using Kokkos::view_alloc;
1028 using Kokkos::WithoutInitializing;
1029 using offsets_view_type = Kokkos::View<size_t*, device_type>;
1031 diagOffsets_ = offsets_view_type ();
1032 auto howAlloc = view_alloc (
"offsets", WithoutInitializing);
1033 diagOffsets_ = offsets_view_type (howAlloc, lclNumRows);
1035 crsMat->getCrsGraph ()->getLocalDiagOffsets (diagOffsets_);
1036 savedDiagOffsets_ =
true;
1038 crsMat->getLocalDiagCopy (*Diagonal_, diagOffsets_);
1042 vector_type D_copy (A_->getRowMap ());
1043 A_->getLocalDiagCopy (D_copy);
1044 D_copy.update (STS::one (), *Diagonal_, -STS::one ());
1049 (err != STM::zero(), std::logic_error, methodName <<
": "
1050 <<
"\"fast-path\" diagonal computation failed. "
1051 "\\|D1 - D2\\|_inf = " << err <<
".");
1058 std::unique_ptr<vector_type> origDiag;
1059 if (checkDiagEntries_) {
1060 origDiag = std::unique_ptr<vector_type>
1064 const LO numMyRows =
static_cast<LO
> (A_->getNodeNumRows ());
1078 if (DoL1Method_ && IsParallel_) {
1079 vector_type& gblDiag = *Diagonal_;
1081 decltype (readWrite (gblDiag).on (Kokkos::HostSpace ()));
1084 using lcl_vec_type =
1085 Tpetra::with_local_access_function_argument_type<rw_type>;
1088 Tpetra::withLocalAccess
1089 ([&A_row, L1_eta, numMyRows] (
const lcl_vec_type& diag) {
1091 const size_t maxLength = A_row.getNodeMaxNumRowEntries ();
1092 Array<local_ordinal_type> indices (maxLength);
1093 Array<scalar_type> values (maxLength);
1096 for (LO i = 0; i < numMyRows; ++i) {
1097 A_row.getLocalRowCopy (i, indices (), values (), numEntries);
1099 for (
size_t k = 0 ; k < numEntries; ++k) {
1100 if (indices[k] > numMyRows) {
1101 diagonal_boost += STS::magnitude (values[k] / two);
1104 if (KAT::magnitude (diag[i]) < L1_eta * diagonal_boost) {
1105 diag[i] += diagonal_boost;
1108 }, readWrite (gblDiag).on (Kokkos::HostSpace ()));
1122 IST oneOverMinDiagVal = KAT::one () /
static_cast<IST
> (SmallTraits<scalar_type>::eps ());
1123 if ( MinDiagonalValue_ != zero)
1124 oneOverMinDiagVal = KAT::one () /
static_cast<IST
> (MinDiagonalValue_);
1127 const magnitude_type minDiagValMag = STS::magnitude (MinDiagonalValue_);
1129 if (checkDiagEntries_) {
1135 size_t numSmallDiagEntries = 0;
1136 size_t numZeroDiagEntries = 0;
1137 size_t numNegDiagEntries = 0;
1141 vector_type& gblDiag = *Diagonal_;
1145 decltype (readWrite (gblDiag).on (Kokkos::HostSpace ()));
1146 using lcl_vec_type =
1147 Tpetra::with_local_access_function_argument_type<rw_type>;
1148 Tpetra::withLocalAccess
1149 ([&] (
const lcl_vec_type& diag) {
1156 if (numMyRows != 0) {
1158 minMagDiagEntryMag = d_0_mag;
1159 maxMagDiagEntryMag = d_0_mag;
1168 for (LO i = 0; i < numMyRows; ++i) {
1169 const IST d_i = diag[i];
1173 const auto d_i_real = getRealValue (d_i);
1177 if (d_i_real < STM::zero ()) {
1178 ++numNegDiagEntries;
1180 if (d_i_mag < minMagDiagEntryMag) {
1181 minMagDiagEntryMag = d_i_mag;
1183 if (d_i_mag > maxMagDiagEntryMag) {
1184 maxMagDiagEntryMag = d_i_mag;
1187 if (fixTinyDiagEntries_) {
1189 if (d_i_mag <= minDiagValMag) {
1190 ++numSmallDiagEntries;
1191 if (d_i_mag == STM::zero ()) {
1192 ++numZeroDiagEntries;
1194 diag[i] = oneOverMinDiagVal;
1197 diag[i] = KAT::one () / d_i;
1202 if (d_i_mag <= minDiagValMag) {
1203 ++numSmallDiagEntries;
1204 if (d_i_mag == STM::zero ()) {
1205 ++numZeroDiagEntries;
1208 diag[i] = KAT::one () / d_i;
1211 }, readWrite (gblDiag).on (Kokkos::HostSpace ()));
1216 if (STS::isComplex) {
1217 ComputeFlops_ += 4.0 * numMyRows;
1220 ComputeFlops_ += numMyRows;
1237 const Comm<int>& comm = * (A_->getRowMap ()->getComm ());
1240 Array<magnitude_type> localVals (2);
1241 localVals[0] = minMagDiagEntryMag;
1243 localVals[1] = -maxMagDiagEntryMag;
1244 Array<magnitude_type> globalVals (2);
1245 reduceAll<int, magnitude_type> (comm, REDUCE_MIN, 2,
1246 localVals.getRawPtr (),
1247 globalVals.getRawPtr ());
1248 globalMinMagDiagEntryMag_ = globalVals[0];
1249 globalMaxMagDiagEntryMag_ = -globalVals[1];
1251 Array<size_t> localCounts (3);
1252 localCounts[0] = numSmallDiagEntries;
1253 localCounts[1] = numZeroDiagEntries;
1254 localCounts[2] = numNegDiagEntries;
1255 Array<size_t> globalCounts (3);
1256 reduceAll<int, size_t> (comm, REDUCE_SUM, 3,
1257 localCounts.getRawPtr (),
1258 globalCounts.getRawPtr ());
1259 globalNumSmallDiagEntries_ = globalCounts[0];
1260 globalNumZeroDiagEntries_ = globalCounts[1];
1261 globalNumNegDiagEntries_ = globalCounts[2];
1265 vector_type diff (A_->getRowMap ());
1266 diff.reciprocal (*origDiag);
1267 if (Diagonal_->need_sync_device ()) {
1268 Diagonal_->sync_device ();
1270 diff.update (-one, *Diagonal_, one);
1271 globalDiagNormDiff_ = diff.norm2 ();
1274 if (fixTinyDiagEntries_) {
1278 vector_type& gblDiag = *Diagonal_;
1280 (
"Ifpack2::Relaxation::compute: Invert & fix diagonal",
1282 KOKKOS_LAMBDA (
const IST& d_i) {
1286 if (d_i_mag <= minDiagValMag) {
1287 return oneOverMinDiagVal;
1292 return IST (KAT::one () / d_i);
1297 if (Diagonal_->need_sync_device ()) {
1298 Diagonal_->sync_device ();
1300 Diagonal_->reciprocal (*Diagonal_);
1303 if (STS::isComplex) {
1304 ComputeFlops_ += 4.0 * numMyRows;
1307 ComputeFlops_ += numMyRows;
1311 if (Diagonal_->need_sync_device ()) {
1312 Diagonal_->sync_device ();
1315 if (PrecType_ == Ifpack2::Details::MTGS ||
1316 PrecType_ == Ifpack2::Details::MTSGS ||
1317 PrecType_ == Ifpack2::Details::GS2 ||
1318 PrecType_ == Ifpack2::Details::SGS2) {
1322 const crs_matrix_type* crsMat =
1323 dynamic_cast<const crs_matrix_type*
> (A_.get());
1325 (crsMat ==
nullptr, std::logic_error, methodName <<
": "
1326 "Multithreaded Gauss-Seidel methods currently only work "
1327 "when the input matrix is a Tpetra::CrsMatrix.");
1328 local_matrix_type kcsr = crsMat->getLocalMatrix ();
1330 auto diagView_2d = Diagonal_->getLocalViewDevice ();
1331 auto diagView_1d = Kokkos::subview (diagView_2d, Kokkos::ALL (), 0);
1332 using KokkosSparse::Experimental::gauss_seidel_numeric;
1333 gauss_seidel_numeric<mt_kernel_handle_type,
1336 scalar_nonzero_view_t> (mtKernelHandle_.getRawPtr (),
1337 A_->getNodeNumRows (),
1338 A_->getNodeNumCols (),
1343 is_matrix_structurally_symmetric_);
1353 template<
class MatrixType>
1356 ApplyInverseRichardson (
const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1357 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
const
1360 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1361 const double numVectors = as<double> (X.getNumVectors ());
1362 if (ZeroStartingSolution_) {
1366 Y.scale(DampingFactor_,X);
1372 double flopUpdate = 0.0;
1373 if (DampingFactor_ == STS::one ()) {
1375 flopUpdate = numGlobalRows * numVectors;
1379 flopUpdate = numGlobalRows * numVectors;
1381 ApplyFlops_ += flopUpdate;
1382 if (NumSweeps_ == 1) {
1388 const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1391 updateCachedMultiVector(Y.getMap(),as<size_t>(numVectors));
1393 for (
int j = startSweep; j < NumSweeps_; ++j) {
1395 Tpetra::Details::residual(*A_,Y,X,*cachedMV_);
1396 Y.update(DampingFactor_,*cachedMV_,STS::one());
1410 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1411 const double dampingFlops = (DampingFactor_ == STS::one ()) ? 0.0 : 1.0;
1412 ApplyFlops_ += as<double> (NumSweeps_ - startSweep) * numVectors *
1413 (2.0 * numGlobalNonzeros + dampingFlops);
1417 template<
class MatrixType>
1419 Relaxation<MatrixType>::
1420 ApplyInverseJacobi (
const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1421 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
const
1424 if (hasBlockCrsMatrix_) {
1425 ApplyInverseJacobi_BlockCrsMatrix (X, Y);
1429 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1430 const double numVectors = as<double> (X.getNumVectors ());
1431 if (ZeroStartingSolution_) {
1436 Y.elementWiseMultiply (DampingFactor_, *Diagonal_, X, STS::zero ());
1442 double flopUpdate = 0.0;
1443 if (DampingFactor_ == STS::one ()) {
1445 flopUpdate = numGlobalRows * numVectors;
1449 flopUpdate = 2.0 * numGlobalRows * numVectors;
1451 ApplyFlops_ += flopUpdate;
1452 if (NumSweeps_ == 1) {
1458 const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1461 updateCachedMultiVector(Y.getMap(),as<size_t>(numVectors));
1463 for (
int j = startSweep; j < NumSweeps_; ++j) {
1465 Tpetra::Details::residual(*A_,Y,X,*cachedMV_);
1466 Y.elementWiseMultiply (DampingFactor_, *Diagonal_, *cachedMV_, STS::one ());
1480 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1481 const double dampingFlops = (DampingFactor_ == STS::one ()) ? 0.0 : 1.0;
1482 ApplyFlops_ += as<double> (NumSweeps_ - startSweep) * numVectors *
1483 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1486 template<
class MatrixType>
1488 Relaxation<MatrixType>::
1489 ApplyInverseJacobi_BlockCrsMatrix (
const Tpetra::MultiVector<scalar_type,
1491 global_ordinal_type,
1493 Tpetra::MultiVector<scalar_type,
1495 global_ordinal_type,
1496 node_type>& Y)
const
1498 using Tpetra::BlockMultiVector;
1499 using BMV = BlockMultiVector<scalar_type, local_ordinal_type,
1500 global_ordinal_type, node_type>;
1502 const block_crs_matrix_type* blockMatConst =
1503 dynamic_cast<const block_crs_matrix_type*
> (A_.getRawPtr ());
1505 (blockMatConst ==
nullptr, std::logic_error,
"This method should "
1506 "never be called if the matrix A_ is not a BlockCrsMatrix. "
1507 "Please report this bug to the Ifpack2 developers.");
1512 block_crs_matrix_type* blockMat =
1513 const_cast<block_crs_matrix_type*
> (blockMatConst);
1515 auto meshRowMap = blockMat->getRowMap ();
1516 auto meshColMap = blockMat->getColMap ();
1517 const local_ordinal_type blockSize = blockMat->getBlockSize ();
1519 BMV X_blk (X, *meshColMap, blockSize);
1520 BMV Y_blk (Y, *meshRowMap, blockSize);
1522 if (ZeroStartingSolution_) {
1526 Y_blk.blockWiseMultiply (DampingFactor_, blockDiag_, X_blk);
1527 if (NumSweeps_ == 1) {
1532 auto pointRowMap = Y.getMap ();
1533 const size_t numVecs = X.getNumVectors ();
1537 BMV A_times_Y (*meshRowMap, *pointRowMap, blockSize, numVecs);
1541 const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1543 for (
int j = startSweep; j < NumSweeps_; ++j) {
1544 blockMat->applyBlock (Y_blk, A_times_Y);
1546 Y_blk.blockJacobiUpdate (DampingFactor_, blockDiag_,
1547 X_blk, A_times_Y, STS::one ());
1551 template<
class MatrixType>
1553 Relaxation<MatrixType>::
1554 ApplyInverseGS (
const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1555 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
const
1557 using this_type = Relaxation<MatrixType>;
1567 const block_crs_matrix_type* blockCrsMat =
1568 dynamic_cast<const block_crs_matrix_type*
> (A_.getRawPtr ());
1569 const crs_matrix_type* crsMat =
1570 dynamic_cast<const crs_matrix_type*
> (A_.getRawPtr ());
1571 if (blockCrsMat !=
nullptr) {
1572 const_cast<this_type&
> (*this).ApplyInverseGS_BlockCrsMatrix (*blockCrsMat, X, Y);
1574 else if (crsMat !=
nullptr) {
1575 ApplyInverseGS_CrsMatrix (*crsMat, X, Y);
1578 ApplyInverseGS_RowMatrix (X, Y);
1583 template<
class MatrixType>
1585 Relaxation<MatrixType>::
1586 ApplyInverseGS_RowMatrix (
const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1587 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
const
1595 using Teuchos::rcpFromRef;
1596 typedef Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> MV;
1601 if (ZeroStartingSolution_) {
1602 Y.putScalar (STS::zero ());
1605 const size_t NumVectors = X.getNumVectors ();
1606 const size_t maxLength = A_->getNodeMaxNumRowEntries ();
1607 Array<local_ordinal_type> Indices (maxLength);
1608 Array<scalar_type> Values (maxLength);
1611 const size_t numMyRows = A_->getNodeNumRows();
1612 const local_ordinal_type* rowInd = 0;
1613 size_t numActive = numMyRows;
1614 bool do_local = ! localSmoothingIndices_.is_null ();
1616 rowInd = localSmoothingIndices_.getRawPtr ();
1617 numActive = localSmoothingIndices_.size ();
1622 if (Importer_.is_null ()) {
1623 updateCachedMultiVector (Y.getMap (), NumVectors);
1626 updateCachedMultiVector (Importer_->getTargetMap (), NumVectors);
1631 Y2 = rcpFromRef (Y);
1635 ArrayRCP<const scalar_type> d_rcp = Diagonal_->get1dView ();
1636 ArrayView<const scalar_type> d_ptr = d_rcp();
1639 bool constant_stride = X.isConstantStride() && Y2->isConstantStride();
1641 if (constant_stride) {
1643 size_t x_stride = X.getStride();
1644 size_t y2_stride = Y2->getStride();
1645 ArrayRCP<scalar_type> y2_rcp = Y2->get1dViewNonConst();
1646 ArrayRCP<const scalar_type> x_rcp = X.get1dView();
1647 ArrayView<scalar_type> y2_ptr = y2_rcp();
1648 ArrayView<const scalar_type> x_ptr = x_rcp();
1649 Array<scalar_type> dtemp(NumVectors,STS::zero());
1651 for (
int j = 0; j < NumSweeps_; ++j) {
1654 if (Importer_.is_null ()) {
1660 Y2->doImport (Y, *Importer_, Tpetra::INSERT);
1664 if (! DoBackwardGS_) {
1665 for (
size_t ii = 0; ii < numActive; ++ii) {
1666 local_ordinal_type i = as<local_ordinal_type> (do_local ? rowInd[ii] : ii);
1668 A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
1669 dtemp.assign(NumVectors,STS::zero());
1671 for (
size_t k = 0; k < NumEntries; ++k) {
1672 const local_ordinal_type col = Indices[k];
1673 for (
size_t m = 0; m < NumVectors; ++m) {
1674 dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
1678 for (
size_t m = 0; m < NumVectors; ++m) {
1679 y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
1686 for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
1687 local_ordinal_type i = as<local_ordinal_type> (do_local ? rowInd[ii] : ii);
1689 A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
1690 dtemp.assign (NumVectors, STS::zero ());
1692 for (
size_t k = 0; k < NumEntries; ++k) {
1693 const local_ordinal_type col = Indices[k];
1694 for (
size_t m = 0; m < NumVectors; ++m) {
1695 dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
1699 for (
size_t m = 0; m < NumVectors; ++m) {
1700 y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
1706 Tpetra::deep_copy (Y, *Y2);
1712 ArrayRCP<ArrayRCP<scalar_type> > y2_ptr = Y2->get2dViewNonConst ();
1713 ArrayRCP<ArrayRCP<const scalar_type> > x_ptr = X.get2dView ();
1715 for (
int j = 0; j < NumSweeps_; ++j) {
1718 if (Importer_.is_null ()) {
1721 Y2->doImport (Y, *Importer_, Tpetra::INSERT);
1725 if (! DoBackwardGS_) {
1726 for (
size_t ii = 0; ii < numActive; ++ii) {
1727 local_ordinal_type i = as<local_ordinal_type> (do_local ? rowInd[ii] : ii);
1729 A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
1731 for (
size_t m = 0; m < NumVectors; ++m) {
1732 scalar_type dtemp = STS::zero ();
1733 ArrayView<const scalar_type> x_local = (x_ptr())[m]();
1734 ArrayView<scalar_type> y2_local = (y2_ptr())[m]();
1736 for (
size_t k = 0; k < NumEntries; ++k) {
1737 const local_ordinal_type col = Indices[k];
1738 dtemp += Values[k] * y2_local[col];
1740 y2_local[i] += DampingFactor_ * d_ptr[i] * (x_local[i] - dtemp);
1747 for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
1748 local_ordinal_type i = as<local_ordinal_type> (do_local ? rowInd[ii] : ii);
1751 A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
1753 for (
size_t m = 0; m < NumVectors; ++m) {
1754 scalar_type dtemp = STS::zero ();
1755 ArrayView<const scalar_type> x_local = (x_ptr())[m]();
1756 ArrayView<scalar_type> y2_local = (y2_ptr())[m]();
1758 for (
size_t k = 0; k < NumEntries; ++k) {
1759 const local_ordinal_type col = Indices[k];
1760 dtemp += Values[k] * y2_local[col];
1762 y2_local[i] += DampingFactor_ * d_ptr[i] * (x_local[i] - dtemp);
1769 Tpetra::deep_copy (Y, *Y2);
1776 const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1777 const double numVectors = as<double> (X.getNumVectors ());
1778 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1779 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1780 ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
1781 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1785 template<
class MatrixType>
1787 Relaxation<MatrixType>::
1788 ApplyInverseGS_CrsMatrix (
const crs_matrix_type& A,
1789 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1790 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
const
1793 const Tpetra::ESweepDirection direction =
1794 DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward;
1795 if (localSmoothingIndices_.is_null ()) {
1796 A.gaussSeidelCopy (Y, X, *Diagonal_, DampingFactor_, direction,
1797 NumSweeps_, ZeroStartingSolution_);
1800 A.reorderedGaussSeidelCopy (Y, X, *Diagonal_, localSmoothingIndices_ (),
1801 DampingFactor_, direction,
1802 NumSweeps_, ZeroStartingSolution_);
1816 const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1817 const double numVectors = as<double> (X.getNumVectors ());
1818 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1819 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1820 ApplyFlops_ += NumSweeps_ * numVectors *
1821 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1824 template<
class MatrixType>
1826 Relaxation<MatrixType>::
1827 ApplyInverseGS_BlockCrsMatrix (
const block_crs_matrix_type& A,
1828 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1829 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
1831 using Tpetra::INSERT;
1834 using Teuchos::rcpFromRef;
1835 using BMV = Tpetra::BlockMultiVector<scalar_type,
1836 local_ordinal_type, global_ordinal_type, node_type>;
1837 using MV = Tpetra::MultiVector<scalar_type,
1838 local_ordinal_type, global_ordinal_type, node_type>;
1839 using map_type = Tpetra::Map<local_ordinal_type,
1840 global_ordinal_type, node_type>;
1841 using import_type = Tpetra::Import<local_ordinal_type,
1842 global_ordinal_type, node_type>;
1851 BMV yBlock(Y, *(A.getGraph ()->getDomainMap()), A.getBlockSize());
1852 const BMV xBlock(X, *(A.getColMap ()), A.getBlockSize());
1854 bool performImport =
false;
1856 if (Importer_.is_null()) {
1857 yBlockCol = rcpFromRef(yBlock);
1860 if (yBlockColumnPointMap_.is_null() ||
1861 yBlockColumnPointMap_->getNumVectors() != yBlock.getNumVectors() ||
1862 yBlockColumnPointMap_->getBlockSize() != yBlock.getBlockSize()) {
1863 yBlockColumnPointMap_ =
1864 rcp(
new BMV(*(A.getColMap()), A.getBlockSize(),
1865 static_cast<local_ordinal_type
>(yBlock.getNumVectors())));
1867 yBlockCol = yBlockColumnPointMap_;
1868 if (pointImporter_.is_null()) {
1869 auto srcMap =
rcp(
new map_type(yBlock.getPointMap()));
1870 auto tgtMap =
rcp(
new map_type(yBlockCol->getPointMap()));
1871 pointImporter_ =
rcp(
new import_type(srcMap, tgtMap));
1873 performImport =
true;
1878 RCP<const MV> yBlockColPointDomain;
1879 if (performImport) {
1880 yBlock_mv = yBlock.getMultiVectorView();
1881 yBlockCol_mv = yBlockCol->getMultiVectorView();
1882 yBlockColPointDomain =
1883 yBlockCol_mv.offsetView(A.getDomainMap(), 0);
1886 if (ZeroStartingSolution_) {
1887 yBlockCol->putScalar(STS::zero ());
1889 else if (performImport) {
1890 yBlockCol_mv.doImport(yBlock_mv, *pointImporter_, INSERT);
1893 const Tpetra::ESweepDirection direction =
1894 DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward;
1896 for (
int sweep = 0; sweep < NumSweeps_; ++sweep) {
1897 if (performImport && sweep > 0) {
1898 yBlockCol_mv.doImport(yBlock_mv, *pointImporter_, INSERT);
1900 A.localGaussSeidel(xBlock, *yBlockCol, blockDiag_,
1901 DampingFactor_, direction);
1902 if (performImport) {
1903 Tpetra::deep_copy(Y, *yBlockColPointDomain);
1908 template<
class MatrixType>
1910 Relaxation<MatrixType>::
1911 MTGaussSeidel (
const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& B,
1912 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1913 const Tpetra::ESweepDirection direction)
const
1915 using Teuchos::null;
1918 using Teuchos::rcpFromRef;
1919 using Teuchos::rcp_const_cast;
1922 typedef scalar_type Scalar;
1923 typedef local_ordinal_type LocalOrdinal;
1924 typedef global_ordinal_type GlobalOrdinal;
1925 typedef node_type Node;
1927 const char prefix[] =
"Ifpack2::Relaxation::(reordered)MTGaussSeidel: ";
1930 const crs_matrix_type* crsMat =
dynamic_cast<const crs_matrix_type*
> (A_.get());
1932 (crsMat ==
nullptr, std::logic_error,
"Ifpack2::Relaxation::apply: "
1933 "Multithreaded Gauss-Seidel methods currently only work when the "
1934 "input matrix is a Tpetra::CrsMatrix.");
1938 (! localSmoothingIndices_.is_null (), std::logic_error,
1939 "Ifpack2's implementation of Multithreaded Gauss-Seidel does not "
1940 "implement the case where the user supplies an iteration order. "
1941 "This error used to appear as \"MT GaussSeidel ignores the given "
1943 "I tried to add more explanation, but I didn't implement \"MT "
1944 "GaussSeidel\" [sic]. "
1945 "You'll have to ask the person who did.");
1948 (crsMat ==
nullptr, std::logic_error, prefix <<
"The matrix is null."
1949 " This should never happen. Please report this bug to the Ifpack2 "
1952 (! crsMat->isFillComplete (), std::runtime_error, prefix <<
"The "
1953 "input CrsMatrix is not fill complete. Please call fillComplete "
1954 "on the matrix, then do setup again, before calling apply(). "
1955 "\"Do setup\" means that if only the matrix's values have changed "
1956 "since last setup, you need only call compute(). If the matrix's "
1957 "structure may also have changed, you must first call initialize(), "
1958 "then call compute(). If you have not set up this preconditioner "
1959 "for this matrix before, you must first call initialize(), then "
1962 (NumSweeps_ < 0, std::logic_error, prefix <<
": NumSweeps_ = "
1963 << NumSweeps_ <<
" < 0. Please report this bug to the Ifpack2 "
1965 if (NumSweeps_ == 0) {
1969 typedef typename Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> MV;
1970 typedef typename crs_matrix_type::import_type import_type;
1971 typedef typename crs_matrix_type::export_type export_type;
1972 typedef typename crs_matrix_type::map_type map_type;
1974 RCP<const import_type> importer = crsMat->getGraph ()->getImporter ();
1975 RCP<const export_type> exporter = crsMat->getGraph ()->getExporter ();
1977 ! exporter.is_null (), std::runtime_error,
1978 "This method's implementation currently requires that the matrix's row, "
1979 "domain, and range Maps be the same. This cannot be the case, because "
1980 "the matrix has a nontrivial Export object.");
1982 RCP<const map_type> domainMap = crsMat->getDomainMap ();
1983 RCP<const map_type> rangeMap = crsMat->getRangeMap ();
1984 RCP<const map_type> rowMap = crsMat->getGraph ()->getRowMap ();
1985 RCP<const map_type> colMap = crsMat->getGraph ()->getColMap ();
1987 #ifdef HAVE_IFPACK2_DEBUG
1993 ! X.getMap ()->isSameAs (*domainMap), std::runtime_error,
1994 "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
1995 "multivector X be in the domain Map of the matrix.");
1997 ! B.getMap ()->isSameAs (*rangeMap), std::runtime_error,
1998 "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
1999 "B be in the range Map of the matrix.");
2001 ! D.getMap ()->isSameAs (*rowMap), std::runtime_error,
2002 "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
2003 "D be in the row Map of the matrix.");
2005 ! rowMap->isSameAs (*rangeMap), std::runtime_error,
2006 "Ifpack2::Relaxation::MTGaussSeidel requires that the row Map and the "
2007 "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
2009 ! domainMap->isSameAs (*rangeMap), std::runtime_error,
2010 "Ifpack2::Relaxation::MTGaussSeidel requires that the domain Map and "
2011 "the range Map of the matrix be the same.");
2017 #endif // HAVE_IFPACK2_DEBUG
2028 RCP<MV> X_domainMap;
2029 bool copyBackOutput =
false;
2030 if (importer.is_null ()) {
2031 if (X.isConstantStride ()) {
2032 X_colMap = rcpFromRef (X);
2033 X_domainMap = rcpFromRef (X);
2040 if (ZeroStartingSolution_) {
2041 X_colMap->putScalar (ZERO);
2050 updateCachedMultiVector(colMap,as<size_t>(X.getNumVectors()));
2051 X_colMap = cachedMV_;
2055 X_domainMap = X_colMap;
2056 if (! ZeroStartingSolution_) {
2058 deep_copy (*X_domainMap , X);
2059 }
catch (std::exception& e) {
2060 std::ostringstream os;
2061 os <<
"Ifpack2::Relaxation::MTGaussSeidel: "
2062 "deep_copy(*X_domainMap, X) threw an exception: "
2063 << e.what () <<
".";
2067 copyBackOutput =
true;
2081 updateCachedMultiVector(colMap,as<size_t>(X.getNumVectors()));
2082 X_colMap = cachedMV_;
2084 X_domainMap = X_colMap->offsetViewNonConst (domainMap, 0);
2086 #ifdef HAVE_IFPACK2_DEBUG
2087 auto X_colMap_host_view = X_colMap->template getLocalView<Kokkos::HostSpace> ();
2088 auto X_domainMap_host_view = X_domainMap->template getLocalView<Kokkos::HostSpace> ();
2090 if (X_colMap->getLocalLength () != 0 && X_domainMap->getLocalLength ()) {
2092 X_colMap_host_view.data () != X_domainMap_host_view.data (),
2093 std::logic_error,
"Ifpack2::Relaxation::MTGaussSeidel: "
2094 "Pointer to start of column Map view of X is not equal to pointer to "
2095 "start of (domain Map view of) X. This may mean that "
2096 "Tpetra::MultiVector::offsetViewNonConst is broken. "
2097 "Please report this bug to the Tpetra developers.");
2101 X_colMap_host_view.extent (0) < X_domainMap_host_view.extent (0) ||
2102 X_colMap->getLocalLength () < X_domainMap->getLocalLength (),
2103 std::logic_error,
"Ifpack2::Relaxation::MTGaussSeidel: "
2104 "X_colMap has fewer local rows than X_domainMap. "
2105 "X_colMap_host_view.extent(0) = " << X_colMap_host_view.extent (0)
2106 <<
", X_domainMap_host_view.extent(0) = "
2107 << X_domainMap_host_view.extent (0)
2108 <<
", X_colMap->getLocalLength() = " << X_colMap->getLocalLength ()
2109 <<
", and X_domainMap->getLocalLength() = "
2110 << X_domainMap->getLocalLength ()
2111 <<
". This means that Tpetra::MultiVector::offsetViewNonConst "
2112 "is broken. Please report this bug to the Tpetra developers.");
2115 X_colMap->getNumVectors () != X_domainMap->getNumVectors (),
2116 std::logic_error,
"Ifpack2::Relaxation::MTGaussSeidel: "
2117 "X_colMap has a different number of columns than X_domainMap. "
2118 "X_colMap->getNumVectors() = " << X_colMap->getNumVectors ()
2119 <<
" != X_domainMap->getNumVectors() = "
2120 << X_domainMap->getNumVectors ()
2121 <<
". This means that Tpetra::MultiVector::offsetViewNonConst "
2122 "is broken. Please report this bug to the Tpetra developers.");
2123 #endif // HAVE_IFPACK2_DEBUG
2125 if (ZeroStartingSolution_) {
2127 X_colMap->putScalar (ZERO);
2137 X_colMap->doImport (X, *importer, Tpetra::CombineMode::INSERT);
2139 copyBackOutput =
true;
2146 if (B.isConstantStride ()) {
2147 B_in = rcpFromRef (B);
2154 RCP<MV> B_in_nonconst =
rcp (
new MV (rowMap, B.getNumVectors()));
2156 deep_copy (*B_in_nonconst, B);
2157 }
catch (std::exception& e) {
2158 std::ostringstream os;
2159 os <<
"Ifpack2::Relaxation::MTGaussSeidel: "
2160 "deep_copy(*B_in_nonconst, B) threw an exception: "
2161 << e.what () <<
".";
2164 B_in = rcp_const_cast<
const MV> (B_in_nonconst);
2178 local_matrix_type kcsr = crsMat->getLocalMatrix ();
2180 bool update_y_vector =
true;
2182 bool zero_x_vector =
false;
2184 for (
int sweep = 0; sweep < NumSweeps_; ++sweep) {
2185 if (! importer.is_null () && sweep > 0) {
2188 X_colMap->doImport (*X_domainMap, *importer, Tpetra::CombineMode::INSERT);
2191 if (direction == Tpetra::Symmetric) {
2192 KokkosSparse::Experimental::symmetric_gauss_seidel_apply
2193 (mtKernelHandle_.getRawPtr(), A_->getNodeNumRows(), A_->getNodeNumCols(),
2194 kcsr.graph.row_map, kcsr.graph.entries, kcsr.values,
2195 X_colMap->getLocalViewDevice(),
2196 B_in->getLocalViewDevice(),
2197 zero_x_vector, update_y_vector, DampingFactor_, 1);
2199 else if (direction == Tpetra::Forward) {
2200 KokkosSparse::Experimental::forward_sweep_gauss_seidel_apply
2201 (mtKernelHandle_.getRawPtr(), A_->getNodeNumRows(), A_->getNodeNumCols(),
2202 kcsr.graph.row_map,kcsr.graph.entries, kcsr.values,
2203 X_colMap->getLocalViewDevice (),
2204 B_in->getLocalViewDevice(),
2205 zero_x_vector, update_y_vector, DampingFactor_, 1);
2207 else if (direction == Tpetra::Backward) {
2208 KokkosSparse::Experimental::backward_sweep_gauss_seidel_apply
2209 (mtKernelHandle_.getRawPtr(), A_->getNodeNumRows(), A_->getNodeNumCols(),
2210 kcsr.graph.row_map,kcsr.graph.entries, kcsr.values,
2211 X_colMap->getLocalViewDevice(),
2212 B_in->getLocalViewDevice(),
2213 zero_x_vector, update_y_vector, DampingFactor_, 1);
2217 true, std::invalid_argument,
2218 prefix <<
"The 'direction' enum does not have any of its valid "
2219 "values: Forward, Backward, or Symmetric.");
2221 update_y_vector =
false;
2224 if (copyBackOutput) {
2226 deep_copy (X , *X_domainMap);
2227 }
catch (std::exception& e) {
2229 true, std::runtime_error, prefix <<
"deep_copy(X, *X_domainMap) "
2230 "threw an exception: " << e.what ());
2234 const double dampingFlops = (DampingFactor_ == STS::one ()) ? 0.0 : 1.0;
2235 const double numVectors = as<double> (X.getNumVectors ());
2236 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
2237 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
2238 double ApplyFlops = NumSweeps_ * numVectors *
2239 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
2240 if (direction == Tpetra::Symmetric)
2242 ApplyFlops_ += ApplyFlops;
2246 template<
class MatrixType>
2248 Relaxation<MatrixType>::
2249 ApplyInverseMTSGS_CrsMatrix (
const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& B,
2250 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X)
const
2252 const Tpetra::ESweepDirection direction = Tpetra::Symmetric;
2253 this->MTGaussSeidel (B, X, direction);
2257 template<
class MatrixType>
2258 void Relaxation<MatrixType>::ApplyInverseMTGS_CrsMatrix (
2259 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& B,
2260 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X)
const {
2262 const Tpetra::ESweepDirection direction =
2263 DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward;
2264 this->MTGaussSeidel (B, X, direction);
2267 template<
class MatrixType>
2269 Relaxation<MatrixType>::
2270 ApplyInverseSGS (
const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
2271 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
const
2273 using this_type = Relaxation<MatrixType>;
2283 const block_crs_matrix_type* blockCrsMat =
2284 dynamic_cast<const block_crs_matrix_type*
> (A_.getRawPtr ());
2285 const crs_matrix_type* crsMat =
2286 dynamic_cast<const crs_matrix_type*
> (A_.getRawPtr ());
2287 if (blockCrsMat !=
nullptr) {
2288 const_cast<this_type&
> (*this).ApplyInverseSGS_BlockCrsMatrix(*blockCrsMat, X, Y);
2290 else if (crsMat !=
nullptr) {
2291 ApplyInverseSGS_CrsMatrix (*crsMat, X, Y);
2294 ApplyInverseSGS_RowMatrix (X, Y);
2299 template<
class MatrixType>
2301 Relaxation<MatrixType>::
2302 ApplyInverseSGS_RowMatrix (
const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
2303 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
const
2311 using Teuchos::rcpFromRef;
2312 using MV = Tpetra::MultiVector<scalar_type, local_ordinal_type,
2313 global_ordinal_type, node_type>;
2318 if (ZeroStartingSolution_) {
2319 Y.putScalar (STS::zero ());
2322 const size_t NumVectors = X.getNumVectors ();
2323 const size_t maxLength = A_->getNodeMaxNumRowEntries ();
2324 Array<local_ordinal_type> Indices (maxLength);
2325 Array<scalar_type> Values (maxLength);
2328 const size_t numMyRows = A_->getNodeNumRows();
2329 const local_ordinal_type * rowInd = 0;
2330 size_t numActive = numMyRows;
2331 bool do_local = !localSmoothingIndices_.is_null();
2333 rowInd = localSmoothingIndices_.getRawPtr();
2334 numActive = localSmoothingIndices_.size();
2340 if (Importer_.is_null ()) {
2341 updateCachedMultiVector (Y.getMap (), NumVectors);
2344 updateCachedMultiVector (Importer_->getTargetMap (), NumVectors);
2349 Y2 = rcpFromRef (Y);
2353 ArrayRCP<const scalar_type> d_rcp = Diagonal_->get1dView();
2354 ArrayView<const scalar_type> d_ptr = d_rcp();
2357 bool constant_stride = X.isConstantStride() && Y2->isConstantStride();
2359 if(constant_stride) {
2361 size_t x_stride = X.getStride();
2362 size_t y2_stride = Y2->getStride();
2363 ArrayRCP<scalar_type> y2_rcp = Y2->get1dViewNonConst();
2364 ArrayRCP<const scalar_type> x_rcp = X.get1dView();
2365 ArrayView<scalar_type> y2_ptr = y2_rcp();
2366 ArrayView<const scalar_type> x_ptr = x_rcp();
2367 Array<scalar_type> dtemp(NumVectors,STS::zero());
2369 for (
int j = 0; j < NumSweeps_; j++) {
2372 if (Importer_.is_null ()) {
2374 Tpetra::deep_copy (*Y2, Y);
2376 Y2->doImport (Y, *Importer_, Tpetra::INSERT);
2379 for (
size_t ii = 0; ii < numActive; ++ii) {
2380 local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
2382 A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
2383 dtemp.assign(NumVectors,STS::zero());
2385 for (
size_t k = 0; k < NumEntries; ++k) {
2386 const local_ordinal_type col = Indices[k];
2387 for (
size_t m = 0; m < NumVectors; ++m) {
2388 dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
2392 for (
size_t m = 0; m < NumVectors; ++m) {
2393 y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
2399 for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
2400 local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
2402 A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
2403 dtemp.assign(NumVectors,STS::zero());
2405 for (
size_t k = 0; k < NumEntries; ++k) {
2406 const local_ordinal_type col = Indices[k];
2407 for (
size_t m = 0; m < NumVectors; ++m) {
2408 dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
2412 for (
size_t m = 0; m < NumVectors; ++m) {
2413 y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
2419 Tpetra::deep_copy (Y, *Y2);
2425 ArrayRCP<ArrayRCP<scalar_type> > y2_ptr = Y2->get2dViewNonConst ();
2426 ArrayRCP<ArrayRCP<const scalar_type> > x_ptr = X.get2dView ();
2428 for (
int iter = 0; iter < NumSweeps_; ++iter) {
2431 if (Importer_.is_null ()) {
2433 Tpetra::deep_copy (*Y2, Y);
2435 Y2->doImport (Y, *Importer_, Tpetra::INSERT);
2439 for (
size_t ii = 0; ii < numActive; ++ii) {
2440 local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
2441 const scalar_type diag = d_ptr[i];
2443 A_->getLocalRowCopy (as<local_ordinal_type> (i), Indices (), Values (), NumEntries);
2445 for (
size_t m = 0; m < NumVectors; ++m) {
2446 scalar_type dtemp = STS::zero ();
2447 ArrayView<const scalar_type> x_local = (x_ptr())[m]();
2448 ArrayView<scalar_type> y2_local = (y2_ptr())[m]();
2450 for (
size_t k = 0; k < NumEntries; ++k) {
2451 const local_ordinal_type col = Indices[k];
2452 dtemp += Values[k] * y2_local[col];
2454 y2_local[i] += DampingFactor_ * (x_local[i] - dtemp) * diag;
2460 for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
2461 local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
2462 const scalar_type diag = d_ptr[i];
2464 A_->getLocalRowCopy (as<local_ordinal_type> (i), Indices (), Values (), NumEntries);
2466 for (
size_t m = 0; m < NumVectors; ++m) {
2467 scalar_type dtemp = STS::zero ();
2468 ArrayView<const scalar_type> x_local = (x_ptr())[m]();
2469 ArrayView<scalar_type> y2_local = (y2_ptr())[m]();
2471 for (
size_t k = 0; k < NumEntries; ++k) {
2472 const local_ordinal_type col = Indices[k];
2473 dtemp += Values[k] * y2_local[col];
2475 y2_local[i] += DampingFactor_ * (x_local[i] - dtemp) * diag;
2481 Tpetra::deep_copy (Y, *Y2);
2487 const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
2488 const double numVectors = as<double> (X.getNumVectors ());
2489 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
2490 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
2491 ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
2492 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
2496 template<
class MatrixType>
2498 Relaxation<MatrixType>::
2499 ApplyInverseSGS_CrsMatrix (
const crs_matrix_type& A,
2500 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
2501 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
const
2504 const Tpetra::ESweepDirection direction = Tpetra::Symmetric;
2505 if (localSmoothingIndices_.is_null ()) {
2506 A.gaussSeidelCopy (Y, X, *Diagonal_, DampingFactor_, direction,
2507 NumSweeps_, ZeroStartingSolution_);
2510 A.reorderedGaussSeidelCopy (Y, X, *Diagonal_, localSmoothingIndices_ (),
2511 DampingFactor_, direction,
2512 NumSweeps_, ZeroStartingSolution_);
2529 const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
2530 const double numVectors = as<double> (X.getNumVectors ());
2531 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
2532 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
2533 ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
2534 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
2537 template<
class MatrixType>
2539 Relaxation<MatrixType>::
2540 ApplyInverseSGS_BlockCrsMatrix (
const block_crs_matrix_type& A,
2541 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
2542 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
2544 using Tpetra::INSERT;
2547 using Teuchos::rcpFromRef;
2548 using BMV = Tpetra::BlockMultiVector<scalar_type,
2549 local_ordinal_type, global_ordinal_type, node_type>;
2550 using MV = Tpetra::MultiVector<scalar_type,
2551 local_ordinal_type, global_ordinal_type, node_type>;
2552 using map_type = Tpetra::Map<local_ordinal_type,
2553 global_ordinal_type, node_type>;
2554 using import_type = Tpetra::Import<local_ordinal_type,
2555 global_ordinal_type, node_type>;
2564 BMV yBlock (Y, * (A.getGraph ()->getDomainMap ()), A.getBlockSize ());
2565 const BMV xBlock (X, * (A.getColMap ()), A.getBlockSize ());
2567 bool performImport =
false;
2569 if (Importer_.is_null ()) {
2570 yBlockCol = Teuchos::rcpFromRef (yBlock);
2573 if (yBlockColumnPointMap_.is_null () ||
2574 yBlockColumnPointMap_->getNumVectors () != yBlock.getNumVectors () ||
2575 yBlockColumnPointMap_->getBlockSize () != yBlock.getBlockSize ()) {
2576 yBlockColumnPointMap_ =
2577 rcp (
new BMV (* (A.getColMap ()), A.getBlockSize (),
2578 static_cast<local_ordinal_type
> (yBlock.getNumVectors ())));
2580 yBlockCol = yBlockColumnPointMap_;
2581 if (pointImporter_.is_null()) {
2582 auto srcMap =
rcp(
new map_type(yBlock.getPointMap()));
2583 auto tgtMap =
rcp(
new map_type(yBlockCol->getPointMap()));
2584 pointImporter_ =
rcp(
new import_type(srcMap, tgtMap));
2586 performImport =
true;
2591 RCP<const MV> yBlockColPointDomain;
2592 if (performImport) {
2593 yBlock_mv = yBlock.getMultiVectorView();
2594 yBlockCol_mv = yBlockCol->getMultiVectorView();
2595 yBlockColPointDomain =
2596 yBlockCol_mv.offsetView(A.getDomainMap(), 0);
2599 if (ZeroStartingSolution_) {
2600 yBlockCol->putScalar(STS::zero ());
2602 else if (performImport) {
2603 yBlockCol_mv.doImport(yBlock_mv, *pointImporter_, INSERT);
2607 const Tpetra::ESweepDirection direction = Tpetra::Symmetric;
2609 for (
int sweep = 0; sweep < NumSweeps_; ++sweep) {
2610 if (performImport && sweep > 0) {
2611 yBlockCol_mv.doImport(yBlock_mv, *pointImporter_, INSERT);
2613 A.localGaussSeidel(xBlock, *yBlockCol, blockDiag_,
2614 DampingFactor_, direction);
2615 if (performImport) {
2616 Tpetra::deep_copy(yBlock_mv, *yBlockColPointDomain);
2621 template<
class MatrixType>
2624 std::ostringstream os;
2629 os <<
"\"Ifpack2::Relaxation\": {";
2631 os <<
"Initialized: " << (isInitialized () ?
"true" :
"false") <<
", "
2632 <<
"Computed: " << (isComputed () ?
"true" :
"false") <<
", ";
2638 if (PrecType_ == Ifpack2::Details::JACOBI) {
2640 }
else if (PrecType_ == Ifpack2::Details::GS) {
2641 os <<
"Gauss-Seidel";
2642 }
else if (PrecType_ == Ifpack2::Details::SGS) {
2643 os <<
"Symmetric Gauss-Seidel";
2644 }
else if (PrecType_ == Ifpack2::Details::MTGS) {
2645 os <<
"MT Gauss-Seidel";
2646 }
else if (PrecType_ == Ifpack2::Details::MTSGS) {
2647 os <<
"MT Symmetric Gauss-Seidel";
2648 }
else if (PrecType_ == Ifpack2::Details::GS2) {
2649 os <<
"Two-stage Gauss-Seidel";
2650 }
else if (PrecType_ == Ifpack2::Details::SGS2) {
2651 os <<
"Two-stage Symmetric Gauss-Seidel";
2657 os <<
", " <<
"sweeps: " << NumSweeps_ <<
", "
2658 <<
"damping factor: " << DampingFactor_ <<
", ";
2660 os <<
"use l1: " << DoL1Method_ <<
", "
2661 <<
"l1 eta: " << L1Eta_ <<
", ";
2664 if (A_.is_null ()) {
2665 os <<
"Matrix: null";
2668 os <<
"Global matrix dimensions: ["
2669 << A_->getGlobalNumRows () <<
", " << A_->getGlobalNumCols () <<
"]"
2670 <<
", Global nnz: " << A_->getGlobalNumEntries();
2678 template<
class MatrixType>
2697 const int myRank = this->getComm ()->getRank ();
2709 out <<
"\"Ifpack2::Relaxation\":" << endl;
2711 out <<
"MatrixType: \"" << TypeNameTraits<MatrixType>::name () <<
"\""
2713 if (this->getObjectLabel () !=
"") {
2714 out <<
"Label: " << this->getObjectLabel () << endl;
2716 out <<
"Initialized: " << (isInitialized () ?
"true" :
"false") << endl
2717 <<
"Computed: " << (isComputed () ?
"true" :
"false") << endl
2718 <<
"Parameters: " << endl;
2721 out <<
"\"relaxation: type\": ";
2722 if (PrecType_ == Ifpack2::Details::JACOBI) {
2724 }
else if (PrecType_ == Ifpack2::Details::GS) {
2725 out <<
"Gauss-Seidel";
2726 }
else if (PrecType_ == Ifpack2::Details::SGS) {
2727 out <<
"Symmetric Gauss-Seidel";
2728 }
else if (PrecType_ == Ifpack2::Details::MTGS) {
2729 out <<
"MT Gauss-Seidel";
2730 }
else if (PrecType_ == Ifpack2::Details::MTSGS) {
2731 out <<
"MT Symmetric Gauss-Seidel";
2732 }
else if (PrecType_ == Ifpack2::Details::GS2) {
2733 out <<
"Two-stage Gauss-Seidel";
2734 }
else if (PrecType_ == Ifpack2::Details::SGS2) {
2735 out <<
"Two-stage Symmetric Gauss-Seidel";
2742 <<
"\"relaxation: sweeps\": " << NumSweeps_ << endl
2743 <<
"\"relaxation: damping factor\": " << DampingFactor_ << endl
2744 <<
"\"relaxation: min diagonal value\": " << MinDiagonalValue_ << endl
2745 <<
"\"relaxation: zero starting solution\": " << ZeroStartingSolution_ << endl
2746 <<
"\"relaxation: backward mode\": " << DoBackwardGS_ << endl
2747 <<
"\"relaxation: use l1\": " << DoL1Method_ << endl
2748 <<
"\"relaxation: l1 eta\": " << L1Eta_ << endl;
2750 out <<
"Computed quantities:" << endl;
2753 out <<
"Global number of rows: " << A_->getGlobalNumRows () << endl
2754 <<
"Global number of columns: " << A_->getGlobalNumCols () << endl;
2756 if (checkDiagEntries_ && isComputed ()) {
2757 out <<
"Properties of input diagonal entries:" << endl;
2760 out <<
"Magnitude of minimum-magnitude diagonal entry: "
2761 << globalMinMagDiagEntryMag_ << endl
2762 <<
"Magnitude of maximum-magnitude diagonal entry: "
2763 << globalMaxMagDiagEntryMag_ << endl
2764 <<
"Number of diagonal entries with small magnitude: "
2765 << globalNumSmallDiagEntries_ << endl
2766 <<
"Number of zero diagonal entries: "
2767 << globalNumZeroDiagEntries_ << endl
2768 <<
"Number of diagonal entries with negative real part: "
2769 << globalNumNegDiagEntries_ << endl
2770 <<
"Abs 2-norm diff between computed and actual inverse "
2771 <<
"diagonal: " << globalDiagNormDiff_ << endl;
2774 if (isComputed ()) {
2775 out <<
"Saved diagonal offsets: "
2776 << (savedDiagOffsets_ ?
"true" :
"false") << endl;
2778 out <<
"Call counts and total times (in seconds): " << endl;
2781 out <<
"initialize: " << endl;
2784 out <<
"Call count: " << NumInitialize_ << endl;
2785 out <<
"Total time: " << InitializeTime_ << endl;
2787 out <<
"compute: " << endl;
2790 out <<
"Call count: " << NumCompute_ << endl;
2791 out <<
"Total time: " << ComputeTime_ << endl;
2793 out <<
"apply: " << endl;
2796 out <<
"Call count: " << NumApply_ << endl;
2797 out <<
"Total time: " << ApplyTime_ << endl;
2805 #define IFPACK2_RELAXATION_INSTANT(S,LO,GO,N) \
2806 template class Ifpack2::Relaxation< Tpetra::RowMatrix<S, LO, GO, N> >;
2808 #endif // IFPACK2_RELAXATION_DEF_HPP
bool hasTransposeApply() const
Whether apply() and applyMat() let you apply the transpose or conjugate transpose.
Definition: Ifpack2_Relaxation_def.hpp:440
double getComputeFlops() const
Total number of floating-point operations over all calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:482
double getApplyFlops() const
Total number of floating-point operations over all calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:488
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object's attributes to the given output stream.
Definition: Ifpack2_Relaxation_def.hpp:2681
bool is_null(const boost::shared_ptr< T > &p)
basic_OSTab< char > OSTab
static magnitudeType eps()
Relaxation(const Teuchos::RCP< const row_matrix_type > &A)
Constructor.
Definition: Ifpack2_Relaxation_def.hpp:215
T & get(const std::string &name, T def_value)
void compute()
Compute the preconditioner ("numeric setup");.
Definition: Ifpack2_Relaxation_def.hpp:937
ParameterList & set(std::string const &name, T const &value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
virtual void printDoc(std::string const &docString, std::ostream &out) const =0
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
size_t getNodeSmootherComplexity() const
Get a rough estimate of cost per iteration.
Definition: Ifpack2_Relaxation_def.hpp:495
virtual const std::string getXMLTypeName() const =0
static std::ostream & printLines(std::ostream &os, const std::string &linePrefix, const std::string &lines)
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
The communicator over which the matrix and vectors are distributed.
Definition: Ifpack2_Relaxation_def.hpp:397
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Change the matrix to be preconditioned.
Definition: Ifpack2_Relaxation_def.hpp:194
int getNumApply() const
Total number of calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:458
bool remove(std::string const &name, bool throwIfNotExists=true)
double getComputeTime() const
Total time in seconds spent in all calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:470
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
MatrixType::node_type node_type
The Node type used by the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:262
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getDomainMap() const
Returns the Tpetra::Map object associated with the domain of this operator.
Definition: Ifpack2_Relaxation_def.hpp:417
void setParameters(const Teuchos::ParameterList ¶ms)
Set the relaxation / preconditioner parameters.
Definition: Ifpack2_Relaxation_def.hpp:387
File for utility functions.
std::string description() const
A simple one-line description of this object.
Definition: Ifpack2_Relaxation_def.hpp:2622
int getNumInitialize() const
Total number of calls to initialize().
Definition: Ifpack2_Relaxation_def.hpp:446
void validateParametersAndSetDefaults(ParameterList const &validParamList, int const depth=1000)
virtual ValidStringsList validStringValues() const =0
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getRangeMap() const
Returns the Tpetra::Map object associated with the range of this operator.
Definition: Ifpack2_Relaxation_def.hpp:430
MatrixType::global_ordinal_type global_ordinal_type
The type of global indices in the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:259
double getInitializeTime() const
Total time in seconds spent in all calls to initialize().
Definition: Ifpack2_Relaxation_def.hpp:464
any & getAny(bool activeQry=true)
void applyMat(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS) const
Apply the input matrix to X, returning the result in Y.
Definition: Ifpack2_Relaxation_def.hpp:616
Teuchos::RCP< const row_matrix_type > getMatrix() const
The matrix to be preconditioned.
Definition: Ifpack2_Relaxation_def.hpp:408
TypeTo as(const TypeFrom &t)
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:256
bool isType(const std::string &name) const
MatrixType::scalar_type scalar_type
The type of the entries of the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:253
double totalElapsedTime(bool readCurrentTime=false) const
std::string typeName() const
const std::type_info & type() const
int getNumCompute() const
Total number of calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:452
Relaxation preconditioners for Tpetra::RowMatrix and Tpetra::CrsMatrix sparse matrices.
Definition: Ifpack2_Relaxation_decl.hpp:236
double getApplyTime() const
Total time in seconds spent in all calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:476
void getValidParameters(Teuchos::ParameterList ¶ms)
Fills a list which contains all the parameters possibly used by Ifpack2.
Definition: Ifpack2_Parameters.cpp:51
void initialize()
Initialize the preconditioner ("symbolic setup").
Definition: Ifpack2_Relaxation_def.hpp:637
Tpetra::RowMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > row_matrix_type
Tpetra::RowMatrix specialization used by this class.
Definition: Ifpack2_Relaxation_decl.hpp:269
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the preconditioner to X, returning the result in Y.
Definition: Ifpack2_Relaxation_def.hpp:508
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const
Return a list of all the parameters that this class accepts.
Definition: Ifpack2_Relaxation_def.hpp:227
Teuchos::ScalarTraits< scalar_type >::magnitudeType magnitude_type
The type of the magnitude (absolute value) of a matrix entry.
Definition: Ifpack2_Relaxation_decl.hpp:265
virtual void validate(ParameterEntry const &entry, std::string const ¶mName, std::string const &sublistName) const =0