43 #ifndef IFPACK2_TRIDICONTAINER_DEF_HPP
44 #define IFPACK2_TRIDICONTAINER_DEF_HPP
53 # include "Teuchos_DefaultSerialComm.hpp"
59 template<
class MatrixType,
class LocalScalarType>
60 TriDiContainer<MatrixType, LocalScalarType, true>::
65 scalar_type DampingFactor) :
66 Container<MatrixType> (matrix, partitions, importer, OverlapLevel,
68 ipiv_ (this->partitions_.size(), 0),
69 IsInitialized_ (false),
72 scalarOffsets_ (this->numBlocks_)
80 ! matrix->hasColMap (), std::invalid_argument,
"Ifpack2::TriDiContainer: "
81 "The constructor's input matrix must have a column Map.");
84 const map_type& rowMap = * (matrix->getRowMap ());
86 for(
int i = 0; i < this->numBlocks_; i++)
89 for(local_ordinal_type j = 0; j < this->blockRows_[i]; j++)
92 !rowMap.isNodeLocalElement(this->partitions_[this->partitionIndices_[i] + j]),
93 std::invalid_argument,
"Ifpack2::TriDiContainer: "
94 "On process " << rowMap.getComm()->getRank() <<
" of "
95 << rowMap.getComm()->getSize() <<
", in the given set of local row "
97 "entries is not valid local row index on the calling process: "
98 << localRows[j] <<
".");
106 local_ordinal_type scalarTotal = 0;
107 for(local_ordinal_type i = 0; i < this->numBlocks_; i++)
109 scalarOffsets_[i] = scalarTotal;
110 if(this->blockRows_[i] == 1)
113 scalarTotal += 4 * (this->blockRows_[i] - 1);
116 scalars_ =
new local_scalar_type[scalarTotal];
117 diagBlocks_.reserve(this->numBlocks_);
118 for(
int i = 0; i < this->numBlocks_; i++)
119 diagBlocks_.emplace_back(
Teuchos::View, scalars_ + scalarOffsets_[i], this->blockRows_[i]);
122 template<
class MatrixType,
class LocalScalarType>
123 TriDiContainer<MatrixType, LocalScalarType, true>::
126 Container<MatrixType> (matrix, localRows),
127 ipiv_ (this->partitions_.size(), 0),
128 IsInitialized_ (false),
138 !matrix->hasColMap(), std::invalid_argument,
"Ifpack2::TriDiContainer: "
139 "The constructor's input matrix must have a column Map.");
142 const map_type& rowMap = *(matrix->getRowMap());
144 for(local_ordinal_type j = 0; j < this->blockRows_[0]; j++)
148 !rowMap.isNodeLocalElement(this->partitions_[this->partitionIndices_[0] + j]),
149 std::invalid_argument,
"Ifpack2::TriDiContainer: "
150 "On process " << rowMap.getComm ()->getRank () <<
" of "
151 << rowMap.getComm ()->getSize () <<
", in the given set of local row "
153 "entries is not valid local row index on the calling process: "
154 << localRows[j] <<
".");
160 diagBlocks_.emplace_back(this->blockRows_[0], this->blockRows_[0],
true);
163 template<
class MatrixType,
class LocalScalarType>
164 TriDiContainer<MatrixType, LocalScalarType, true>::~TriDiContainer ()
170 template<
class MatrixType,
class LocalScalarType>
171 bool TriDiContainer<MatrixType, LocalScalarType, true>::isInitialized ()
const
173 return IsInitialized_;
176 template<
class MatrixType,
class LocalScalarType>
177 bool TriDiContainer<MatrixType, LocalScalarType, true>::isComputed ()
const
182 template<
class MatrixType,
class LocalScalarType>
183 void TriDiContainer<MatrixType, LocalScalarType, true>::
189 template<
class MatrixType,
class LocalScalarType>
190 void TriDiContainer<MatrixType, LocalScalarType, true>::initialize ()
192 for(
int i = 0; i < this->numBlocks_; i++)
194 std::fill(ipiv_.begin(), ipiv_.end(), 0);
195 IsInitialized_ =
true;
201 template<
class MatrixType,
class LocalScalarType>
202 void TriDiContainer<MatrixType, LocalScalarType, true>::compute ()
205 ipiv_.size () != this->partitions_.size(), std::logic_error,
206 "Ifpack2::TriDiContainer::compute: ipiv_ array has the wrong size. "
207 "Please report this bug to the Ifpack2 developers.");
210 if (! this->isInitialized ()) {
221 template<
class MatrixType,
class LocalScalarType>
222 void TriDiContainer<MatrixType, LocalScalarType, true>::clearBlocks ()
224 std::vector<HostViewLocal> empty1;
225 std::swap(empty1, X_local);
226 std::vector<HostViewLocal> empty2;
227 std::swap(empty2, Y_local);
228 Container<MatrixType>::clearBlocks();
231 template<
class MatrixType,
class LocalScalarType>
232 void TriDiContainer<MatrixType, LocalScalarType, true>::factor ()
234 for(
int i = 0; i < this->numBlocks_; i++)
238 int* blockIpiv = (
int*) ipiv_.getRawPtr() + this->partitionIndices_[i];
239 lapack.
GTTRF (diagBlocks_[i].numRowsCols (),
243 diagBlocks_[i].DU2(),
247 INFO < 0, std::logic_error,
"Ifpack2::TriDiContainer::factor: "
248 "LAPACK's _GTTRF (LU factorization with partial pivoting) was called "
249 "incorrectly. INFO = " << INFO <<
" < 0. "
250 "Please report this bug to the Ifpack2 developers.");
255 INFO > 0, std::runtime_error,
"Ifpack2::TriDiContainer::factor: "
256 "LAPACK's _GTTRF (LU factorization with partial pivoting) reports that the "
257 "computed U factor is exactly singular. U(" << INFO <<
"," << INFO <<
") "
258 "(one-based index i) is exactly zero. This probably means that the input "
259 "matrix has a singular diagonal block.");
263 template<
class MatrixType,
class LocalScalarType>
264 void TriDiContainer<MatrixType, LocalScalarType, true>::
265 applyImpl (HostViewLocal& X,
270 local_scalar_type alpha,
271 local_scalar_type beta)
const
274 auto zero = STS::zero();
275 size_t numVecs = X.extent(1);
276 size_t numRows = X.extent(0);
279 X.extent (0) != Y.extent (0),
280 std::logic_error,
"Ifpack2::TriDiContainer::applyImpl: X and Y have "
281 "incompatible dimensions (" << X.extent (0) <<
" resp. "
282 << Y.extent (0) <<
"). Please report this bug to "
283 "the Ifpack2 developers.");
285 X.extent (0) !=
static_cast<size_t> (diagBlocks_[blockIndex].numRowsCols()),
286 std::logic_error,
"Ifpack2::TriDiContainer::applyImpl: The input "
287 "multivector X has incompatible dimensions from those of the "
288 "inverse operator (" << X.extent (0) <<
" vs. "
289 << (mode ==
Teuchos::NO_TRANS ? diagBlocks_[blockIndex].numRowsCols () : diagBlocks_[blockIndex].numRowsCols())
290 <<
"). Please report this bug to the Ifpack2 developers.");
292 Y.extent (0) !=
static_cast<size_t> (diagBlocks_[blockIndex].numRowsCols()),
293 std::logic_error,
"Ifpack2::TriDiContainer::applyImpl: The output "
294 "multivector Y has incompatible dimensions from those of the "
295 "inverse operator (" << Y.extent (0) <<
" vs. "
296 << (mode ==
Teuchos::NO_TRANS ? diagBlocks_[blockIndex].numRowsCols() : diagBlocks_[blockIndex].numRowsCols ())
297 <<
"). Please report this bug to the Ifpack2 developers.");
304 for(
size_t j = 0; j < Y.extent(1); j++)
305 for(
size_t i = 0; i < Y.extent(0); i++)
309 for(
size_t j = 0; j < Y.extent(1); j++)
310 for(
size_t i = 0; i < Y.extent(0); i++)
319 HostViewLocal Y_tmp(
"", numRows, numVecs);
320 Kokkos::deep_copy(Y_tmp, X);
321 scalar_type* Y_ptr = Y_tmp.data();
325 int* blockIpiv = (
int*) ipiv_.getRawPtr() + this->partitionIndices_[blockIndex];
327 diagBlocks_[blockIndex].numRowsCols(),
329 diagBlocks_[blockIndex].DL(),
330 diagBlocks_[blockIndex].D(),
331 diagBlocks_[blockIndex].DU(),
332 diagBlocks_[blockIndex].DU2(),
338 INFO != 0, std::runtime_error,
"Ifpack2::TriDiContainer::applyImpl: "
339 "LAPACK's _GETRS (solve using LU factorization with partial pivoting) "
340 "failed with INFO = " << INFO <<
" != 0.");
342 if (beta != STS::zero ()) {
343 for(
size_t j = 0; j < Y.extent(1); j++)
345 for(
size_t i = 0; i < Y.extent(0); i++)
348 Y(i, j) += alpha * Y_tmp(i, j);
353 for(
size_t j = 0; j < Y.extent(1); j++)
355 for(
size_t i = 0; i < Y.extent(0); i++)
356 Y(i, j) = Y_tmp(i, j);
362 template<
class MatrixType,
class LocalScalarType>
363 void TriDiContainer<MatrixType, LocalScalarType, true>::
370 scalar_type beta)
const
383 Details::MultiVectorLocalGatherScatter<mv_type, local_mv_type> mvgs;
386 ! IsComputed_, std::runtime_error,
"Ifpack2::TriDiContainer::apply: "
387 "You must have called the compute() method before you may call apply(). "
388 "You may call the apply() method as many times as you want after calling "
389 "compute() once, but you must have called compute() at least once.");
391 const size_t numVecs = X.extent(1);
422 if(X_local.size() == 0)
426 for(
int i = 0; i < this->numBlocks_; i++)
428 X_local.emplace_back(
"", this->blockRows_[i], numVecs);
430 for(
int i = 0; i < this->numBlocks_; i++)
432 Y_local.emplace_back(
"", this->blockRows_[i], numVecs);
436 const ArrayView<const local_ordinal_type> localRows = this->getLocalRows(blockIndex);
438 mvgs.gatherViewToView (X_local[blockIndex], X, localRows);
445 mvgs.gatherViewToView (Y_local[blockIndex], Y, localRows);
449 this->applyImpl (X_local[blockIndex], Y_local[blockIndex], blockIndex, stride, mode,
450 as<local_scalar_type>(alpha), as<local_scalar_type>(beta));
454 mvgs.scatterViewToView (Y, Y_local[blockIndex], localRows);
457 template<
class MatrixType,
class LocalScalarType>
458 void TriDiContainer<MatrixType, LocalScalarType, true>::
459 weightedApply (HostView& X,
466 scalar_type beta)
const
475 using Teuchos::rcp_const_cast;
487 Details::MultiVectorLocalGatherScatter<mv_type, local_mv_type> mvgs;
489 size_t numRows = this->blockRows_[blockIndex];
490 size_t numVecs = X.extent(1);
497 ! IsComputed_, std::runtime_error,
"Ifpack2::TriDiContainer::"
498 "weightedApply: You must have called the compute() method before you may "
499 "call apply(). You may call the apply() method as many times as you want "
500 "after calling compute() once, but you must have called compute() at least "
526 if(X_local.size() == 0)
530 for(
int i = 0; i < this->numBlocks_; i++)
532 X_local.emplace_back(
"", this->blockRows_[i], numVecs);
534 for(
int i = 0; i < this->numBlocks_; i++)
536 Y_local.emplace_back(
"", this->blockRows_[i], numVecs);
540 ArrayView<const local_ordinal_type> localRows = this->getLocalRows(blockIndex);
542 mvgs.gatherViewToView (X_local[blockIndex], X, localRows);
549 mvgs.gatherViewToView (Y_local[blockIndex], Y, localRows);
561 HostViewLocal D_local(
"", numVecs, numRows);
563 mvgs.gatherViewToView (D_local, D, localRows);
565 HostViewLocal X_scaled(
"", numVecs, numRows);
567 for(
size_t i = 0; i < X_scaled.extent(0); i++) {
568 for(
size_t j = 0; j < X_scaled.extent(1); j++) {
569 X_scaled(i, j) = X_local[blockIndex](i, j) * D_local(0, j);
578 HostViewLocal Y_temp(
"", Y.extent(0), Y.extent(1));
581 applyImpl(X_scaled, Y_temp, blockIndex, stride, mode, STS::one(), STS::zero());
587 for(
size_t i = 0; i < Y.extent(0); i++) {
588 for(
size_t j = 0; j < Y.extent(1); j++) {
589 Y_local[blockIndex](i, j) *= beta;
590 Y_local[blockIndex](i, j) += alpha * D_local(i, 0) * Y_temp(i, j);
596 mvgs.scatterViewToView (Y, Y_local[blockIndex], localRows);
599 template<
class MatrixType,
class LocalScalarType>
600 std::ostream& TriDiContainer<MatrixType, LocalScalarType, true>::print(std::ostream& os)
const
603 fos.setOutputToRootOnly(0);
608 template<
class MatrixType,
class LocalScalarType>
609 std::string TriDiContainer<MatrixType, LocalScalarType, true>::description()
const
611 std::ostringstream oss;
613 if (isInitialized()) {
615 oss <<
"{status = initialized, computed";
618 oss <<
"{status = initialized, not computed";
622 oss <<
"{status = not initialized, not computed";
629 template<
class MatrixType,
class LocalScalarType>
631 TriDiContainer<MatrixType, LocalScalarType, true>::
637 os <<
"================================================================================" << endl;
638 os <<
"Ifpack2::TriDiContainer" << endl;
639 os <<
"Number of blocks = " << this->numBlocks_ << endl;
640 os <<
"isInitialized() = " << IsInitialized_ << endl;
641 os <<
"isComputed() = " << IsComputed_ << endl;
642 os <<
"================================================================================" << endl;
646 template<
class MatrixType,
class LocalScalarType>
648 TriDiContainer<MatrixType, LocalScalarType, true>::
654 auto& A = *this->inputMatrix_;
655 const size_t inputMatrixNumRows = A.getNodeNumRows();
659 const int myRank = A.getRowMap()->getComm()->getRank();
660 const int numProcs = A.getRowMap()->getComm()->getSize();
664 for(
int i = 0; i < this->numBlocks_; i++)
666 const local_ordinal_type numRows_ = this->blockRows_[i];
668 for(local_ordinal_type j = 0; j < numRows_; j++)
672 static_cast<size_t> (localRows[j]) >= inputMatrixNumRows,
673 std::runtime_error,
"Ifpack2::TriDiContainer::extract: On process " <<
674 myRank <<
" of " << numProcs <<
", localRows[j=" << j <<
"] = " <<
675 localRows[j] <<
", which is out of the valid range of local row indices "
676 "indices [0, " << (inputMatrixNumRows - 1) <<
"] for the input matrix.");
689 const map_type& globalRowMap = *(A.getRowMap());
690 const map_type& globalColMap = *(A.getColMap());
691 const map_type& globalDomMap = *(A.getDomainMap());
693 bool rowIndsValid =
true;
694 bool colIndsValid =
true;
695 Array<local_ordinal_type> localCols (numRows_);
698 Array<local_ordinal_type> invalidLocalRowInds;
699 Array<global_ordinal_type> invalidGlobalColInds;
700 for (local_ordinal_type j = 0; j < numRows_; j++)
703 const local_ordinal_type ii_local = localRows[j];
708 const global_ordinal_type jj_global = globalRowMap.getGlobalElement(ii_local);
716 rowIndsValid =
false;
717 invalidLocalRowInds.push_back(ii_local);
722 if(globalDomMap.isNodeGlobalElement (jj_global))
732 const local_ordinal_type jj_local = globalColMap.getLocalElement(jj_global);
735 colIndsValid =
false;
736 invalidGlobalColInds.push_back(jj_global);
739 localCols[j] = jj_local;
743 !rowIndsValid, std::logic_error,
"Ifpack2::TriDiContainer::extract: "
744 "On process " << myRank <<
", at least one row index in the set of local "
745 "row indices given to the constructor is not a valid local row index in "
746 "the input matrix's row Map on this process. This should be impossible "
747 "because the constructor checks for this case. Here is the complete set "
748 "of invalid local row indices: " <<
toString (invalidLocalRowInds) <<
". "
749 "Please report this bug to the Ifpack2 developers.");
751 !colIndsValid, std::runtime_error,
"Ifpack2::TriDiContainer::extract: "
752 "On process " << myRank <<
", "
753 "At least one row index in the set of row indices given to the constructor "
754 "does not have a corresponding column index in the input matrix's column "
755 "Map. This probably means that the column(s) in question is/are empty on "
756 "this process, which would make the submatrix to extract structurally "
757 "singular. Here is the compete set of invalid global column indices: "
758 <<
toString (invalidGlobalColInds) <<
".");
761 const size_t maxNumEntriesInRow = A.getNodeMaxNumRowEntries();
762 Array<scalar_type> val(maxNumEntriesInRow);
763 Array<local_ordinal_type> ind(maxNumEntriesInRow);
766 for(local_ordinal_type j = 0; j < numRows_; j++)
768 const local_ordinal_type localRow = localRows[j];
770 A.getLocalRowCopy(localRow, ind(), val(), numEntries);
772 for(
size_t k = 0; k < numEntries; k++)
774 const local_ordinal_type localCol = ind[k];
785 if(localCol >= 0 && static_cast<size_t> (localCol) < inputMatrixNumRows)
789 local_ordinal_type jj = INVALID;
790 for (local_ordinal_type kk = 0; kk < numRows_; kk++)
792 if(localRows[kk] == localCol)
796 diagBlocks_[i](j, jj) += val[k];
803 template<
class MatrixType,
class LocalScalarType>
804 std::string TriDiContainer<MatrixType, LocalScalarType, true>::getName()
809 template<
class MatrixType,
class LocalScalarType>
810 TriDiContainer<MatrixType, LocalScalarType, false>::
815 scalar_type DampingFactor) :
816 Container<MatrixType> (matrix, partitions, importer, OverlapLevel,
820 (
true, std::logic_error,
"Ifpack2::TriDiContainer: Not implemented for "
825 template<
class MatrixType,
class LocalScalarType>
826 TriDiContainer<MatrixType, LocalScalarType, false>::
829 Container<MatrixType> (matrix, localRows)
832 (
true, std::logic_error,
"Ifpack2::TriDiContainer: Not implemented for "
837 template<
class MatrixType,
class LocalScalarType>
838 TriDiContainer<MatrixType, LocalScalarType, false>::~TriDiContainer () {}
840 template<
class MatrixType,
class LocalScalarType>
841 bool TriDiContainer<MatrixType, LocalScalarType, false>::isInitialized ()
const
846 template<
class MatrixType,
class LocalScalarType>
847 bool TriDiContainer<MatrixType, LocalScalarType, false>::isComputed ()
const
852 template<
class MatrixType,
class LocalScalarType>
853 void TriDiContainer<MatrixType, LocalScalarType, false>::
856 template<
class MatrixType,
class LocalScalarType>
857 void TriDiContainer<MatrixType, LocalScalarType, false>::initialize () {}
859 template<
class MatrixType,
class LocalScalarType>
860 void TriDiContainer<MatrixType, LocalScalarType, false>::compute () {}
862 template<
class MatrixType,
class LocalScalarType>
863 void TriDiContainer<MatrixType, LocalScalarType, false>::clearBlocks () {}
865 template<
class MatrixType,
class LocalScalarType>
866 void TriDiContainer<MatrixType, LocalScalarType, false>::factor () {}
868 template<
class MatrixType,
class LocalScalarType>
869 void TriDiContainer<MatrixType, LocalScalarType, false>::
870 applyImpl (HostViewLocal& X,
875 local_scalar_type alpha,
876 local_scalar_type beta)
const {}
878 template<
class MatrixType,
class LocalScalarType>
879 void TriDiContainer<MatrixType, LocalScalarType, false>::
886 scalar_type beta)
const {}
888 template<
class MatrixType,
class LocalScalarType>
889 void TriDiContainer<MatrixType, LocalScalarType, false>::
890 weightedApply (HostView& X,
897 scalar_type beta)
const {}
899 template<
class MatrixType,
class LocalScalarType>
900 std::ostream& TriDiContainer<MatrixType, LocalScalarType, false>::print(std::ostream& os)
const
905 template<
class MatrixType,
class LocalScalarType>
906 std::string TriDiContainer<MatrixType, LocalScalarType, false>::description()
const
911 template<
class MatrixType,
class LocalScalarType>
913 TriDiContainer<MatrixType, LocalScalarType, false>::
917 template<
class MatrixType,
class LocalScalarType>
919 TriDiContainer<MatrixType, LocalScalarType, false>::
922 template<
class MatrixType,
class LocalScalarType>
923 std::string TriDiContainer<MatrixType, LocalScalarType, false>::getName()
928 #define IFPACK2_TRIDICONTAINER_INSTANT(S,LO,GO,N) \
929 template class Ifpack2::TriDiContainer< Tpetra::RowMatrix<S, LO, GO, N>, S >;
Ifpack2::TriDiContainer class declaration.
void GTTRS(const char &TRANS, const OrdinalType &n, const OrdinalType &nrhs, const ScalarType *dl, const ScalarType *d, const ScalarType *du, const ScalarType *du2, const OrdinalType *IPIV, ScalarType *B, const OrdinalType &ldb, OrdinalType *info) const
void GTTRF(const OrdinalType &n, ScalarType *dl, ScalarType *d, ScalarType *du, ScalarType *du2, OrdinalType *IPIV, OrdinalType *info) const
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
virtual std::string description() const
TEUCHOSCORE_LIB_DLL_EXPORT std::string toString(const EVerbosityLevel verbLevel)
TypeTo as(const TypeFrom &t)
std::string toString(const T &t)