43 #ifndef IFPACK2_TRIDICONTAINER_DEF_HPP
44 #define IFPACK2_TRIDICONTAINER_DEF_HPP
52 # include "Teuchos_DefaultSerialComm.hpp"
58 template<
class MatrixType,
class LocalScalarType>
64 ContainerImpl<MatrixType, LocalScalarType>(matrix, partitions, pointIndexed),
65 ipiv_(this->blockRows_.size() * this->scalarsPerRow_),
66 scalarOffsets_ (this->numBlocks_)
69 ! matrix->hasColMap (), std::invalid_argument,
"Ifpack2::TriDiContainer: "
70 "The constructor's input matrix must have a column Map.");
77 scalarOffsets_[i] = scalarTotal;
80 if(actualBlockRows == 1)
88 scalarTotal += 4 * (actualBlockRows - 1);
92 scalars_.
resize(scalarTotal);
93 diagBlocks_.reserve(this->numBlocks_);
100 template<
class MatrixType,
class LocalScalarType>
104 template<
class MatrixType,
class LocalScalarType>
107 for(
int i = 0; i < this->numBlocks_; i++)
109 this->IsInitialized_ =
true;
112 this->IsComputed_ =
false;
115 template<
class MatrixType,
class LocalScalarType>
118 #ifdef HAVE_IFPACK2_DEBUG
120 ipiv_.size () != this->blockRows_.size() * this->scalarsPerRow_, std::logic_error,
121 "Ifpack2::TriDiContainer::compute: ipiv_ array has the wrong size. "
122 "Please report this bug to the Ifpack2 developers.");
125 this->IsComputed_ =
false;
126 if (! this->isInitialized ()) {
131 this->IsComputed_ =
true;
134 template<
class MatrixType,
class LocalScalarType>
147 if(this->scalarsPerRow_ > 1)
149 Array<LO> colToBlockOffset(this->inputBlockMatrix_->getLocalNumCols(), INVALID);
150 for(
int i = 0; i < this->numBlocks_; i++)
153 LO blockStart = this->blockOffsets_[i];
154 LO blockEnd = blockStart + this->blockSizes_[i];
155 ArrayView<const LO> blockRows = this->getBlockRows(i);
159 for(
size_t j = 0; j < size_t(blockRows.size()); j++)
161 LO localCol = this->translateRowToCol(blockRows[j]);
162 colToBlockOffset[localCol] = blockStart + j;
164 using h_inds_type =
typename block_crs_matrix_type::local_inds_host_view_type;
165 using h_vals_type =
typename block_crs_matrix_type::values_host_view_type;
166 for(LO blockRow = 0; blockRow < LO(blockRows.size()); blockRow++)
171 LO inputRow = this->blockRows_[blockStart + blockRow];
172 this->inputBlockMatrix_->getLocalRowView(inputRow, indices, values);
173 LO numEntries = (LO) indices.size();
174 for(LO k = 0; k < numEntries; k++)
176 LO colOffset = colToBlockOffset[indices[k]];
177 if(blockStart <= colOffset && colOffset < blockEnd)
182 LO blockCol = colOffset - blockStart;
183 for(LO bc = 0; bc < this->bcrsBlockSize_; bc++)
185 for(LO br = 0; br < this->bcrsBlockSize_; br++)
187 LO r = this->bcrsBlockSize_ * blockRow + br;
188 LO c = this->bcrsBlockSize_ * blockCol + bc;
189 auto val = values[k * (this->bcrsBlockSize_ * this->bcrsBlockSize_) + (br + this->bcrsBlockSize_ * bc)];
191 diagBlocks_[i](r, c) = val;
203 Array<LO> colToBlockOffset(this->inputMatrix_->getLocalNumCols() * this->bcrsBlockSize_, INVALID);
204 for(
int i = 0; i < this->numBlocks_; i++)
207 LO blockStart = this->blockOffsets_[i];
208 LO blockEnd = blockStart + this->blockSizes_[i];
209 ArrayView<const LO> blockRows = this->getBlockRows(i);
213 for(
size_t j = 0; j < size_t(blockRows.size()); j++)
216 LO localCol = this->translateRowToCol(blockRows[j]);
217 colToBlockOffset[localCol] = blockStart + j;
219 for(
size_t blockRow = 0; blockRow < size_t(blockRows.size()); blockRow++)
222 LO inputPointRow = this->blockRows_[blockStart + blockRow];
223 auto rowView = this->getInputRowView(inputPointRow);
224 for(
size_t k = 0; k < rowView.size(); k++)
226 LO colOffset = colToBlockOffset[rowView.ind(k)];
227 if(blockStart <= colOffset && colOffset < blockEnd)
229 LO blockCol = colOffset - blockStart;
230 auto val = rowView.val(k);
232 diagBlocks_[i](blockRow, blockCol) = rowView.val(k);
240 template<
class MatrixType,
class LocalScalarType>
241 void TriDiContainer<MatrixType, LocalScalarType>::clearBlocks ()
245 ContainerImpl<MatrixType, LocalScalarType>::clearBlocks();
248 template<
class MatrixType,
class LocalScalarType>
249 void TriDiContainer<MatrixType, LocalScalarType>::factor ()
251 for(
int i = 0; i < this->numBlocks_; i++)
255 int* blockIpiv = &ipiv_[this->blockOffsets_[i] * this->scalarsPerRow_];
256 lapack.
GTTRF (diagBlocks_[i].numRowsCols (),
260 diagBlocks_[i].DU2(),
264 INFO < 0, std::logic_error,
"Ifpack2::TriDiContainer::factor: "
265 "LAPACK's _GTTRF (LU factorization with partial pivoting) was called "
266 "incorrectly. INFO = " << INFO <<
" < 0. "
267 "Please report this bug to the Ifpack2 developers.");
272 INFO > 0, std::runtime_error,
"Ifpack2::TriDiContainer::factor: "
273 "LAPACK's _GTTRF (LU factorization with partial pivoting) reports that the "
274 "computed U factor is exactly singular. U(" << INFO <<
"," << INFO <<
") "
275 "(one-based index i) is exactly zero. This probably means that the input "
276 "matrix has a singular diagonal block.");
280 template<
class MatrixType,
class LocalScalarType>
290 size_t numVecs = X.extent(1);
291 size_t numRows = X.extent(0);
293 #ifdef HAVE_IFPACK2_DEBUG
295 X.extent (0) != Y.extent (0),
296 std::logic_error,
"Ifpack2::TriDiContainer::solveBlock: X and Y have "
297 "incompatible dimensions (" << X.extent (0) <<
" resp. "
298 << Y.extent (0) <<
"). Please report this bug to "
299 "the Ifpack2 developers.");
301 X.extent (0) !=
static_cast<size_t> (diagBlocks_[blockIndex].numRowsCols()),
302 std::logic_error,
"Ifpack2::TriDiContainer::solveBlock: The input "
303 "multivector X has incompatible dimensions from those of the "
304 "inverse operator (" << X.extent (0) <<
" vs. "
305 << (mode ==
Teuchos::NO_TRANS ? diagBlocks_[blockIndex].numRowsCols () : diagBlocks_[blockIndex].numRowsCols())
306 <<
"). Please report this bug to the Ifpack2 developers.");
307 TEUCHOS_TEST_FOR_EXCEPTION(
308 Y.extent (0) !=
static_cast<size_t> (diagBlocks_[blockIndex].numRowsCols()),
309 std::logic_error,
"Ifpack2::TriDiContainer::solveBlock: The output "
310 "multivector Y has incompatible dimensions from those of the "
311 "inverse operator (" << Y.extent (0) <<
" vs. "
312 << (mode ==
Teuchos::NO_TRANS ? diagBlocks_[blockIndex].numRowsCols() : diagBlocks_[blockIndex].numRowsCols ())
313 <<
"). Please report this bug to the Ifpack2 developers.");
321 for(
size_t j = 0; j < numVecs; j++)
322 for(
size_t i = 0; i < numRows; i++)
326 for(
size_t j = 0; j < numVecs; j++)
327 for(
size_t i = 0; i < numRows; i++)
328 Y(i, j) *=
ISC(beta);
337 std::vector<LSC> yTemp(numVecs * numRows);
338 for(
size_t j = 0; j < numVecs; j++)
340 for(
size_t i = 0; i < numRows; i++)
341 yTemp[j * numRows + i] = X(i, j);
347 int* blockIpiv = &ipiv_[this->blockOffsets_[blockIndex] * this->scalarsPerRow_];
349 diagBlocks_[blockIndex].numRowsCols(),
351 diagBlocks_[blockIndex].DL(),
352 diagBlocks_[blockIndex].D(),
353 diagBlocks_[blockIndex].DU(),
354 diagBlocks_[blockIndex].DU2(),
361 for(
size_t j = 0; j < numVecs; j++)
363 for(
size_t i = 0; i < numRows; i++)
365 Y(i, j) *=
ISC(beta);
366 Y(i, j) +=
ISC(alpha * yTemp[j * numRows + i]);
371 for(
size_t j = 0; j < numVecs; j++)
373 for(
size_t i = 0; i < numRows; i++)
374 Y(i, j) =
ISC(yTemp[j * numRows + i]);
378 TEUCHOS_TEST_FOR_EXCEPTION(
379 INFO != 0, std::runtime_error,
"Ifpack2::TriDiContainer::solveBlock: "
380 "LAPACK's _GETRS (solve using LU factorization with partial pivoting) "
381 "failed with INFO = " << INFO <<
" != 0.");
385 template<
class MatrixType,
class LocalScalarType>
394 template<
class MatrixType,
class LocalScalarType>
397 std::ostringstream oss;
399 if (this->isInitialized()) {
400 if (this->isComputed()) {
401 oss <<
"{status = initialized, computed";
404 oss <<
"{status = initialized, not computed";
408 oss <<
"{status = not initialized, not computed";
415 template<
class MatrixType,
class LocalScalarType>
423 os <<
"================================================================================" << endl;
424 os <<
"Ifpack2::TriDiContainer" << endl;
425 os <<
"Number of blocks = " << this->numBlocks_ << endl;
426 os <<
"isInitialized() = " << this->IsInitialized_ << endl;
427 os <<
"isComputed() = " << this->IsComputed_ << endl;
428 os <<
"================================================================================" << endl;
432 template<
class MatrixType,
class LocalScalarType>
438 #define IFPACK2_TRIDICONTAINER_INSTANT(S,LO,GO,N) \
439 template class Ifpack2::TriDiContainer< Tpetra::RowMatrix<S, LO, GO, N>, S >;
Store and solve a local TriDi linear problem.
Definition: Ifpack2_TriDiContainer_decl.hpp:105
virtual std::string description() const
A one-line description of this object.
Definition: Ifpack2_TriDiContainer_def.hpp:395
static std::string getName()
Get the name of this container type for Details::constructContainer()
Definition: Ifpack2_TriDiContainer_def.hpp:433
Teuchos::Array< LO > blockSizes_
Number of rows in each block.
Definition: Ifpack2_Container_decl.hpp:296
virtual void compute()
Extract the local diagonal block and prepare the solver.
Definition: Ifpack2_TriDiContainer_def.hpp:116
typename Kokkos::ArithTraits< SC >::val_type ISC
Internal representation of Scalar in Kokkos::View.
Definition: Ifpack2_Container_decl.hpp:135
int numBlocks_
The number of blocks (partitions) in the container.
Definition: Ifpack2_Container_decl.hpp:292
void GTTRS(const char &TRANS, const OrdinalType &n, const OrdinalType &nrhs, const ScalarType *dl, const ScalarType *d, const ScalarType *du, const ScalarType *du2, const OrdinalType *IPIV, ScalarType *B, const OrdinalType &ldb, OrdinalType *info) const
void GTTRF(const OrdinalType &n, ScalarType *dl, ScalarType *d, ScalarType *du, ScalarType *du2, OrdinalType *IPIV, OrdinalType *info) const
The implementation of the numerical features of Container (Jacobi, Gauss-Seidel, SGS). This class allows a custom scalar type (LocalScalarType) to be used for storing blocks and solving the block systems. Hiding this template parameter from the Container interface simplifies the BlockRelaxation and ContainerFactory classes.
Definition: Ifpack2_Container_decl.hpp:343
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
LO scalarsPerRow_
Definition: Ifpack2_Container_decl.hpp:317
virtual ~TriDiContainer()
Destructor (declared virtual for memory safety of derived classes).
Definition: Ifpack2_TriDiContainer_def.hpp:102
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object with some verbosity level to the given FancyOStream.
Definition: Ifpack2_TriDiContainer_def.hpp:418
virtual std::ostream & print(std::ostream &os) const
Print information about this object to the given output stream.
Definition: Ifpack2_TriDiContainer_def.hpp:386
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
virtual std::string description() const
basic_FancyOStream & setOutputToRootOnly(const int rootRank)
void resize(size_type new_size, const value_type &x=value_type())
TriDiContainer(const Teuchos::RCP< const row_matrix_type > &matrix, const Teuchos::Array< Teuchos::Array< LO > > &partitions, const Teuchos::RCP< const import_type > &importer, bool pointIndexed)
Constructor.
Definition: Ifpack2_TriDiContainer_def.hpp:60
virtual void initialize()
Do all set-up operations that only require matrix structure.
Definition: Ifpack2_TriDiContainer_def.hpp:105
void solveBlock(ConstHostSubviewLocal X, HostSubviewLocal Y, int blockIndex, Teuchos::ETransp mode, LSC alpha, LSC beta) const
Definition: Ifpack2_TriDiContainer_def.hpp:282