43 #ifndef IFPACK2_BANDEDCONTAINER_DEF_HPP
44 #define IFPACK2_BANDEDCONTAINER_DEF_HPP
47 #include "Tpetra_CrsMatrix.hpp"
55 # include "Teuchos_DefaultSerialComm.hpp"
60 template<
class MatrixType,
class LocalScalarType>
66 ContainerImpl<MatrixType, LocalScalarType>(matrix, partitions, pointIndexed),
67 ipiv_(this->blockRows_.size() * this->scalarsPerRow_),
68 kl_(this->numBlocks_, -1),
69 ku_(this->numBlocks_, -1),
70 scalarOffsets_(this->numBlocks_)
73 ! matrix->hasColMap (), std::invalid_argument,
"Ifpack2::BandedContainer: "
74 "The constructor's input matrix must have a column Map.");
77 template<
class MatrixType,
class LocalScalarType>
81 template<
class MatrixType,
class LocalScalarType>
85 if(List.
isParameter(
"relaxation: banded container superdiagonals"))
87 int ku = List.
get<
int>(
"relaxation: banded container superdiagonals");
88 for(LO b = 0; b < this->numBlocks_; b++)
91 if(List.
isParameter(
"relaxation: banded container subdiagonals"))
93 int kl = List.
get<
int>(
"relaxation: banded container subdiagonals");
94 for(LO b = 0; b < this->numBlocks_; b++)
99 template<
class MatrixType,
class LocalScalarType>
107 size_t colToOffsetSize = this->inputMatrix_->getLocalNumCols();
108 if(this->pointIndexed_)
109 colToOffsetSize *= this->bcrsBlockSize_;
110 Array<LO> colToBlockOffset(colToOffsetSize, INVALID);
113 for(
int i = 0; i < this->numBlocks_; i++)
118 if(this->scalarsPerRow_ > 1)
121 LO blockStart = this->blockOffsets_[i];
122 LO blockEnd = (i == this->numBlocks_ - 1) ? this->blockRows_.size() : this->blockOffsets_[i + 1];
123 ArrayView<const LO> blockRows = this->getBlockRows(i);
127 for(
size_t j = 0; j < size_t(blockRows.size()); j++)
129 LO localCol = this->translateRowToCol(blockRows[j]);
130 colToBlockOffset[localCol] = blockStart + j;
133 using h_inds_type =
typename block_crs_matrix_type::local_inds_host_view_type;
134 using h_vals_type =
typename block_crs_matrix_type::values_host_view_type;
135 for(LO blockRow = 0; blockRow < LO(blockRows.size()); blockRow++)
140 LO inputRow = this->blockRows_[blockStart + blockRow];
141 this->inputBlockMatrix_->getLocalRowView(inputRow, indices, values);
142 LO numEntries = (LO) indices.size();
143 for(LO k = 0; k < numEntries; k++)
145 LO colOffset = colToBlockOffset[indices[k]];
146 if(blockStart <= colOffset && colOffset < blockEnd)
151 LO blockCol = colOffset - blockStart;
152 for(LO bc = 0; bc < this->bcrsBlockSize_; bc++)
154 for(LO br = 0; br < this->bcrsBlockSize_; br++)
156 LO r = this->bcrsBlockSize_ * blockRow + br;
157 LO c = this->bcrsBlockSize_ * blockCol + bc;
171 LO blockStart = this->blockOffsets_[i];
172 LO blockEnd = (i == this->numBlocks_ - 1) ? this->blockRows_.size() : this->blockOffsets_[i + 1];
173 ArrayView<const LO> blockRows = this->getBlockRows(i);
177 for(
size_t j = 0; j < size_t(blockRows.size()); j++)
180 LO localCol = this->translateRowToCol(blockRows[j]);
181 colToBlockOffset[localCol] = blockStart + j;
183 for(LO blockRow = 0; blockRow < LO(blockRows.size()); blockRow++)
186 LO inputSplitRow = this->blockRows_[blockStart + blockRow];
187 auto rowView = this->getInputRowView(inputSplitRow);
188 for(
size_t k = 0; k < rowView.size(); k++)
190 LO colOffset = colToBlockOffset[rowView.ind(k)];
191 if(blockStart <= colOffset && colOffset < blockEnd)
193 LO blockCol = colOffset - blockStart;
194 maxSub = std::max(maxSub, blockRow - blockCol);
195 maxSuper = std::max(maxSuper, blockCol - blockRow);
205 template<
class MatrixType,
class LocalScalarType>
217 for(LO b = 0; b < this->numBlocks_; b++)
219 LO stride = 2 * kl_[b] + ku_[b] + 1;
220 scalarOffsets_[b] = totalScalars;
221 totalScalars += stride * this->blockSizes_[b] * this->scalarsPerRow_;
223 scalars_.resize(totalScalars);
224 for(
int b = 0; b < this->numBlocks_; b++)
228 LO nrows = this->blockSizes_[b] * this->scalarsPerRow_;
229 diagBlocks_.emplace_back(
Teuchos::View, scalars_.data() + scalarOffsets_[b], 2 * kl_[b] + ku_[b] + 1, nrows, nrows, kl_[b], kl_[b] + ku_[b]);
232 std::fill (ipiv_.begin (), ipiv_.end (), 0);
235 this->IsComputed_ =
false;
236 this->IsInitialized_ =
true;
239 template<
class MatrixType,
class LocalScalarType>
245 ipiv_.size () != this->blockRows_.size() * this->scalarsPerRow_, std::logic_error,
246 "Ifpack2::BandedContainer::compute: ipiv_ array has the wrong size. "
247 "Please report this bug to the Ifpack2 developers.");
249 this->IsComputed_ =
false;
250 if (!this->isInitialized ()) {
257 this->IsComputed_ =
true;
260 template<
class MatrixType,
class LocalScalarType>
266 SC zero = STS::zero();
274 if(this->scalarsPerRow_ > 1)
276 Array<LO> colToBlockOffset(this->inputBlockMatrix_->getLocalNumCols(), INVALID);
277 for(
int i = 0; i < this->numBlocks_; i++)
280 LO blockStart = this->blockOffsets_[i];
281 LO blockEnd = blockStart + this->blockSizes_[i];
282 ArrayView<const LO> blockRows = this->getBlockRows(i);
286 for(
size_t j = 0; j < size_t(blockRows.size()); j++)
288 LO localCol = this->translateRowToCol(blockRows[j]);
289 colToBlockOffset[localCol] = blockStart + j;
291 using h_inds_type =
typename block_crs_matrix_type::local_inds_host_view_type;
292 using h_vals_type =
typename block_crs_matrix_type::values_host_view_type;
293 for(LO blockRow = 0; blockRow < LO(blockRows.size()); blockRow++)
298 LO inputRow = this->blockRows_[blockStart + blockRow];
299 this->inputBlockMatrix_->getLocalRowView(inputRow, indices, values);
300 LO numEntries = (LO) indices.size();
301 for(LO k = 0; k < numEntries; k++)
303 LO colOffset = colToBlockOffset[indices[k]];
304 if(blockStart <= colOffset && colOffset < blockEnd)
309 LO blockCol = colOffset - blockStart;
310 for(LO bc = 0; bc < this->bcrsBlockSize_; bc++)
312 for(LO br = 0; br < this->bcrsBlockSize_; br++)
314 LO r = this->bcrsBlockSize_ * blockRow + br;
315 LO c = this->bcrsBlockSize_ * blockCol + bc;
316 auto val = values[k * (this->bcrsBlockSize_ * this->bcrsBlockSize_) + (br + this->bcrsBlockSize_ * bc)];
318 diagBlocks_[i](r, c) = val;
330 Array<LO> colToBlockOffset(this->inputMatrix_->getLocalNumCols() * this->bcrsBlockSize_, INVALID);
331 for(
int i = 0; i < this->numBlocks_; i++)
334 LO blockStart = this->blockOffsets_[i];
335 LO blockEnd = blockStart + this->blockSizes_[i];
336 ArrayView<const LO> blockRows = this->getBlockRows(i);
340 for(
size_t j = 0; j < size_t(blockRows.size()); j++)
343 LO localCol = this->translateRowToCol(blockRows[j]);
344 colToBlockOffset[localCol] = blockStart + j;
346 for(
size_t blockRow = 0; blockRow < size_t(blockRows.size()); blockRow++)
349 LO inputPointRow = this->blockRows_[blockStart + blockRow];
350 auto rowView = this->getInputRowView(inputPointRow);
351 for(
size_t k = 0; k < rowView.size(); k++)
353 LO colOffset = colToBlockOffset[rowView.ind(k)];
354 if(blockStart <= colOffset && colOffset < blockEnd)
356 LO blockCol = colOffset - blockStart;
357 auto val = rowView.val(k);
359 diagBlocks_[i](blockRow, blockCol) = rowView.val(k);
367 template<
class MatrixType,
class LocalScalarType>
369 BandedContainer<MatrixType, LocalScalarType>::
374 ContainerImpl<MatrixType, LocalScalarType>::clearBlocks();
377 template<
class MatrixType,
class LocalScalarType>
379 BandedContainer<MatrixType, LocalScalarType>::
385 for(
int i = 0; i < this->numBlocks_; i++)
389 "BandedContainer<T>::factor: Diagonal block is an empty SerialBandDenseMatrix<T>!");
391 "BandedContainer<T>::factor: Diagonal block needs kl additional superdiagonals for factorization! However, the number of superdiagonals is smaller than the number of subdiagonals!");
392 int* blockIpiv = &ipiv_[this->blockOffsets_[i] * this->scalarsPerRow_];
393 lapack.
GBTRF (diagBlocks_[i].numRows(),
394 diagBlocks_[i].numCols(),
395 diagBlocks_[i].lowerBandwidth(),
396 diagBlocks_[i].upperBandwidth() - diagBlocks_[i].lowerBandwidth(),
397 diagBlocks_[i].values(),
398 diagBlocks_[i].stride(),
404 INFO < 0, std::logic_error,
"Ifpack2::BandedContainer::factor: "
405 "LAPACK's _GBTRF (LU factorization with partial pivoting) was called "
406 "incorrectly. INFO = " << INFO <<
" < 0. "
407 "Please report this bug to the Ifpack2 developers.");
412 INFO > 0, std::runtime_error,
"Ifpack2::BandedContainer::factor: "
413 "LAPACK's _GBTRF (LU factorization with partial pivoting) reports that the "
414 "computed U factor is exactly singular. U(" << INFO <<
"," << INFO <<
") "
415 "(one-based index i) is exactly zero. This probably means that the input "
416 "matrix has a singular diagonal block.");
420 template<
class MatrixType,
class LocalScalarType>
422 BandedContainer<MatrixType, LocalScalarType>::
423 solveBlock(ConstHostSubviewLocal X,
428 const LSC beta)
const
430 #ifdef HAVE_IFPACK2_DEBUG
432 X.extent (0) != Y.extent (0),
433 std::logic_error,
"Ifpack2::BandedContainer::solveBlock: X and Y have "
434 "incompatible dimensions (" << X.extent (0) <<
" resp. "
435 << Y.extent (0) <<
"). Please report this bug to "
436 "the Ifpack2 developers.");
438 X.extent (0) !=
static_cast<size_t> (mode ==
Teuchos::NO_TRANS ? diagBlocks_[blockIndex].numCols() : diagBlocks_[blockIndex].numRows()),
439 std::logic_error,
"Ifpack2::BandedContainer::solveBlock: The input "
440 "multivector X has incompatible dimensions from those of the "
441 "inverse operator (" << X.extent (0) <<
" vs. "
442 << (mode ==
Teuchos::NO_TRANS ? diagBlocks_[blockIndex].numCols() : diagBlocks_[blockIndex].numRows())
443 <<
"). Please report this bug to the Ifpack2 developers.");
445 Y.extent (0) !=
static_cast<size_t> (mode ==
Teuchos::NO_TRANS ? diagBlocks_[blockIndex].numRows() : diagBlocks_[blockIndex].numCols()),
446 std::logic_error,
"Ifpack2::BandedContainer::solveBlock: The output "
447 "multivector Y has incompatible dimensions from those of the "
448 "inverse operator (" << Y.extent (0) <<
" vs. "
449 << (mode ==
Teuchos::NO_TRANS ? diagBlocks_[blockIndex].numRows() : diagBlocks_[blockIndex].numCols())
450 <<
"). Please report this bug to the Ifpack2 developers.");
453 size_t numRows = X.extent (0);
454 size_t numVecs = X.extent (1);
462 for(
size_t j = 0; j < numVecs; j++)
463 for(
size_t i = 0; i < numRows; i++)
467 for(
size_t j = 0; j < numVecs; j++)
468 for(
size_t i = 0; i < numRows; i++)
469 Y(i, j) = beta * Y(i, j);
477 std::vector<LSC> yTemp(numVecs * numRows);
478 for(
size_t j = 0; j < numVecs; j++)
480 for(
size_t i = 0; i < numRows; i++)
481 yTemp[j * numRows + i] = X(i, j);
487 const int* blockIpiv = &ipiv_[this->blockOffsets_[blockIndex] * this->scalarsPerRow_];
490 diagBlocks_[blockIndex].numCols(),
491 diagBlocks_[blockIndex].lowerBandwidth(),
493 diagBlocks_[blockIndex].upperBandwidth() - diagBlocks_[blockIndex].lowerBandwidth(),
495 diagBlocks_[blockIndex].values(),
496 diagBlocks_[blockIndex].stride(),
503 for(
size_t j = 0; j < numVecs; j++)
505 for(
size_t i = 0; i < numRows; i++)
507 Y(i, j) *= ISC(beta);
508 Y(i, j) += ISC(alpha * yTemp[j * numRows + i]);
513 for(
size_t j = 0; j < numVecs; j++)
515 for(
size_t i = 0; i < numRows; i++)
516 Y(i, j) = ISC(yTemp[j * numRows + i]);
521 INFO != 0, std::runtime_error,
"Ifpack2::BandedContainer::solveBlock: "
522 "LAPACK's _GBTRS (solve using LU factorization with partial pivoting) "
523 "failed with INFO = " << INFO <<
" != 0.");
527 template<
class MatrixType,
class LocalScalarType>
538 template<
class MatrixType,
class LocalScalarType>
543 std::ostringstream oss;
545 if (this->isInitialized()) {
546 if (this->isComputed()) {
547 oss <<
"{status = initialized, computed";
550 oss <<
"{status = initialized, not computed";
554 oss <<
"{status = not initialized, not computed";
560 template<
class MatrixType,
class LocalScalarType>
567 os <<
"================================================================================" << std::endl;
568 os <<
"Ifpack2::BandedContainer" << std::endl;
569 for(
int i = 0; i < this->numBlocks_; i++)
571 os <<
"Block " << i <<
": Number of rows = " << this->blockSizes_[i] << std::endl;
572 os <<
"Block " << i <<
": Number of subdiagonals = " << diagBlocks_[i].lowerBandwidth() << std::endl;
573 os <<
"Block " << i <<
": Number of superdiagonals = " << diagBlocks_[i].upperBandwidth() << std::endl;
575 os <<
"isInitialized() = " << this->IsInitialized_ << std::endl;
576 os <<
"isComputed() = " << this->IsComputed_ << std::endl;
577 os <<
"================================================================================" << std::endl;
581 template<
class MatrixType,
class LocalScalarType>
589 #define IFPACK2_BANDEDCONTAINER_INSTANT(S,LO,GO,N) \
590 template class Ifpack2::BandedContainer< Tpetra::RowMatrix<S, LO, GO, N>, S >;
592 #endif // IFPACK2_BANDEDCONTAINER_HPP
virtual void initialize() override
Do all set-up operations that only require matrix structure.
Definition: Ifpack2_BandedContainer_def.hpp:208
T & get(const std::string &name, T def_value)
The implementation of the numerical features of Container (Jacobi, Gauss-Seidel, SGS). This class allows a custom scalar type (LocalScalarType) to be used for storing blocks and solving the block systems. Hiding this template parameter from the Container interface simplifies the BlockRelaxation and ContainerFactory classes.
Definition: Ifpack2_Container_decl.hpp:343
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
void GBTRF(const OrdinalType &m, const OrdinalType &n, const OrdinalType &kl, const OrdinalType &ku, ScalarType *A, const OrdinalType &lda, OrdinalType *IPIV, OrdinalType *info) const
virtual void compute() override
Initialize and compute each block.
Definition: Ifpack2_BandedContainer_def.hpp:242
bool isParameter(const std::string &name) const
virtual std::string description() const
static std::string getName()
Get the name of this container type for Details::constructContainer()
Definition: Ifpack2_BandedContainer_def.hpp:582
virtual std::ostream & print(std::ostream &os) const override
Print information about this object to the given output stream.
Definition: Ifpack2_BandedContainer_def.hpp:530
basic_FancyOStream & setOutputToRootOnly(const int rootRank)
virtual ~BandedContainer()
Destructor (declared virtual for memory safety of derived classes).
Definition: Ifpack2_BandedContainer_def.hpp:79
void computeBandwidth()
Definition: Ifpack2_BandedContainer_def.hpp:101
void GBTRS(const char &TRANS, const OrdinalType &n, const OrdinalType &kl, const OrdinalType &ku, const OrdinalType &nrhs, const ScalarType *A, const OrdinalType &lda, const OrdinalType *IPIV, ScalarType *B, const OrdinalType &ldb, OrdinalType *info) const
BandedContainer(const Teuchos::RCP< const row_matrix_type > &matrix, const Teuchos::Array< Teuchos::Array< LO > > &partitions, const Teuchos::RCP< const import_type > &, bool pointIndexed)
Constructor.
Definition: Ifpack2_BandedContainer_def.hpp:62
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const override
Print the object with some verbosity level to the given FancyOStream.
Definition: Ifpack2_BandedContainer_def.hpp:563
virtual void setParameters(const Teuchos::ParameterList &List) override
Set all necessary parameters (super- and sub-diagonal bandwidth)
Definition: Ifpack2_BandedContainer_def.hpp:83
Store and solve a local Banded linear problem.
Definition: Ifpack2_BandedContainer_decl.hpp:102
virtual std::string description() const override
A one-line description of this object.
Definition: Ifpack2_BandedContainer_def.hpp:541