43 #ifndef IFPACK2_BANDEDCONTAINER_DEF_HPP 
   44 #define IFPACK2_BANDEDCONTAINER_DEF_HPP 
   47 #include "Tpetra_CrsMatrix.hpp" 
   55 #  include "Teuchos_DefaultSerialComm.hpp" 
   60 template<
class MatrixType, 
class LocalScalarType>
 
   66   ContainerImpl<MatrixType, LocalScalarType>(matrix, partitions, pointIndexed),
 
   67   ipiv_(this->blockRows_.size() * this->scalarsPerRow_),
 
   68   kl_(this->numBlocks_, -1),
 
   69   ku_(this->numBlocks_, -1),
 
   70   scalarOffsets_(this->numBlocks_)
 
   73     ! matrix->hasColMap (), std::invalid_argument, 
"Ifpack2::BandedContainer: " 
   74     "The constructor's input matrix must have a column Map.");
 
   77 template<
class MatrixType, 
class LocalScalarType>
 
   81 template<
class MatrixType, 
class LocalScalarType>
 
   85   if(List.
isParameter(
"relaxation: banded container superdiagonals"))
 
   87     int ku = List.
get<
int>(
"relaxation: banded container superdiagonals");
 
   88     for(LO b = 0; b < this->numBlocks_; b++)
 
   91   if(List.
isParameter(
"relaxation: banded container subdiagonals"))
 
   93     int kl = List.
get<
int>(
"relaxation: banded container subdiagonals");
 
   94     for(LO b = 0; b < this->numBlocks_; b++)
 
   99 template<
class MatrixType, 
class LocalScalarType>
 
  107   size_t colToOffsetSize = this->inputMatrix_->getNodeNumCols();
 
  108   if(this->pointIndexed_)
 
  109     colToOffsetSize *= this->bcrsBlockSize_;
 
  110   Array<LO> colToBlockOffset(colToOffsetSize, INVALID);
 
  113   for(
int i = 0; i < this->numBlocks_; i++)
 
  118     if(this->scalarsPerRow_ > 1)
 
  121       LO blockStart = this->blockOffsets_[i];
 
  122       LO blockEnd = (i == this->numBlocks_ - 1) ? this->blockRows_.size() : this->blockOffsets_[i + 1];
 
  123       ArrayView<const LO> blockRows = this->getBlockRows(i);
 
  127       for(
size_t j = 0; j < size_t(blockRows.size()); j++)
 
  129         LO localCol = this->translateRowToCol(blockRows[j]);
 
  130         colToBlockOffset[localCol] = blockStart + j;
 
  132       for(LO blockRow = 0; blockRow < LO(blockRows.size()); blockRow++)
 
  138         LO inputRow = this->blockRows_[blockStart + blockRow];
 
  139         this->inputBlockMatrix_->getLocalRowView(inputRow, indices, values, numEntries);
 
  140         for(LO k = 0; k < numEntries; k++)
 
  142           LO colOffset = colToBlockOffset[indices[k]];
 
  143           if(blockStart <= colOffset && colOffset < blockEnd)
 
  148             LO blockCol = colOffset - blockStart;
 
  149             for(LO bc = 0; bc < this->bcrsBlockSize_; bc++)
 
  151               for(LO br = 0; br < this->bcrsBlockSize_; br++)
 
  153                 LO r = this->bcrsBlockSize_ * blockRow + br;
 
  154                 LO c = this->bcrsBlockSize_ * blockCol + bc;
 
  168       LO blockStart = this->blockOffsets_[i];
 
  169       LO blockEnd = (i == this->numBlocks_ - 1) ? this->blockRows_.size() : this->blockOffsets_[i + 1];
 
  170       ArrayView<const LO> blockRows = this->getBlockRows(i);
 
  174       for(
size_t j = 0; j < size_t(blockRows.size()); j++)
 
  177         LO localCol = this->translateRowToCol(blockRows[j]);
 
  178         colToBlockOffset[localCol] = blockStart + j;
 
  180       for(LO blockRow = 0; blockRow < LO(blockRows.size()); blockRow++)
 
  183         LO inputSplitRow = this->blockRows_[blockStart + blockRow];
 
  184         auto rowView = this->getInputRowView(inputSplitRow);
 
  185         for(
size_t k = 0; k < rowView.size(); k++)
 
  187           LO colOffset = colToBlockOffset[rowView.ind(k)];
 
  188           if(blockStart <= colOffset && colOffset < blockEnd)
 
  190             LO blockCol = colOffset - blockStart;
 
  191             maxSub = std::max(maxSub, blockRow - blockCol);
 
  192             maxSuper = std::max(maxSuper, blockCol - blockRow);
 
  202 template<
class MatrixType, 
class LocalScalarType>
 
  214   for(LO b = 0; b < this->numBlocks_; b++)
 
  216     LO stride = 2 * kl_[b] + ku_[b] + 1;
 
  217     scalarOffsets_[b] = totalScalars;
 
  218     totalScalars += stride * this->blockSizes_[b] * this->scalarsPerRow_;
 
  220   scalars_.resize(totalScalars);
 
  221   for(
int b = 0; b < this->numBlocks_; b++)
 
  225     LO nrows = this->blockSizes_[b] * this->scalarsPerRow_;
 
  226     diagBlocks_.emplace_back(
Teuchos::View, scalars_.data() + scalarOffsets_[b], 2 * kl_[b] + ku_[b] + 1, nrows, nrows, kl_[b], kl_[b] + ku_[b]);
 
  229   std::fill (ipiv_.begin (), ipiv_.end (), 0);
 
  232   this->IsComputed_ = 
false;
 
  233   this->IsInitialized_ = 
true;
 
  236 template<
class MatrixType, 
class LocalScalarType>
 
  242     ipiv_.size () != this->blockRows_.size() * this->scalarsPerRow_, std::logic_error,
 
  243     "Ifpack2::BandedContainer::compute: ipiv_ array has the wrong size.  " 
  244     "Please report this bug to the Ifpack2 developers.");
 
  246   this->IsComputed_ = 
false;
 
  247   if (!this->isInitialized ()) {
 
  254   this->IsComputed_ = 
true;
 
  257 template<
class MatrixType, 
class LocalScalarType>
 
  263   SC zero = STS::zero();
 
  271   if(this->scalarsPerRow_ > 1)
 
  273     Array<LO> colToBlockOffset(this->inputBlockMatrix_->getNodeNumCols(), INVALID);
 
  274     for(
int i = 0; i < this->numBlocks_; i++)
 
  277       LO blockStart = this->blockOffsets_[i];
 
  278       LO blockEnd = blockStart + this->blockSizes_[i];
 
  279       ArrayView<const LO> blockRows = this->getBlockRows(i);
 
  283       for(
size_t j = 0; j < size_t(blockRows.size()); j++)
 
  285         LO localCol = this->translateRowToCol(blockRows[j]);
 
  286         colToBlockOffset[localCol] = blockStart + j;
 
  288       for(LO blockRow = 0; blockRow < LO(blockRows.size()); blockRow++)
 
  294         LO inputRow = this->blockRows_[blockStart + blockRow];
 
  295         this->inputBlockMatrix_->getLocalRowView(inputRow, indices, values, numEntries);
 
  296         for(LO k = 0; k < numEntries; k++)
 
  298           LO colOffset = colToBlockOffset[indices[k]];
 
  299           if(blockStart <= colOffset && colOffset < blockEnd)
 
  304             LO blockCol = colOffset - blockStart;
 
  305             for(LO bc = 0; bc < this->bcrsBlockSize_; bc++)
 
  307               for(LO br = 0; br < this->bcrsBlockSize_; br++)
 
  309                 LO r = this->bcrsBlockSize_ * blockRow + br;
 
  310                 LO c = this->bcrsBlockSize_ * blockCol + bc;
 
  311                 auto val = values[k * (this->bcrsBlockSize_ * this->bcrsBlockSize_) + (br + this->bcrsBlockSize_ * bc)];
 
  313                   diagBlocks_[i](r, c) = val;
 
  325     Array<LO> colToBlockOffset(this->inputMatrix_->getNodeNumCols() * this->bcrsBlockSize_, INVALID);
 
  326     for(
int i = 0; i < this->numBlocks_; i++)
 
  329       LO blockStart = this->blockOffsets_[i];
 
  330       LO blockEnd = blockStart + this->blockSizes_[i];
 
  331       ArrayView<const LO> blockRows = this->getBlockRows(i);
 
  335       for(
size_t j = 0; j < size_t(blockRows.size()); j++)
 
  338         LO localCol = this->translateRowToCol(blockRows[j]);
 
  339         colToBlockOffset[localCol] = blockStart + j;
 
  341       for(
size_t blockRow = 0; blockRow < size_t(blockRows.size()); blockRow++)
 
  344         LO inputPointRow = this->blockRows_[blockStart + blockRow];
 
  345         auto rowView = this->getInputRowView(inputPointRow);
 
  346         for(
size_t k = 0; k < rowView.size(); k++)
 
  348           LO colOffset = colToBlockOffset[rowView.ind(k)];
 
  349           if(blockStart <= colOffset && colOffset < blockEnd)
 
  351             LO blockCol = colOffset - blockStart;
 
  352             auto val = rowView.val(k);
 
  354               diagBlocks_[i](blockRow, blockCol) = rowView.val(k);
 
  362 template<
class MatrixType, 
class LocalScalarType>
 
  364 BandedContainer<MatrixType, LocalScalarType>::
 
  369   ContainerImpl<MatrixType, LocalScalarType>::clearBlocks();
 
  372 template<
class MatrixType, 
class LocalScalarType>
 
  374 BandedContainer<MatrixType, LocalScalarType>::
 
  380   for(
int i = 0; i < this->numBlocks_; i++)
 
  384                        "BandedContainer<T>::factor: Diagonal block is an empty SerialBandDenseMatrix<T>!");
 
  386                        "BandedContainer<T>::factor: Diagonal block needs kl additional superdiagonals for factorization! However, the number of superdiagonals is smaller than the number of subdiagonals!");
 
  387     int* blockIpiv = &ipiv_[this->blockOffsets_[i] * this->scalarsPerRow_];
 
  388     lapack.
GBTRF (diagBlocks_[i].numRows(),
 
  389         diagBlocks_[i].numCols(),
 
  390         diagBlocks_[i].lowerBandwidth(),
 
  391         diagBlocks_[i].upperBandwidth() - diagBlocks_[i].lowerBandwidth(), 
 
  392         diagBlocks_[i].values(),
 
  393         diagBlocks_[i].stride(),
 
  399       INFO < 0, std::logic_error, 
"Ifpack2::BandedContainer::factor: " 
  400       "LAPACK's _GBTRF (LU factorization with partial pivoting) was called " 
  401       "incorrectly.  INFO = " << INFO << 
" < 0.  " 
  402       "Please report this bug to the Ifpack2 developers.");
 
  407       INFO > 0, std::runtime_error, 
"Ifpack2::BandedContainer::factor: " 
  408       "LAPACK's _GBTRF (LU factorization with partial pivoting) reports that the " 
  409       "computed U factor is exactly singular.  U(" << INFO << 
"," << INFO << 
") " 
  410       "(one-based index i) is exactly zero.  This probably means that the input " 
  411       "matrix has a singular diagonal block.");
 
  415 template<
class MatrixType, 
class LocalScalarType>
 
  417 BandedContainer<MatrixType, LocalScalarType>::
 
  418 solveBlock(HostSubviewLocal X,
 
  423            const LSC beta)
 const 
  425   #ifdef HAVE_IFPACK2_DEBUG 
  427     X.extent (0) != Y.extent (0),
 
  428     std::logic_error, 
"Ifpack2::BandedContainer::solveBlock: X and Y have " 
  429     "incompatible dimensions (" << X.extent (0) << 
" resp. " 
  430     << Y.extent (0) << 
").  Please report this bug to " 
  431     "the Ifpack2 developers.");
 
  433     X.extent (0) != 
static_cast<size_t> (mode == 
Teuchos::NO_TRANS ? diagBlocks_[blockIndex].numCols() : diagBlocks_[blockIndex].numRows()),
 
  434     std::logic_error, 
"Ifpack2::BandedContainer::solveBlock: The input " 
  435     "multivector X has incompatible dimensions from those of the " 
  436     "inverse operator (" << X.extent (0) << 
" vs. " 
  437     << (mode == 
Teuchos::NO_TRANS ? diagBlocks_[blockIndex].numCols() : diagBlocks_[blockIndex].numRows())
 
  438     << 
").  Please report this bug to the Ifpack2 developers.");
 
  440     Y.extent (0) != 
static_cast<size_t> (mode == 
Teuchos::NO_TRANS ? diagBlocks_[blockIndex].numRows() : diagBlocks_[blockIndex].numCols()),
 
  441     std::logic_error, 
"Ifpack2::BandedContainer::solveBlock: The output " 
  442     "multivector Y has incompatible dimensions from those of the " 
  443     "inverse operator (" << Y.extent (0) << 
" vs. " 
  444     << (mode == 
Teuchos::NO_TRANS ? diagBlocks_[blockIndex].numRows() : diagBlocks_[blockIndex].numCols())
 
  445     << 
").  Please report this bug to the Ifpack2 developers.");
 
  448   size_t numRows = X.extent (0);
 
  449   size_t numVecs = X.extent (1);
 
  457       for(
size_t j = 0; j < numVecs; j++)
 
  458         for(
size_t i = 0; i < numRows; i++)
 
  462       for(
size_t j = 0; j < numVecs; j++)
 
  463         for(
size_t i = 0; i < numRows; i++)
 
  464           Y(i, j) = beta * Y(i, j);
 
  472     std::vector<LSC> yTemp(numVecs * numRows);
 
  473     for(
size_t j = 0; j < numVecs; j++)
 
  475       for(
size_t i = 0; i < numRows; i++)
 
  476         yTemp[j * numRows + i] = X(i, j);
 
  482     const int* blockIpiv = &ipiv_[this->blockOffsets_[blockIndex] * this->scalarsPerRow_];
 
  485         diagBlocks_[blockIndex].numCols(),
 
  486         diagBlocks_[blockIndex].lowerBandwidth(),
 
  488         diagBlocks_[blockIndex].upperBandwidth() - diagBlocks_[blockIndex].lowerBandwidth(),
 
  490         diagBlocks_[blockIndex].values(),
 
  491         diagBlocks_[blockIndex].stride(),
 
  498       for(
size_t j = 0; j < numVecs; j++)
 
  500         for(
size_t i = 0; i < numRows; i++)
 
  502           Y(i, j) *= ISC(beta);
 
  503           Y(i, j) += ISC(alpha * yTemp[j * numRows + i]);
 
  508       for(
size_t j = 0; j < numVecs; j++)
 
  510         for(
size_t i = 0; i < numRows; i++)
 
  511           Y(i, j) = ISC(yTemp[j * numRows + i]);
 
  516       INFO != 0, std::runtime_error, 
"Ifpack2::BandedContainer::solveBlock: " 
  517       "LAPACK's _GBTRS (solve using LU factorization with partial pivoting) " 
  518       "failed with INFO = " << INFO << 
" != 0.");
 
  522 template<
class MatrixType, 
class LocalScalarType>
 
  533 template<
class MatrixType, 
class LocalScalarType>
 
  538   std::ostringstream oss;
 
  540   if (this->isInitialized()) {
 
  541     if (this->isComputed()) {
 
  542       oss << 
"{status = initialized, computed";
 
  545       oss << 
"{status = initialized, not computed";
 
  549     oss << 
"{status = not initialized, not computed";
 
  555 template<
class MatrixType, 
class LocalScalarType>
 
  562   os << 
"================================================================================" << std::endl;
 
  563   os << 
"Ifpack2::BandedContainer" << std::endl;
 
  564   for(
int i = 0; i < this->numBlocks_; i++)
 
  566     os << 
"Block " << i << 
": Number of rows           = " << this->blockSizes_[i] << std::endl;
 
  567     os << 
"Block " << i << 
": Number of subdiagonals   = " << diagBlocks_[i].lowerBandwidth() << std::endl;
 
  568     os << 
"Block " << i << 
": Number of superdiagonals = " << diagBlocks_[i].upperBandwidth() << std::endl;
 
  570   os << 
"isInitialized()          = " << this->IsInitialized_ << std::endl;
 
  571   os << 
"isComputed()             = " << this->IsComputed_ << std::endl;
 
  572   os << 
"================================================================================" << std::endl;
 
  576 template<
class MatrixType, 
class LocalScalarType>
 
  584 #define IFPACK2_BANDEDCONTAINER_INSTANT(S,LO,GO,N) \ 
  585   template class Ifpack2::BandedContainer< Tpetra::RowMatrix<S, LO, GO, N>, S >; 
  587 #endif // IFPACK2_BANDEDCONTAINER_HPP 
virtual void initialize() override
Do all set-up operations that only require matrix structure. 
Definition: Ifpack2_BandedContainer_def.hpp:205
 
T & get(const std::string &name, T def_value)
 
The implementation of the numerical features of Container (Jacobi, Gauss-Seidel, SGS). This class allows a custom scalar type (LocalScalarType) to be used for storing blocks and solving the block systems. Hiding this template parameter from the Container interface simplifies the BlockRelaxation and ContainerFactory classes. 
Definition: Ifpack2_Container_decl.hpp:342
 
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
 
void GBTRF(const OrdinalType &m, const OrdinalType &n, const OrdinalType &kl, const OrdinalType &ku, ScalarType *A, const OrdinalType &lda, OrdinalType *IPIV, OrdinalType *info) const 
 
virtual void compute() override
Initialize and compute each block. 
Definition: Ifpack2_BandedContainer_def.hpp:239
 
bool isParameter(const std::string &name) const 
 
virtual std::string description() const 
 
static std::string getName()
Get the name of this container type for Details::constructContainer() 
Definition: Ifpack2_BandedContainer_def.hpp:577
 
virtual std::ostream & print(std::ostream &os) const override
Print information about this object to the given output stream. 
Definition: Ifpack2_BandedContainer_def.hpp:525
 
basic_FancyOStream & setOutputToRootOnly(const int rootRank)
 
virtual ~BandedContainer()
Destructor (declared virtual for memory safety of derived classes). 
Definition: Ifpack2_BandedContainer_def.hpp:79
 
void computeBandwidth()
Definition: Ifpack2_BandedContainer_def.hpp:101
 
void GBTRS(const char &TRANS, const OrdinalType &n, const OrdinalType &kl, const OrdinalType &ku, const OrdinalType &nrhs, const ScalarType *A, const OrdinalType &lda, const OrdinalType *IPIV, ScalarType *B, const OrdinalType &ldb, OrdinalType *info) const 
 
BandedContainer(const Teuchos::RCP< const row_matrix_type > &matrix, const Teuchos::Array< Teuchos::Array< LO > > &partitions, const Teuchos::RCP< const import_type > &, bool pointIndexed)
Constructor. 
Definition: Ifpack2_BandedContainer_def.hpp:62
 
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const override
Print the object with some verbosity level to the given FancyOStream. 
Definition: Ifpack2_BandedContainer_def.hpp:558
 
virtual void setParameters(const Teuchos::ParameterList &List) override
Set all necessary parameters (super- and sub-diagonal bandwidth) 
Definition: Ifpack2_BandedContainer_def.hpp:83
 
Store and solve a local Banded linear problem. 
Definition: Ifpack2_BandedContainer_decl.hpp:102
 
virtual std::string description() const override
A one-line description of this object. 
Definition: Ifpack2_BandedContainer_def.hpp:536