Ifpack2 Templated Preconditioning Package  Version 1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Ifpack2_DenseContainer_def.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4 //
5 // Copyright 2009 NTESS and the Ifpack2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef IFPACK2_DENSECONTAINER_DEF_HPP
11 #define IFPACK2_DENSECONTAINER_DEF_HPP
12 
13 #include "Tpetra_CrsMatrix.hpp"
14 #include "Teuchos_LAPACK.hpp"
15 #include "Tpetra_BlockMultiVector.hpp"
16 
17 #ifdef HAVE_MPI
18 # include <mpi.h>
20 #else
21 # include "Teuchos_DefaultSerialComm.hpp"
22 #endif // HAVE_MPI
23 
24 namespace Ifpack2 {
25 
26 template<class MatrixType, class LocalScalarType>
29  const Teuchos::Array<Teuchos::Array<LO> >& partitions,
31  bool pointIndexed) :
32  ContainerImpl<MatrixType, LocalScalarType> (matrix, partitions, pointIndexed),
33  scalarOffsets_ (this->numBlocks_)
34 {
36  !matrix->hasColMap(), std::invalid_argument, "Ifpack2::DenseContainer: "
37  "The constructor's input matrix must have a column Map.");
38 
39  //compute scalarOffsets_
40  GO totalScalars = 0;
41  for(LO i = 0; i < this->numBlocks_; i++)
42  {
43  scalarOffsets_[i] = totalScalars;
44  totalScalars += this->blockSizes_[i] * this->scalarsPerRow_ *
45  this->blockSizes_[i] * this->scalarsPerRow_;
46  }
47  scalars_.resize(totalScalars);
48  for(int i = 0; i < this->numBlocks_; i++)
49  {
50  LO denseRows = this->blockSizes_[i] * this->scalarsPerRow_;
51  //create square dense matrix (stride is same as rows and cols)
52  diagBlocks_.emplace_back(Teuchos::View, scalars_.data() + scalarOffsets_[i], denseRows, denseRows, denseRows);
53  }
54 
55  ipiv_.resize(this->blockRows_.size() * this->scalarsPerRow_);
56 }
57 
58 template<class MatrixType, class LocalScalarType>
61 
62 template<class MatrixType, class LocalScalarType>
63 void
66 {
67  // Fill the diagonal block and LU permutation array with zeros.
68  for(int i = 0; i < this->numBlocks_; i++)
69  diagBlocks_[i].putScalar(Teuchos::ScalarTraits<LSC>::zero());
70  std::fill (ipiv_.begin (), ipiv_.end (), 0);
71 
72  this->IsInitialized_ = true;
73  // We assume that if you called this method, you intend to recompute
74  // everything.
75  this->IsComputed_ = false;
76 }
77 
78 template<class MatrixType, class LocalScalarType>
79 void
82 {
83 // FIXME: I am commenting this out because it breaks block CRS support
84 // TEUCHOS_TEST_FOR_EXCEPTION(
85 // static_cast<size_t> (ipiv_.size ()) != numRows_, std::logic_error,
86 // "Ifpack2::DenseContainer::compute: ipiv_ array has the wrong size. "
87 // "Please report this bug to the Ifpack2 developers.");
88 
89  this->IsComputed_ = false;
90  if (!this->isInitialized ()) {
91  this->initialize();
92  }
93 
94  extract (); // Extract the submatrices
95  factor (); // Factor them
96  this->IsComputed_ = true;
97 }
98 
99 template<class MatrixType, class LocalScalarType>
101 {
102  using Teuchos::Array;
103  using Teuchos::ArrayView;
104  using STS = Teuchos::ScalarTraits<SC>;
105  SC zero = STS::zero();
106  const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
107  //To extract diagonal blocks, need to translate local rows to local columns.
108  //Strategy: make a lookup table that translates local cols in the matrix to offsets in blockRows_:
109  //blockOffsets_[b] <= offset < blockOffsets_[b+1]: tests whether the column is in block b.
110  //offset - blockOffsets_[b]: gives the column within block b.
111  //
112  //This provides the block and col within a block in O(1).
113  if(this->scalarsPerRow_ > 1)
114  {
115  Array<LO> colToBlockOffset(this->inputBlockMatrix_->getLocalNumCols(), INVALID);
116  for(int i = 0; i < this->numBlocks_; i++)
117  {
118  //Get the interval where block i is defined in blockRows_
119  LO blockStart = this->blockOffsets_[i];
120  LO blockEnd = blockStart + this->blockSizes_[i];
121  ArrayView<const LO> blockRows = this->getBlockRows(i);
122  //Set the lookup table entries for the columns appearing in block i.
123  //If OverlapLevel_ > 0, then this may overwrite values for previous blocks, but
124  //this is OK. The values updated here are only needed to process block i's entries.
125  for(size_t j = 0; j < size_t(blockRows.size()); j++)
126  {
127  LO localCol = this->translateRowToCol(blockRows[j]);
128  colToBlockOffset[localCol] = blockStart + j;
129  }
130  using h_inds_type = typename block_crs_matrix_type::local_inds_host_view_type;
131  using h_vals_type = typename block_crs_matrix_type::values_host_view_type;
132  for(LO blockRow = 0; blockRow < LO(blockRows.size()); blockRow++)
133  {
134  //get a raw view of the whole block row
135  h_inds_type indices;
136  h_vals_type values;
137  LO inputRow = this->blockRows_[blockStart + blockRow];
138  this->inputBlockMatrix_->getLocalRowView(inputRow, indices, values);
139  LO numEntries = (LO) indices.size();
140  for(LO k = 0; k < numEntries; k++)
141  {
142  LO colOffset = colToBlockOffset[indices[k]];
143  if(blockStart <= colOffset && colOffset < blockEnd)
144  {
145  //This entry does appear in the diagonal block.
146  //(br, bc) identifies the scalar's position in the BlockCrs block.
147  //Convert this to (r, c) which is its position in the container block.
148  LO blockCol = colOffset - blockStart;
149  for(LO bc = 0; bc < this->bcrsBlockSize_; bc++)
150  {
151  for(LO br = 0; br < this->bcrsBlockSize_; br++)
152  {
153  LO r = this->bcrsBlockSize_ * blockRow + br;
154  LO c = this->bcrsBlockSize_ * blockCol + bc;
155  auto val = values[k * (this->bcrsBlockSize_ * this->bcrsBlockSize_) + (br + this->bcrsBlockSize_ * bc)];
156  if(val != zero)
157  diagBlocks_[i](r, c) = val;
158  }
159  }
160  }
161  }
162  }
163  }
164  }
165  else
166  {
167  //get the mapping from point-indexed matrix columns to offsets in blockRows_
168  //(this includes regular CrsMatrix columns, in which case bcrsBlockSize_ == 1)
169  Array<LO> colToBlockOffset(this->inputMatrix_->getLocalNumCols() * this->bcrsBlockSize_, INVALID);
170  for(int i = 0; i < this->numBlocks_; i++)
171  {
172  //Get the interval where block i is defined in blockRows_
173  LO blockStart = this->blockOffsets_[i];
174  LO blockEnd = blockStart + this->blockSizes_[i];
175  ArrayView<const LO> blockRows = this->getBlockRows(i);
176  //Set the lookup table entries for the columns appearing in block i.
177  //If OverlapLevel_ > 0, then this may overwrite values for previous blocks, but
178  //this is OK. The values updated here are only needed to process block i's entries.
179  for(size_t j = 0; j < size_t(blockRows.size()); j++)
180  {
181  //translateRowToCol will return the corresponding split column
182  LO localCol = this->translateRowToCol(blockRows[j]);
183  colToBlockOffset[localCol] = blockStart + j;
184  }
185  for(size_t blockRow = 0; blockRow < size_t(blockRows.size()); blockRow++)
186  {
187  //get a view of the split row
188  LO inputPointRow = this->blockRows_[blockStart + blockRow];
189  auto rowView = this->getInputRowView(inputPointRow);
190  for(size_t k = 0; k < rowView.size(); k++)
191  {
192  LO colOffset = colToBlockOffset[rowView.ind(k)];
193  if(blockStart <= colOffset && colOffset < blockEnd)
194  {
195  LO blockCol = colOffset - blockStart;
196  auto val = rowView.val(k);
197  if(val != zero)
198  diagBlocks_[i](blockRow, blockCol) = rowView.val(k);
199  }
200  }
201  }
202  }
203  }
204 }
205 
206 
207 template<class MatrixType, class LocalScalarType>
208 void
209 DenseContainer<MatrixType, LocalScalarType>::
210 factor ()
211 {
213  for(int i = 0; i < this->numBlocks_; i++)
214  {
215  int INFO = 0;
216  int* blockIpiv = &ipiv_[this->blockOffsets_[i] * this->scalarsPerRow_];
217  lapack.GETRF(diagBlocks_[i].numRows(),
218  diagBlocks_[i].numCols(),
219  diagBlocks_[i].values(),
220  diagBlocks_[i].stride(),
221  blockIpiv, &INFO);
222  // INFO < 0 is a bug.
224  INFO < 0, std::logic_error, "Ifpack2::DenseContainer::factor: "
225  "LAPACK's _GETRF (LU factorization with partial pivoting) was called "
226  "incorrectly. INFO = " << INFO << " < 0. "
227  "Please report this bug to the Ifpack2 developers.");
228  // INFO > 0 means the matrix is singular. This is probably an issue
229  // either with the choice of rows the rows we extracted, or with the
230  // input matrix itself.
232  INFO > 0, std::runtime_error, "Ifpack2::DenseContainer::factor: "
233  "LAPACK's _GETRF (LU factorization with partial pivoting) reports that the "
234  "computed U factor is exactly singular. U(" << INFO << "," << INFO << ") "
235  "(one-based index i) is exactly zero. This probably means that the input "
236  "matrix has a singular diagonal block.");
237  }
238 }
239 
240 template<class MatrixType, class LocalScalarType>
241 void
242 DenseContainer<MatrixType, LocalScalarType>::
243 solveBlock(ConstHostSubviewLocal X,
244  HostSubviewLocal Y,
245  int blockIndex,
246  Teuchos::ETransp mode,
247  LSC alpha,
248  LSC beta) const
249 {
250  #ifdef HAVE_IFPACK2_DEBUG
252  X.extent (0) != Y.extent (0),
253  std::logic_error, "Ifpack2::DenseContainer::solveBlock: X and Y have "
254  "incompatible dimensions (" << X.extent (0) << " resp. "
255  << Y.extent (0) << "). Please report this bug to "
256  "the Ifpack2 developers.");
257 
259  X.extent (1) != Y.extent(1),
260  std::logic_error, "Ifpack2::DenseContainer::solveBlock: X and Y have "
261  "incompatible numbers of vectors (" << X.extent (1) << " resp. "
262  << Y.extent (1) << "). Please report this bug to "
263  "the Ifpack2 developers.");
264  #endif
265 
266  typedef Teuchos::ScalarTraits<LSC> STS;
267  size_t numRows = X.extent(0);
268  size_t numVecs = X.extent(1);
269  if(alpha == STS::zero()) { // don't need to solve the linear system
270  if(beta == STS::zero()) {
271  // Use BLAS AXPY semantics for beta == 0: overwrite, clobbering
272  // any Inf or NaN values in Y (rather than multiplying them by
273  // zero, resulting in NaN values).
274  for(size_t j = 0; j < numVecs; j++)
275  {
276  for(size_t i = 0; i < numRows; i++)
277  Y(i, j) = STS::zero();
278  }
279  }
280  else // beta != 0
281  for(size_t j = 0; j < numVecs; j++)
282  {
283  for(size_t i = 0; i < numRows; i++)
284  Y(i, j) = beta * Y(i, j);
285  }
286  }
287  else { // alpha != 0; must solve the linear system
289  // If beta is nonzero or Y is not constant stride, we have to use
290  // a temporary output multivector. It gets a (deep) copy of X,
291  // since GETRS overwrites its (multi)vector input with its output.
292  std::vector<LSC> yTemp(numVecs * numRows);
293  for(size_t j = 0; j < numVecs; j++)
294  {
295  for(size_t i = 0; i < numRows; i++)
296  yTemp[j * numRows + i] = X(i, j);
297  }
298 
299  int INFO = 0;
300  int* blockIpiv = &ipiv_[this->blockOffsets_[blockIndex] * this->scalarsPerRow_];
301  const char trans =
302  (mode == Teuchos::CONJ_TRANS ? 'C' : (mode == Teuchos::TRANS ? 'T' : 'N'));
303  lapack.GETRS (trans,
304  diagBlocks_[blockIndex].numRows (),
305  numVecs,
306  diagBlocks_[blockIndex].values (),
307  diagBlocks_[blockIndex].stride (),
308  blockIpiv,
309  yTemp.data(),
310  numRows,
311  &INFO);
312 
313  if (beta != STS::zero ()) {
314  for(size_t j = 0; j < numVecs; j++)
315  {
316  for(size_t i = 0; i < numRows; i++)
317  {
318  Y(i, j) *= ISC(beta);
319  Y(i, j) += ISC(alpha * yTemp[j * numRows + i]);
320  }
321  }
322  }
323  else {
324  for(size_t j = 0; j < numVecs; j++)
325  {
326  for(size_t i = 0; i < numRows; i++)
327  Y(i, j) = ISC(yTemp[j * numRows + i]);
328  }
329  }
330 
332  INFO != 0, std::runtime_error, "Ifpack2::DenseContainer::solveBlock: "
333  "LAPACK's _GETRS (solve using LU factorization with partial pivoting) "
334  "failed with INFO = " << INFO << " != 0.");
335  }
336 }
337 
338 template<class MatrixType, class LocalScalarType>
339 std::ostream&
341 print (std::ostream& os) const
342 {
343  Teuchos::FancyOStream fos (Teuchos::rcpFromRef (os));
344  fos.setOutputToRootOnly (0);
345  this->describe (fos);
346  return os;
347 }
348 
349 template<class MatrixType, class LocalScalarType>
350 std::string
352 description () const
353 {
354  std::ostringstream oss;
355  oss << "Ifpack::DenseContainer: ";
356  if (this->isInitialized()) {
357  if (this->isComputed()) {
358  oss << "{status = initialized, computed";
359  }
360  else {
361  oss << "{status = initialized, not computed";
362  }
363  }
364  else {
365  oss << "{status = not initialized, not computed";
366  }
367 
368  oss << "}";
369  return oss.str();
370 }
371 
372 template<class MatrixType, class LocalScalarType>
373 void
376  const Teuchos::EVerbosityLevel verbLevel) const
377 {
378  using std::endl;
379  if(verbLevel==Teuchos::VERB_NONE) return;
380  os << "================================================================================" << endl;
381  os << "Ifpack2::DenseContainer" << endl;
382  for(int i = 0; i < this->numBlocks_; i++)
383  {
384  os << "Block " << i << " number of rows = " << this->blockSizes_[i] << endl;
385  }
386  os << "isInitialized() = " << this->IsInitialized_ << endl;
387  os << "isComputed() = " << this->IsComputed_ << endl;
388  os << "================================================================================" << endl;
389  os << endl;
390 }
391 
392 template<class MatrixType, class LocalScalarType>
394 {
395  diagBlocks_.clear();
396  scalars_.clear();
398 }
399 
400 template<class MatrixType, class LocalScalarType>
402 {
403  return "Dense";
404 }
405 
406 } // namespace Ifpack2
407 
408 // There's no need to instantiate for CrsMatrix too. All Ifpack2
409 // preconditioners can and should do dynamic casts if they need a type
410 // more specific than RowMatrix.
411 
412 #define IFPACK2_DENSECONTAINER_INSTANT(S,LO,GO,N) \
413  template class Ifpack2::DenseContainer< Tpetra::RowMatrix<S, LO, GO, N>, S >;
414 
415 #endif // IFPACK2_DENSECONTAINER_DEF_HPP
Teuchos::Array< LO > blockRows_
Local indices of the rows of the input matrix that belong to this block.
Definition: Ifpack2_Container_decl.hpp:261
Teuchos::Array< LO > blockSizes_
Number of rows in each block.
Definition: Ifpack2_Container_decl.hpp:263
int numBlocks_
The number of blocks (partitions) in the container.
Definition: Ifpack2_Container_decl.hpp:259
The implementation of the numerical features of Container (Jacobi, Gauss-Seidel, SGS). This class allows a custom scalar type (LocalScalarType) to be used for storing blocks and solving the block systems. Hiding this template parameter from the Container interface simplifies the BlockRelaxation and ContainerFactory classes.
Definition: Ifpack2_Container_decl.hpp:310
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
virtual void compute()
Extract the local diagonal block and prepare the solver.
Definition: Ifpack2_DenseContainer_def.hpp:81
LO scalarsPerRow_
Definition: Ifpack2_Container_decl.hpp:284
static std::string getName()
Get the name of this container type for Details::constructContainer()
Definition: Ifpack2_DenseContainer_def.hpp:401
DenseContainer(const Teuchos::RCP< const row_matrix_type > &matrix, const Teuchos::Array< Teuchos::Array< LO > > &partitions, const Teuchos::RCP< const import_type > &importer, bool pointIndexed)
Constructor.
Definition: Ifpack2_DenseContainer_def.hpp:28
void GETRF(const OrdinalType &m, const OrdinalType &n, ScalarType *A, const OrdinalType &lda, OrdinalType *IPIV, OrdinalType *info) const
Store and solve a local dense linear problem.
Definition: Ifpack2_DenseContainer_decl.hpp:70
virtual std::ostream & print(std::ostream &os) const
Print information about this object to the given output stream.
Definition: Ifpack2_DenseContainer_def.hpp:341
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object with some verbosity level to the given FancyOStream.
Definition: Ifpack2_DenseContainer_def.hpp:375
basic_FancyOStream & setOutputToRootOnly(const int rootRank)
void resize(size_type new_size, const value_type &x=value_type())
virtual ~DenseContainer()
Destructor (declared virtual for memory safety of derived classes).
Definition: Ifpack2_DenseContainer_def.hpp:60
void GETRS(const char &TRANS, const OrdinalType &n, const OrdinalType &nrhs, const ScalarType *A, const OrdinalType &lda, const OrdinalType *IPIV, ScalarType *B, const OrdinalType &ldb, OrdinalType *info) const
size_type size() const
virtual std::string description() const
A one-line description of this object.
Definition: Ifpack2_DenseContainer_def.hpp:352
virtual void initialize()
Do all set-up operations that only require matrix structure.
Definition: Ifpack2_DenseContainer_def.hpp:65