Ifpack2 Templated Preconditioning Package  Version 1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Ifpack2_TriDiContainer_def.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4 //
5 // Copyright 2009 NTESS and the Ifpack2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef IFPACK2_TRIDICONTAINER_DEF_HPP
11 #define IFPACK2_TRIDICONTAINER_DEF_HPP
12 
13 #include "Teuchos_LAPACK.hpp"
14 
15 #ifdef HAVE_MPI
16 # include <mpi.h>
18 #else
19 # include "Teuchos_DefaultSerialComm.hpp"
20 #endif // HAVE_MPI
21 
22 
23 namespace Ifpack2 {
24 
25 template<class MatrixType, class LocalScalarType>
28  const Teuchos::Array<Teuchos::Array<LO> >& partitions,
30  bool pointIndexed) :
31  ContainerImpl<MatrixType, LocalScalarType>(matrix, partitions, pointIndexed),
32  ipiv_(this->blockRows_.size() * this->scalarsPerRow_),
33  scalarOffsets_ (this->numBlocks_)
34 {
36  ! matrix->hasColMap (), std::invalid_argument, "Ifpack2::TriDiContainer: "
37  "The constructor's input matrix must have a column Map.");
38  // FIXME (mfh 25 Aug 2013) What if the matrix's row Map has a
39  // different index base than zero?
40  //compute scalar array offsets (probably different from blockOffsets_)
41  LO scalarTotal = 0;
42  for(LO i = 0; i < this->numBlocks_; i++)
43  {
44  scalarOffsets_[i] = scalarTotal;
45  //actualBlockRows is how many scalars it takes to store the diagonal for block i
46  LO actualBlockRows = this->scalarsPerRow_ * this->blockSizes_[i];
47  if(actualBlockRows == 1)
48  {
49  scalarTotal += 1;
50  }
51  else
52  {
53  //this formula is exact for any block size of 1 or more
54  //includes 1 subdiagonal and 2 superdiagonals.
55  scalarTotal += 4 * (actualBlockRows - 1);
56  }
57  }
58  //Allocate scalar arrays
59  scalars_.resize(scalarTotal);
60  diagBlocks_.reserve(this->numBlocks_);
61  for(int i = 0; i < this->numBlocks_; i++)
62  {
63  diagBlocks_.emplace_back(Teuchos::View, scalars_.data() + scalarOffsets_[i], this->blockSizes_[i] * this->scalarsPerRow_);
64  }
65 }
66 
67 template<class MatrixType, class LocalScalarType>
70 
71 template<class MatrixType, class LocalScalarType>
73 {
74  for(int i = 0; i < this->numBlocks_; i++)
75  diagBlocks_[i].putScalar(Teuchos::ScalarTraits<LSC>::zero());
76  this->IsInitialized_ = true;
77  // We assume that if you called this method, you intend to recompute
78  // everything.
79  this->IsComputed_ = false;
80 }
81 
82 template<class MatrixType, class LocalScalarType>
84 {
85  #ifdef HAVE_IFPACK2_DEBUG
87  ipiv_.size () != this->blockRows_.size() * this->scalarsPerRow_, std::logic_error,
88  "Ifpack2::TriDiContainer::compute: ipiv_ array has the wrong size. "
89  "Please report this bug to the Ifpack2 developers.");
90  #endif
91 
92  this->IsComputed_ = false;
93  if (! this->isInitialized ()) {
94  this->initialize ();
95  }
96  extract (); // extract the submatrices
97  factor (); // factor them
98  this->IsComputed_ = true;
99 }
100 
101 template<class MatrixType, class LocalScalarType>
103 {
104  using Teuchos::Array;
105  using Teuchos::ArrayView;
107  const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
108  //To extract diagonal blocks, need to translate local rows to local columns.
109  //Strategy: make a lookup table that translates local cols in the matrix to offsets in blockRows_:
110  //blockOffsets_[b] <= offset < blockOffsets_[b+1]: tests whether the column is in block b.
111  //offset - blockOffsets_[b]: gives the column within block b.
112  //
113  //This provides the block and col within a block in O(1).
114  if(this->scalarsPerRow_ > 1)
115  {
116  Array<LO> colToBlockOffset(this->inputBlockMatrix_->getLocalNumCols(), INVALID);
117  for(int i = 0; i < this->numBlocks_; i++)
118  {
119  //Get the interval where block i is defined in blockRows_
120  LO blockStart = this->blockOffsets_[i];
121  LO blockEnd = blockStart + this->blockSizes_[i];
122  ArrayView<const LO> blockRows = this->getBlockRows(i);
123  //Set the lookup table entries for the columns appearing in block i.
124  //If OverlapLevel_ > 0, then this may overwrite values for previous blocks, but
125  //this is OK. The values updated here are only needed to process block i's entries.
126  for(size_t j = 0; j < size_t(blockRows.size()); j++)
127  {
128  LO localCol = this->translateRowToCol(blockRows[j]);
129  colToBlockOffset[localCol] = blockStart + j;
130  }
131  using h_inds_type = typename block_crs_matrix_type::local_inds_host_view_type;
132  using h_vals_type = typename block_crs_matrix_type::values_host_view_type;
133  for(LO blockRow = 0; blockRow < LO(blockRows.size()); blockRow++)
134  {
135  //get a raw view of the whole block row
136  h_inds_type indices;
137  h_vals_type values;
138  LO inputRow = this->blockRows_[blockStart + blockRow];
139  this->inputBlockMatrix_->getLocalRowView(inputRow, indices, values);
140  LO numEntries = (LO) indices.size();
141  for(LO k = 0; k < numEntries; k++)
142  {
143  LO colOffset = colToBlockOffset[indices[k]];
144  if(blockStart <= colOffset && colOffset < blockEnd)
145  {
146  //This entry does appear in the diagonal block.
147  //(br, bc) identifies the scalar's position in the BlockCrs block.
148  //Convert this to (r, c) which is its position in the container block.
149  LO blockCol = colOffset - blockStart;
150  for(LO bc = 0; bc < this->bcrsBlockSize_; bc++)
151  {
152  for(LO br = 0; br < this->bcrsBlockSize_; br++)
153  {
154  LO r = this->bcrsBlockSize_ * blockRow + br;
155  LO c = this->bcrsBlockSize_ * blockCol + bc;
156  auto val = values[k * (this->bcrsBlockSize_ * this->bcrsBlockSize_) + (br + this->bcrsBlockSize_ * bc)];
157  if(val != zero)
158  diagBlocks_[i](r, c) = val;
159  }
160  }
161  }
162  }
163  }
164  }
165  }
166  else
167  {
168  //get the mapping from point-indexed matrix columns to offsets in blockRows_
169  //(this includes regular CrsMatrix columns, in which case bcrsBlockSize_ == 1)
170  Array<LO> colToBlockOffset(this->inputMatrix_->getLocalNumCols() * this->bcrsBlockSize_, INVALID);
171  for(int i = 0; i < this->numBlocks_; i++)
172  {
173  //Get the interval where block i is defined in blockRows_
174  LO blockStart = this->blockOffsets_[i];
175  LO blockEnd = blockStart + this->blockSizes_[i];
176  ArrayView<const LO> blockRows = this->getBlockRows(i);
177  //Set the lookup table entries for the columns appearing in block i.
178  //If OverlapLevel_ > 0, then this may overwrite values for previous blocks, but
179  //this is OK. The values updated here are only needed to process block i's entries.
180  for(size_t j = 0; j < size_t(blockRows.size()); j++)
181  {
182  //translateRowToCol will return the corresponding split column
183  LO localCol = this->translateRowToCol(blockRows[j]);
184  colToBlockOffset[localCol] = blockStart + j;
185  }
186  for(size_t blockRow = 0; blockRow < size_t(blockRows.size()); blockRow++)
187  {
188  //get a view of the split row
189  LO inputPointRow = this->blockRows_[blockStart + blockRow];
190  auto rowView = this->getInputRowView(inputPointRow);
191  for(size_t k = 0; k < rowView.size(); k++)
192  {
193  LO colOffset = colToBlockOffset[rowView.ind(k)];
194  if(blockStart <= colOffset && colOffset < blockEnd)
195  {
196  LO blockCol = colOffset - blockStart;
197  auto val = rowView.val(k);
198  if(val != zero)
199  diagBlocks_[i](blockRow, blockCol) = rowView.val(k);
200  }
201  }
202  }
203  }
204  }
205 }
206 
207 template<class MatrixType, class LocalScalarType>
208 void TriDiContainer<MatrixType, LocalScalarType>::clearBlocks ()
209 {
210  diagBlocks_.clear();
211  scalars_.clear();
212  ContainerImpl<MatrixType, LocalScalarType>::clearBlocks();
213 }
214 
215 template<class MatrixType, class LocalScalarType>
216 void TriDiContainer<MatrixType, LocalScalarType>::factor ()
217 {
218  for(int i = 0; i < this->numBlocks_; i++)
219  {
221  int INFO = 0;
222  int* blockIpiv = &ipiv_[this->blockOffsets_[i] * this->scalarsPerRow_];
223  lapack.GTTRF (diagBlocks_[i].numRowsCols (),
224  diagBlocks_[i].DL(),
225  diagBlocks_[i].D(),
226  diagBlocks_[i].DU(),
227  diagBlocks_[i].DU2(),
228  blockIpiv, &INFO);
229  // INFO < 0 is a bug.
231  INFO < 0, std::logic_error, "Ifpack2::TriDiContainer::factor: "
232  "LAPACK's _GTTRF (LU factorization with partial pivoting) was called "
233  "incorrectly. INFO = " << INFO << " < 0. "
234  "Please report this bug to the Ifpack2 developers.");
235  // INFO > 0 means the matrix is singular. This is probably an issue
236  // either with the choice of rows the rows we extracted, or with the
237  // input matrix itself.
239  INFO > 0, std::runtime_error, "Ifpack2::TriDiContainer::factor: "
240  "LAPACK's _GTTRF (LU factorization with partial pivoting) reports that the "
241  "computed U factor is exactly singular. U(" << INFO << "," << INFO << ") "
242  "(one-based index i) is exactly zero. This probably means that the input "
243  "matrix has a singular diagonal block.");
244  }
245 }
246 
247 template<class MatrixType, class LocalScalarType>
249 solveBlock(ConstHostSubviewLocal X,
250  HostSubviewLocal Y,
251  int blockIndex,
252  Teuchos::ETransp mode,
253  LSC alpha,
254  LSC beta) const
255 {
257  size_t numVecs = X.extent(1);
258  size_t numRows = X.extent(0);
259 
260  #ifdef HAVE_IFPACK2_DEBUG
262  X.extent (0) != Y.extent (0),
263  std::logic_error, "Ifpack2::TriDiContainer::solveBlock: X and Y have "
264  "incompatible dimensions (" << X.extent (0) << " resp. "
265  << Y.extent (0) << "). Please report this bug to "
266  "the Ifpack2 developers.");
268  X.extent (0) != static_cast<size_t> (diagBlocks_[blockIndex].numRowsCols()),
269  std::logic_error, "Ifpack2::TriDiContainer::solveBlock: The input "
270  "multivector X has incompatible dimensions from those of the "
271  "inverse operator (" << X.extent (0) << " vs. "
272  << (mode == Teuchos::NO_TRANS ? diagBlocks_[blockIndex].numRowsCols () : diagBlocks_[blockIndex].numRowsCols())
273  << "). Please report this bug to the Ifpack2 developers.");
274  TEUCHOS_TEST_FOR_EXCEPTION(
275  Y.extent (0) != static_cast<size_t> (diagBlocks_[blockIndex].numRowsCols()),
276  std::logic_error, "Ifpack2::TriDiContainer::solveBlock: The output "
277  "multivector Y has incompatible dimensions from those of the "
278  "inverse operator (" << Y.extent (0) << " vs. "
279  << (mode == Teuchos::NO_TRANS ? diagBlocks_[blockIndex].numRowsCols() : diagBlocks_[blockIndex].numRowsCols ())
280  << "). Please report this bug to the Ifpack2 developers.");
281  #endif
282 
283  if(alpha == zero) { // don't need to solve the linear system
284  if(beta == zero) {
285  // Use BLAS AXPY semantics for beta == 0: overwrite, clobbering
286  // any Inf or NaN values in Y (rather than multiplying them by
287  // zero, resulting in NaN values).
288  for(size_t j = 0; j < numVecs; j++)
289  for(size_t i = 0; i < numRows; i++)
290  Y(i, j) = zero;
291  }
292  else { // beta != 0
293  for(size_t j = 0; j < numVecs; j++)
294  for(size_t i = 0; i < numRows; i++)
295  Y(i, j) *= ISC(beta);
296  }
297  }
298  else { // alpha != 0; must solve the linear system
300  // If beta is nonzero or Y is not constant stride, we have to use
301  // a temporary output multivector. It gets a copy of X, since
302  // GETRS overwrites its (multi)vector input with its output.
303 
304  std::vector<LSC> yTemp(numVecs * numRows);
305  for(size_t j = 0; j < numVecs; j++)
306  {
307  for(size_t i = 0; i < numRows; i++)
308  yTemp[j * numRows + i] = X(i, j);
309  }
310 
311  int INFO = 0;
312  const char trans =
313  (mode == Teuchos::CONJ_TRANS ? 'C' : (mode == Teuchos::TRANS ? 'T' : 'N'));
314  int* blockIpiv = &ipiv_[this->blockOffsets_[blockIndex] * this->scalarsPerRow_];
315  lapack.GTTRS (trans,
316  diagBlocks_[blockIndex].numRowsCols(),
317  numVecs,
318  diagBlocks_[blockIndex].DL(),
319  diagBlocks_[blockIndex].D(),
320  diagBlocks_[blockIndex].DU(),
321  diagBlocks_[blockIndex].DU2(),
322  blockIpiv,
323  yTemp.data(),
324  numRows,
325  &INFO);
326 
327  if (beta != zero) {
328  for(size_t j = 0; j < numVecs; j++)
329  {
330  for(size_t i = 0; i < numRows; i++)
331  {
332  Y(i, j) *= ISC(beta);
333  Y(i, j) += ISC(alpha * yTemp[j * numRows + i]);
334  }
335  }
336  }
337  else {
338  for(size_t j = 0; j < numVecs; j++)
339  {
340  for(size_t i = 0; i < numRows; i++)
341  Y(i, j) = ISC(yTemp[j * numRows + i]);
342  }
343  }
344 
345  TEUCHOS_TEST_FOR_EXCEPTION(
346  INFO != 0, std::runtime_error, "Ifpack2::TriDiContainer::solveBlock: "
347  "LAPACK's _GETRS (solve using LU factorization with partial pivoting) "
348  "failed with INFO = " << INFO << " != 0.");
349  }
350 }
351 
352 template<class MatrixType, class LocalScalarType>
353 std::ostream& TriDiContainer<MatrixType, LocalScalarType>::print(std::ostream& os) const
354 {
355  Teuchos::FancyOStream fos(Teuchos::rcp(&os,false));
356  fos.setOutputToRootOnly(0);
357  describe(fos);
358  return(os);
359 }
360 
361 template<class MatrixType, class LocalScalarType>
363 {
364  std::ostringstream oss;
366  if (this->isInitialized()) {
367  if (this->isComputed()) {
368  oss << "{status = initialized, computed";
369  }
370  else {
371  oss << "{status = initialized, not computed";
372  }
373  }
374  else {
375  oss << "{status = not initialized, not computed";
376  }
377 
378  oss << "}";
379  return oss.str();
380 }
381 
382 template<class MatrixType, class LocalScalarType>
383 void
386  const Teuchos::EVerbosityLevel verbLevel) const
387 {
388  using std::endl;
389  if(verbLevel==Teuchos::VERB_NONE) return;
390  os << "================================================================================" << endl;
391  os << "Ifpack2::TriDiContainer" << endl;
392  os << "Number of blocks = " << this->numBlocks_ << endl;
393  os << "isInitialized() = " << this->IsInitialized_ << endl;
394  os << "isComputed() = " << this->IsComputed_ << endl;
395  os << "================================================================================" << endl;
396  os << endl;
397 }
398 
399 template<class MatrixType, class LocalScalarType>
401 {
402  return "TriDi";
403 }
404 
405 #define IFPACK2_TRIDICONTAINER_INSTANT(S,LO,GO,N) \
406  template class Ifpack2::TriDiContainer< Tpetra::RowMatrix<S, LO, GO, N>, S >;
407 
408 } // namespace Ifpack2
409 
410 #endif
Store and solve a local TriDi linear problem.
Definition: Ifpack2_TriDiContainer_decl.hpp:72
virtual std::string description() const
A one-line description of this object.
Definition: Ifpack2_TriDiContainer_def.hpp:362
static std::string getName()
Get the name of this container type for Details::constructContainer()
Definition: Ifpack2_TriDiContainer_def.hpp:400
Teuchos::Array< LO > blockSizes_
Number of rows in each block.
Definition: Ifpack2_Container_decl.hpp:263
virtual void compute()
Extract the local diagonal block and prepare the solver.
Definition: Ifpack2_TriDiContainer_def.hpp:83
typename Kokkos::ArithTraits< SC >::val_type ISC
Internal representation of Scalar in Kokkos::View.
Definition: Ifpack2_Container_decl.hpp:102
int numBlocks_
The number of blocks (partitions) in the container.
Definition: Ifpack2_Container_decl.hpp:259
void GTTRS(const char &TRANS, const OrdinalType &n, const OrdinalType &nrhs, const ScalarType *dl, const ScalarType *d, const ScalarType *du, const ScalarType *du2, const OrdinalType *IPIV, ScalarType *B, const OrdinalType &ldb, OrdinalType *info) const
void GTTRF(const OrdinalType &n, ScalarType *dl, ScalarType *d, ScalarType *du, ScalarType *du2, OrdinalType *IPIV, OrdinalType *info) const
The implementation of the numerical features of Container (Jacobi, Gauss-Seidel, SGS). This class allows a custom scalar type (LocalScalarType) to be used for storing blocks and solving the block systems. Hiding this template parameter from the Container interface simplifies the BlockRelaxation and ContainerFactory classes.
Definition: Ifpack2_Container_decl.hpp:310
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
LO scalarsPerRow_
Definition: Ifpack2_Container_decl.hpp:284
virtual ~TriDiContainer()
Destructor (declared virtual for memory safety of derived classes).
Definition: Ifpack2_TriDiContainer_def.hpp:69
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object with some verbosity level to the given FancyOStream.
Definition: Ifpack2_TriDiContainer_def.hpp:385
virtual std::ostream & print(std::ostream &os) const
Print information about this object to the given output stream.
Definition: Ifpack2_TriDiContainer_def.hpp:353
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
virtual std::string description() const
basic_FancyOStream & setOutputToRootOnly(const int rootRank)
void resize(size_type new_size, const value_type &x=value_type())
TriDiContainer(const Teuchos::RCP< const row_matrix_type > &matrix, const Teuchos::Array< Teuchos::Array< LO > > &partitions, const Teuchos::RCP< const import_type > &importer, bool pointIndexed)
Constructor.
Definition: Ifpack2_TriDiContainer_def.hpp:27
virtual void initialize()
Do all set-up operations that only require matrix structure.
Definition: Ifpack2_TriDiContainer_def.hpp:72
void solveBlock(ConstHostSubviewLocal X, HostSubviewLocal Y, int blockIndex, Teuchos::ETransp mode, LSC alpha, LSC beta) const
Definition: Ifpack2_TriDiContainer_def.hpp:249