Ifpack2 Templated Preconditioning Package  Version 1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Ifpack2_TriDiContainer_def.hpp
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 //@HEADER
41 */
42 
43 #ifndef IFPACK2_TRIDICONTAINER_DEF_HPP
44 #define IFPACK2_TRIDICONTAINER_DEF_HPP
45 
46 #include "Teuchos_LAPACK.hpp"
47 
48 #ifdef HAVE_MPI
49 # include <mpi.h>
51 #else
52 # include "Teuchos_DefaultSerialComm.hpp"
53 #endif // HAVE_MPI
54 
55 
56 namespace Ifpack2 {
57 
58 template<class MatrixType, class LocalScalarType>
61  const Teuchos::Array<Teuchos::Array<LO> >& partitions,
63  bool pointIndexed) :
64  ContainerImpl<MatrixType, LocalScalarType>(matrix, partitions, pointIndexed),
65  ipiv_(this->blockRows_.size() * this->scalarsPerRow_),
66  scalarOffsets_ (this->numBlocks_)
67 {
69  ! matrix->hasColMap (), std::invalid_argument, "Ifpack2::TriDiContainer: "
70  "The constructor's input matrix must have a column Map.");
71  // FIXME (mfh 25 Aug 2013) What if the matrix's row Map has a
72  // different index base than zero?
73  //compute scalar array offsets (probably different from blockOffsets_)
74  LO scalarTotal = 0;
75  for(LO i = 0; i < this->numBlocks_; i++)
76  {
77  scalarOffsets_[i] = scalarTotal;
78  //actualBlockRows is how many scalars it takes to store the diagonal for block i
79  LO actualBlockRows = this->scalarsPerRow_ * this->blockSizes_[i];
80  if(actualBlockRows == 1)
81  {
82  scalarTotal += 1;
83  }
84  else
85  {
86  //this formula is exact for any block size of 1 or more
87  //includes 1 subdiagonal and 2 superdiagonals.
88  scalarTotal += 4 * (actualBlockRows - 1);
89  }
90  }
91  //Allocate scalar arrays
92  scalars_.resize(scalarTotal);
93  diagBlocks_.reserve(this->numBlocks_);
94  for(int i = 0; i < this->numBlocks_; i++)
95  {
96  diagBlocks_.emplace_back(Teuchos::View, scalars_.data() + scalarOffsets_[i], this->blockSizes_[i] * this->scalarsPerRow_);
97  }
98 }
99 
100 template<class MatrixType, class LocalScalarType>
103 
104 template<class MatrixType, class LocalScalarType>
106 {
107  for(int i = 0; i < this->numBlocks_; i++)
108  diagBlocks_[i].putScalar(Teuchos::ScalarTraits<LSC>::zero());
109  this->IsInitialized_ = true;
110  // We assume that if you called this method, you intend to recompute
111  // everything.
112  this->IsComputed_ = false;
113 }
114 
115 template<class MatrixType, class LocalScalarType>
117 {
118  #ifdef HAVE_IFPACK2_DEBUG
120  ipiv_.size () != this->blockRows_.size() * this->scalarsPerRow_, std::logic_error,
121  "Ifpack2::TriDiContainer::compute: ipiv_ array has the wrong size. "
122  "Please report this bug to the Ifpack2 developers.");
123  #endif
124 
125  this->IsComputed_ = false;
126  if (! this->isInitialized ()) {
127  this->initialize ();
128  }
129  extract (); // extract the submatrices
130  factor (); // factor them
131  this->IsComputed_ = true;
132 }
133 
134 template<class MatrixType, class LocalScalarType>
136 {
137  using Teuchos::Array;
138  using Teuchos::ArrayView;
140  const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
141  //To extract diagonal blocks, need to translate local rows to local columns.
142  //Strategy: make a lookup table that translates local cols in the matrix to offsets in blockRows_:
143  //blockOffsets_[b] <= offset < blockOffsets_[b+1]: tests whether the column is in block b.
144  //offset - blockOffsets_[b]: gives the column within block b.
145  //
146  //This provides the block and col within a block in O(1).
147  if(this->scalarsPerRow_ > 1)
148  {
149  Array<LO> colToBlockOffset(this->inputBlockMatrix_->getLocalNumCols(), INVALID);
150  for(int i = 0; i < this->numBlocks_; i++)
151  {
152  //Get the interval where block i is defined in blockRows_
153  LO blockStart = this->blockOffsets_[i];
154  LO blockEnd = blockStart + this->blockSizes_[i];
155  ArrayView<const LO> blockRows = this->getBlockRows(i);
156  //Set the lookup table entries for the columns appearing in block i.
157  //If OverlapLevel_ > 0, then this may overwrite values for previous blocks, but
158  //this is OK. The values updated here are only needed to process block i's entries.
159  for(size_t j = 0; j < size_t(blockRows.size()); j++)
160  {
161  LO localCol = this->translateRowToCol(blockRows[j]);
162  colToBlockOffset[localCol] = blockStart + j;
163  }
164  using h_inds_type = typename block_crs_matrix_type::local_inds_host_view_type;
165  using h_vals_type = typename block_crs_matrix_type::values_host_view_type;
166  for(LO blockRow = 0; blockRow < LO(blockRows.size()); blockRow++)
167  {
168  //get a raw view of the whole block row
169  h_inds_type indices;
170  h_vals_type values;
171  LO inputRow = this->blockRows_[blockStart + blockRow];
172  this->inputBlockMatrix_->getLocalRowView(inputRow, indices, values);
173  LO numEntries = (LO) indices.size();
174  for(LO k = 0; k < numEntries; k++)
175  {
176  LO colOffset = colToBlockOffset[indices[k]];
177  if(blockStart <= colOffset && colOffset < blockEnd)
178  {
179  //This entry does appear in the diagonal block.
180  //(br, bc) identifies the scalar's position in the BlockCrs block.
181  //Convert this to (r, c) which is its position in the container block.
182  LO blockCol = colOffset - blockStart;
183  for(LO bc = 0; bc < this->bcrsBlockSize_; bc++)
184  {
185  for(LO br = 0; br < this->bcrsBlockSize_; br++)
186  {
187  LO r = this->bcrsBlockSize_ * blockRow + br;
188  LO c = this->bcrsBlockSize_ * blockCol + bc;
189  auto val = values[k * (this->bcrsBlockSize_ * this->bcrsBlockSize_) + (br + this->bcrsBlockSize_ * bc)];
190  if(val != zero)
191  diagBlocks_[i](r, c) = val;
192  }
193  }
194  }
195  }
196  }
197  }
198  }
199  else
200  {
201  //get the mapping from point-indexed matrix columns to offsets in blockRows_
202  //(this includes regular CrsMatrix columns, in which case bcrsBlockSize_ == 1)
203  Array<LO> colToBlockOffset(this->inputMatrix_->getLocalNumCols() * this->bcrsBlockSize_, INVALID);
204  for(int i = 0; i < this->numBlocks_; i++)
205  {
206  //Get the interval where block i is defined in blockRows_
207  LO blockStart = this->blockOffsets_[i];
208  LO blockEnd = blockStart + this->blockSizes_[i];
209  ArrayView<const LO> blockRows = this->getBlockRows(i);
210  //Set the lookup table entries for the columns appearing in block i.
211  //If OverlapLevel_ > 0, then this may overwrite values for previous blocks, but
212  //this is OK. The values updated here are only needed to process block i's entries.
213  for(size_t j = 0; j < size_t(blockRows.size()); j++)
214  {
215  //translateRowToCol will return the corresponding split column
216  LO localCol = this->translateRowToCol(blockRows[j]);
217  colToBlockOffset[localCol] = blockStart + j;
218  }
219  for(size_t blockRow = 0; blockRow < size_t(blockRows.size()); blockRow++)
220  {
221  //get a view of the split row
222  LO inputPointRow = this->blockRows_[blockStart + blockRow];
223  auto rowView = this->getInputRowView(inputPointRow);
224  for(size_t k = 0; k < rowView.size(); k++)
225  {
226  LO colOffset = colToBlockOffset[rowView.ind(k)];
227  if(blockStart <= colOffset && colOffset < blockEnd)
228  {
229  LO blockCol = colOffset - blockStart;
230  auto val = rowView.val(k);
231  if(val != zero)
232  diagBlocks_[i](blockRow, blockCol) = rowView.val(k);
233  }
234  }
235  }
236  }
237  }
238 }
239 
240 template<class MatrixType, class LocalScalarType>
241 void TriDiContainer<MatrixType, LocalScalarType>::clearBlocks ()
242 {
243  diagBlocks_.clear();
244  scalars_.clear();
245  ContainerImpl<MatrixType, LocalScalarType>::clearBlocks();
246 }
247 
248 template<class MatrixType, class LocalScalarType>
249 void TriDiContainer<MatrixType, LocalScalarType>::factor ()
250 {
251  for(int i = 0; i < this->numBlocks_; i++)
252  {
254  int INFO = 0;
255  int* blockIpiv = &ipiv_[this->blockOffsets_[i] * this->scalarsPerRow_];
256  lapack.GTTRF (diagBlocks_[i].numRowsCols (),
257  diagBlocks_[i].DL(),
258  diagBlocks_[i].D(),
259  diagBlocks_[i].DU(),
260  diagBlocks_[i].DU2(),
261  blockIpiv, &INFO);
262  // INFO < 0 is a bug.
264  INFO < 0, std::logic_error, "Ifpack2::TriDiContainer::factor: "
265  "LAPACK's _GTTRF (LU factorization with partial pivoting) was called "
266  "incorrectly. INFO = " << INFO << " < 0. "
267  "Please report this bug to the Ifpack2 developers.");
268  // INFO > 0 means the matrix is singular. This is probably an issue
269  // either with the choice of rows the rows we extracted, or with the
270  // input matrix itself.
272  INFO > 0, std::runtime_error, "Ifpack2::TriDiContainer::factor: "
273  "LAPACK's _GTTRF (LU factorization with partial pivoting) reports that the "
274  "computed U factor is exactly singular. U(" << INFO << "," << INFO << ") "
275  "(one-based index i) is exactly zero. This probably means that the input "
276  "matrix has a singular diagonal block.");
277  }
278 }
279 
280 template<class MatrixType, class LocalScalarType>
282 solveBlock(ConstHostSubviewLocal X,
283  HostSubviewLocal Y,
284  int blockIndex,
285  Teuchos::ETransp mode,
286  LSC alpha,
287  LSC beta) const
288 {
290  size_t numVecs = X.extent(1);
291  size_t numRows = X.extent(0);
292 
293  #ifdef HAVE_IFPACK2_DEBUG
295  X.extent (0) != Y.extent (0),
296  std::logic_error, "Ifpack2::TriDiContainer::solveBlock: X and Y have "
297  "incompatible dimensions (" << X.extent (0) << " resp. "
298  << Y.extent (0) << "). Please report this bug to "
299  "the Ifpack2 developers.");
301  X.extent (0) != static_cast<size_t> (diagBlocks_[blockIndex].numRowsCols()),
302  std::logic_error, "Ifpack2::TriDiContainer::solveBlock: The input "
303  "multivector X has incompatible dimensions from those of the "
304  "inverse operator (" << X.extent (0) << " vs. "
305  << (mode == Teuchos::NO_TRANS ? diagBlocks_[blockIndex].numRowsCols () : diagBlocks_[blockIndex].numRowsCols())
306  << "). Please report this bug to the Ifpack2 developers.");
307  TEUCHOS_TEST_FOR_EXCEPTION(
308  Y.extent (0) != static_cast<size_t> (diagBlocks_[blockIndex].numRowsCols()),
309  std::logic_error, "Ifpack2::TriDiContainer::solveBlock: The output "
310  "multivector Y has incompatible dimensions from those of the "
311  "inverse operator (" << Y.extent (0) << " vs. "
312  << (mode == Teuchos::NO_TRANS ? diagBlocks_[blockIndex].numRowsCols() : diagBlocks_[blockIndex].numRowsCols ())
313  << "). Please report this bug to the Ifpack2 developers.");
314  #endif
315 
316  if(alpha == zero) { // don't need to solve the linear system
317  if(beta == zero) {
318  // Use BLAS AXPY semantics for beta == 0: overwrite, clobbering
319  // any Inf or NaN values in Y (rather than multiplying them by
320  // zero, resulting in NaN values).
321  for(size_t j = 0; j < numVecs; j++)
322  for(size_t i = 0; i < numRows; i++)
323  Y(i, j) = zero;
324  }
325  else { // beta != 0
326  for(size_t j = 0; j < numVecs; j++)
327  for(size_t i = 0; i < numRows; i++)
328  Y(i, j) *= ISC(beta);
329  }
330  }
331  else { // alpha != 0; must solve the linear system
333  // If beta is nonzero or Y is not constant stride, we have to use
334  // a temporary output multivector. It gets a copy of X, since
335  // GETRS overwrites its (multi)vector input with its output.
336 
337  std::vector<LSC> yTemp(numVecs * numRows);
338  for(size_t j = 0; j < numVecs; j++)
339  {
340  for(size_t i = 0; i < numRows; i++)
341  yTemp[j * numRows + i] = X(i, j);
342  }
343 
344  int INFO = 0;
345  const char trans =
346  (mode == Teuchos::CONJ_TRANS ? 'C' : (mode == Teuchos::TRANS ? 'T' : 'N'));
347  int* blockIpiv = &ipiv_[this->blockOffsets_[blockIndex] * this->scalarsPerRow_];
348  lapack.GTTRS (trans,
349  diagBlocks_[blockIndex].numRowsCols(),
350  numVecs,
351  diagBlocks_[blockIndex].DL(),
352  diagBlocks_[blockIndex].D(),
353  diagBlocks_[blockIndex].DU(),
354  diagBlocks_[blockIndex].DU2(),
355  blockIpiv,
356  yTemp.data(),
357  numRows,
358  &INFO);
359 
360  if (beta != zero) {
361  for(size_t j = 0; j < numVecs; j++)
362  {
363  for(size_t i = 0; i < numRows; i++)
364  {
365  Y(i, j) *= ISC(beta);
366  Y(i, j) += ISC(alpha * yTemp[j * numRows + i]);
367  }
368  }
369  }
370  else {
371  for(size_t j = 0; j < numVecs; j++)
372  {
373  for(size_t i = 0; i < numRows; i++)
374  Y(i, j) = ISC(yTemp[j * numRows + i]);
375  }
376  }
377 
378  TEUCHOS_TEST_FOR_EXCEPTION(
379  INFO != 0, std::runtime_error, "Ifpack2::TriDiContainer::solveBlock: "
380  "LAPACK's _GETRS (solve using LU factorization with partial pivoting) "
381  "failed with INFO = " << INFO << " != 0.");
382  }
383 }
384 
385 template<class MatrixType, class LocalScalarType>
386 std::ostream& TriDiContainer<MatrixType, LocalScalarType>::print(std::ostream& os) const
387 {
388  Teuchos::FancyOStream fos(Teuchos::rcp(&os,false));
389  fos.setOutputToRootOnly(0);
390  describe(fos);
391  return(os);
392 }
393 
394 template<class MatrixType, class LocalScalarType>
396 {
397  std::ostringstream oss;
399  if (this->isInitialized()) {
400  if (this->isComputed()) {
401  oss << "{status = initialized, computed";
402  }
403  else {
404  oss << "{status = initialized, not computed";
405  }
406  }
407  else {
408  oss << "{status = not initialized, not computed";
409  }
410 
411  oss << "}";
412  return oss.str();
413 }
414 
415 template<class MatrixType, class LocalScalarType>
416 void
419  const Teuchos::EVerbosityLevel verbLevel) const
420 {
421  using std::endl;
422  if(verbLevel==Teuchos::VERB_NONE) return;
423  os << "================================================================================" << endl;
424  os << "Ifpack2::TriDiContainer" << endl;
425  os << "Number of blocks = " << this->numBlocks_ << endl;
426  os << "isInitialized() = " << this->IsInitialized_ << endl;
427  os << "isComputed() = " << this->IsComputed_ << endl;
428  os << "================================================================================" << endl;
429  os << endl;
430 }
431 
432 template<class MatrixType, class LocalScalarType>
434 {
435  return "TriDi";
436 }
437 
438 #define IFPACK2_TRIDICONTAINER_INSTANT(S,LO,GO,N) \
439  template class Ifpack2::TriDiContainer< Tpetra::RowMatrix<S, LO, GO, N>, S >;
440 
441 } // namespace Ifpack2
442 
443 #endif
Store and solve a local TriDi linear problem.
Definition: Ifpack2_TriDiContainer_decl.hpp:105
virtual std::string description() const
A one-line description of this object.
Definition: Ifpack2_TriDiContainer_def.hpp:395
static std::string getName()
Get the name of this container type for Details::constructContainer()
Definition: Ifpack2_TriDiContainer_def.hpp:433
Teuchos::Array< LO > blockSizes_
Number of rows in each block.
Definition: Ifpack2_Container_decl.hpp:296
virtual void compute()
Extract the local diagonal block and prepare the solver.
Definition: Ifpack2_TriDiContainer_def.hpp:116
typename Kokkos::ArithTraits< SC >::val_type ISC
Internal representation of Scalar in Kokkos::View.
Definition: Ifpack2_Container_decl.hpp:135
int numBlocks_
The number of blocks (partitions) in the container.
Definition: Ifpack2_Container_decl.hpp:292
void GTTRS(const char &TRANS, const OrdinalType &n, const OrdinalType &nrhs, const ScalarType *dl, const ScalarType *d, const ScalarType *du, const ScalarType *du2, const OrdinalType *IPIV, ScalarType *B, const OrdinalType &ldb, OrdinalType *info) const
void GTTRF(const OrdinalType &n, ScalarType *dl, ScalarType *d, ScalarType *du, ScalarType *du2, OrdinalType *IPIV, OrdinalType *info) const
The implementation of the numerical features of Container (Jacobi, Gauss-Seidel, SGS). This class allows a custom scalar type (LocalScalarType) to be used for storing blocks and solving the block systems. Hiding this template parameter from the Container interface simplifies the BlockRelaxation and ContainerFactory classes.
Definition: Ifpack2_Container_decl.hpp:343
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
LO scalarsPerRow_
Definition: Ifpack2_Container_decl.hpp:317
virtual ~TriDiContainer()
Destructor (declared virtual for memory safety of derived classes).
Definition: Ifpack2_TriDiContainer_def.hpp:102
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object with some verbosity level to the given FancyOStream.
Definition: Ifpack2_TriDiContainer_def.hpp:418
virtual std::ostream & print(std::ostream &os) const
Print information about this object to the given output stream.
Definition: Ifpack2_TriDiContainer_def.hpp:386
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
virtual std::string description() const
basic_FancyOStream & setOutputToRootOnly(const int rootRank)
void resize(size_type new_size, const value_type &x=value_type())
TriDiContainer(const Teuchos::RCP< const row_matrix_type > &matrix, const Teuchos::Array< Teuchos::Array< LO > > &partitions, const Teuchos::RCP< const import_type > &importer, bool pointIndexed)
Constructor.
Definition: Ifpack2_TriDiContainer_def.hpp:60
virtual void initialize()
Do all set-up operations that only require matrix structure.
Definition: Ifpack2_TriDiContainer_def.hpp:105
void solveBlock(ConstHostSubviewLocal X, HostSubviewLocal Y, int blockIndex, Teuchos::ETransp mode, LSC alpha, LSC beta) const
Definition: Ifpack2_TriDiContainer_def.hpp:282