Ifpack2 Templated Preconditioning Package  Version 1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Ifpack2_DenseContainer_def.hpp
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 //@HEADER
41 */
42 
43 #ifndef IFPACK2_DENSECONTAINER_DEF_HPP
44 #define IFPACK2_DENSECONTAINER_DEF_HPP
45 
46 #include "Tpetra_CrsMatrix.hpp"
47 #include "Teuchos_LAPACK.hpp"
48 #include "Tpetra_BlockMultiVector.hpp"
49 
50 #ifdef HAVE_MPI
51 # include <mpi.h>
53 #else
54 # include "Teuchos_DefaultSerialComm.hpp"
55 #endif // HAVE_MPI
56 
57 namespace Ifpack2 {
58 
59 template<class MatrixType, class LocalScalarType>
62  const Teuchos::Array<Teuchos::Array<LO> >& partitions,
64  bool pointIndexed) :
65  ContainerImpl<MatrixType, LocalScalarType> (matrix, partitions, pointIndexed),
66  scalarOffsets_ (this->numBlocks_)
67 {
69  !matrix->hasColMap(), std::invalid_argument, "Ifpack2::DenseContainer: "
70  "The constructor's input matrix must have a column Map.");
71 
72  //compute scalarOffsets_
73  GO totalScalars = 0;
74  for(LO i = 0; i < this->numBlocks_; i++)
75  {
76  scalarOffsets_[i] = totalScalars;
77  totalScalars += this->blockSizes_[i] * this->scalarsPerRow_ *
78  this->blockSizes_[i] * this->scalarsPerRow_;
79  }
80  scalars_.resize(totalScalars);
81  for(int i = 0; i < this->numBlocks_; i++)
82  {
83  LO denseRows = this->blockSizes_[i] * this->scalarsPerRow_;
84  //create square dense matrix (stride is same as rows and cols)
85  diagBlocks_.emplace_back(Teuchos::View, scalars_.data() + scalarOffsets_[i], denseRows, denseRows, denseRows);
86  }
87 
88  ipiv_.resize(this->blockRows_.size() * this->scalarsPerRow_);
89 }
90 
91 template<class MatrixType, class LocalScalarType>
94 
95 template<class MatrixType, class LocalScalarType>
96 void
99 {
100  // Fill the diagonal block and LU permutation array with zeros.
101  for(int i = 0; i < this->numBlocks_; i++)
102  diagBlocks_[i].putScalar(Teuchos::ScalarTraits<LSC>::zero());
103  std::fill (ipiv_.begin (), ipiv_.end (), 0);
104 
105  this->IsInitialized_ = true;
106  // We assume that if you called this method, you intend to recompute
107  // everything.
108  this->IsComputed_ = false;
109 }
110 
111 template<class MatrixType, class LocalScalarType>
112 void
115 {
116 // FIXME: I am commenting this out because it breaks block CRS support
117 // TEUCHOS_TEST_FOR_EXCEPTION(
118 // static_cast<size_t> (ipiv_.size ()) != numRows_, std::logic_error,
119 // "Ifpack2::DenseContainer::compute: ipiv_ array has the wrong size. "
120 // "Please report this bug to the Ifpack2 developers.");
121 
122  this->IsComputed_ = false;
123  if (!this->isInitialized ()) {
124  this->initialize();
125  }
126 
127  extract (); // Extract the submatrices
128  factor (); // Factor them
129  this->IsComputed_ = true;
130 }
131 
132 template<class MatrixType, class LocalScalarType>
134 {
135  using Teuchos::Array;
136  using Teuchos::ArrayView;
137  using STS = Teuchos::ScalarTraits<SC>;
138  SC zero = STS::zero();
139  const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
140  //To extract diagonal blocks, need to translate local rows to local columns.
141  //Strategy: make a lookup table that translates local cols in the matrix to offsets in blockRows_:
142  //blockOffsets_[b] <= offset < blockOffsets_[b+1]: tests whether the column is in block b.
143  //offset - blockOffsets_[b]: gives the column within block b.
144  //
145  //This provides the block and col within a block in O(1).
146  if(this->scalarsPerRow_ > 1)
147  {
148  Array<LO> colToBlockOffset(this->inputBlockMatrix_->getLocalNumCols(), INVALID);
149  for(int i = 0; i < this->numBlocks_; i++)
150  {
151  //Get the interval where block i is defined in blockRows_
152  LO blockStart = this->blockOffsets_[i];
153  LO blockEnd = blockStart + this->blockSizes_[i];
154  ArrayView<const LO> blockRows = this->getBlockRows(i);
155  //Set the lookup table entries for the columns appearing in block i.
156  //If OverlapLevel_ > 0, then this may overwrite values for previous blocks, but
157  //this is OK. The values updated here are only needed to process block i's entries.
158  for(size_t j = 0; j < size_t(blockRows.size()); j++)
159  {
160  LO localCol = this->translateRowToCol(blockRows[j]);
161  colToBlockOffset[localCol] = blockStart + j;
162  }
163  using h_inds_type = typename block_crs_matrix_type::local_inds_host_view_type;
164  using h_vals_type = typename block_crs_matrix_type::values_host_view_type;
165  for(LO blockRow = 0; blockRow < LO(blockRows.size()); blockRow++)
166  {
167  //get a raw view of the whole block row
168  h_inds_type indices;
169  h_vals_type values;
170  LO inputRow = this->blockRows_[blockStart + blockRow];
171  this->inputBlockMatrix_->getLocalRowView(inputRow, indices, values);
172  LO numEntries = (LO) indices.size();
173  for(LO k = 0; k < numEntries; k++)
174  {
175  LO colOffset = colToBlockOffset[indices[k]];
176  if(blockStart <= colOffset && colOffset < blockEnd)
177  {
178  //This entry does appear in the diagonal block.
179  //(br, bc) identifies the scalar's position in the BlockCrs block.
180  //Convert this to (r, c) which is its position in the container block.
181  LO blockCol = colOffset - blockStart;
182  for(LO bc = 0; bc < this->bcrsBlockSize_; bc++)
183  {
184  for(LO br = 0; br < this->bcrsBlockSize_; br++)
185  {
186  LO r = this->bcrsBlockSize_ * blockRow + br;
187  LO c = this->bcrsBlockSize_ * blockCol + bc;
188  auto val = values[k * (this->bcrsBlockSize_ * this->bcrsBlockSize_) + (br + this->bcrsBlockSize_ * bc)];
189  if(val != zero)
190  diagBlocks_[i](r, c) = val;
191  }
192  }
193  }
194  }
195  }
196  }
197  }
198  else
199  {
200  //get the mapping from point-indexed matrix columns to offsets in blockRows_
201  //(this includes regular CrsMatrix columns, in which case bcrsBlockSize_ == 1)
202  Array<LO> colToBlockOffset(this->inputMatrix_->getLocalNumCols() * this->bcrsBlockSize_, INVALID);
203  for(int i = 0; i < this->numBlocks_; i++)
204  {
205  //Get the interval where block i is defined in blockRows_
206  LO blockStart = this->blockOffsets_[i];
207  LO blockEnd = blockStart + this->blockSizes_[i];
208  ArrayView<const LO> blockRows = this->getBlockRows(i);
209  //Set the lookup table entries for the columns appearing in block i.
210  //If OverlapLevel_ > 0, then this may overwrite values for previous blocks, but
211  //this is OK. The values updated here are only needed to process block i's entries.
212  for(size_t j = 0; j < size_t(blockRows.size()); j++)
213  {
214  //translateRowToCol will return the corresponding split column
215  LO localCol = this->translateRowToCol(blockRows[j]);
216  colToBlockOffset[localCol] = blockStart + j;
217  }
218  for(size_t blockRow = 0; blockRow < size_t(blockRows.size()); blockRow++)
219  {
220  //get a view of the split row
221  LO inputPointRow = this->blockRows_[blockStart + blockRow];
222  auto rowView = this->getInputRowView(inputPointRow);
223  for(size_t k = 0; k < rowView.size(); k++)
224  {
225  LO colOffset = colToBlockOffset[rowView.ind(k)];
226  if(blockStart <= colOffset && colOffset < blockEnd)
227  {
228  LO blockCol = colOffset - blockStart;
229  auto val = rowView.val(k);
230  if(val != zero)
231  diagBlocks_[i](blockRow, blockCol) = rowView.val(k);
232  }
233  }
234  }
235  }
236  }
237 }
238 
239 
240 template<class MatrixType, class LocalScalarType>
241 void
242 DenseContainer<MatrixType, LocalScalarType>::
243 factor ()
244 {
246  for(int i = 0; i < this->numBlocks_; i++)
247  {
248  int INFO = 0;
249  int* blockIpiv = &ipiv_[this->blockOffsets_[i] * this->scalarsPerRow_];
250  lapack.GETRF(diagBlocks_[i].numRows(),
251  diagBlocks_[i].numCols(),
252  diagBlocks_[i].values(),
253  diagBlocks_[i].stride(),
254  blockIpiv, &INFO);
255  // INFO < 0 is a bug.
257  INFO < 0, std::logic_error, "Ifpack2::DenseContainer::factor: "
258  "LAPACK's _GETRF (LU factorization with partial pivoting) was called "
259  "incorrectly. INFO = " << INFO << " < 0. "
260  "Please report this bug to the Ifpack2 developers.");
261  // INFO > 0 means the matrix is singular. This is probably an issue
262  // either with the choice of rows the rows we extracted, or with the
263  // input matrix itself.
265  INFO > 0, std::runtime_error, "Ifpack2::DenseContainer::factor: "
266  "LAPACK's _GETRF (LU factorization with partial pivoting) reports that the "
267  "computed U factor is exactly singular. U(" << INFO << "," << INFO << ") "
268  "(one-based index i) is exactly zero. This probably means that the input "
269  "matrix has a singular diagonal block.");
270  }
271 }
272 
273 template<class MatrixType, class LocalScalarType>
274 void
275 DenseContainer<MatrixType, LocalScalarType>::
276 solveBlock(ConstHostSubviewLocal X,
277  HostSubviewLocal Y,
278  int blockIndex,
279  Teuchos::ETransp mode,
280  LSC alpha,
281  LSC beta) const
282 {
283  #ifdef HAVE_IFPACK2_DEBUG
285  X.extent (0) != Y.extent (0),
286  std::logic_error, "Ifpack2::DenseContainer::solveBlock: X and Y have "
287  "incompatible dimensions (" << X.extent (0) << " resp. "
288  << Y.extent (0) << "). Please report this bug to "
289  "the Ifpack2 developers.");
290 
292  X.extent (1) != Y.extent(1),
293  std::logic_error, "Ifpack2::DenseContainer::solveBlock: X and Y have "
294  "incompatible numbers of vectors (" << X.extent (1) << " resp. "
295  << Y.extent (1) << "). Please report this bug to "
296  "the Ifpack2 developers.");
297  #endif
298 
299  typedef Teuchos::ScalarTraits<LSC> STS;
300  size_t numRows = X.extent(0);
301  size_t numVecs = X.extent(1);
302  if(alpha == STS::zero()) { // don't need to solve the linear system
303  if(beta == STS::zero()) {
304  // Use BLAS AXPY semantics for beta == 0: overwrite, clobbering
305  // any Inf or NaN values in Y (rather than multiplying them by
306  // zero, resulting in NaN values).
307  for(size_t j = 0; j < numVecs; j++)
308  {
309  for(size_t i = 0; i < numRows; i++)
310  Y(i, j) = STS::zero();
311  }
312  }
313  else // beta != 0
314  for(size_t j = 0; j < numVecs; j++)
315  {
316  for(size_t i = 0; i < numRows; i++)
317  Y(i, j) = beta * Y(i, j);
318  }
319  }
320  else { // alpha != 0; must solve the linear system
322  // If beta is nonzero or Y is not constant stride, we have to use
323  // a temporary output multivector. It gets a (deep) copy of X,
324  // since GETRS overwrites its (multi)vector input with its output.
325  std::vector<LSC> yTemp(numVecs * numRows);
326  for(size_t j = 0; j < numVecs; j++)
327  {
328  for(size_t i = 0; i < numRows; i++)
329  yTemp[j * numRows + i] = X(i, j);
330  }
331 
332  int INFO = 0;
333  int* blockIpiv = &ipiv_[this->blockOffsets_[blockIndex] * this->scalarsPerRow_];
334  const char trans =
335  (mode == Teuchos::CONJ_TRANS ? 'C' : (mode == Teuchos::TRANS ? 'T' : 'N'));
336  lapack.GETRS (trans,
337  diagBlocks_[blockIndex].numRows (),
338  numVecs,
339  diagBlocks_[blockIndex].values (),
340  diagBlocks_[blockIndex].stride (),
341  blockIpiv,
342  yTemp.data(),
343  numRows,
344  &INFO);
345 
346  if (beta != STS::zero ()) {
347  for(size_t j = 0; j < numVecs; j++)
348  {
349  for(size_t i = 0; i < numRows; i++)
350  {
351  Y(i, j) *= ISC(beta);
352  Y(i, j) += ISC(alpha * yTemp[j * numRows + i]);
353  }
354  }
355  }
356  else {
357  for(size_t j = 0; j < numVecs; j++)
358  {
359  for(size_t i = 0; i < numRows; i++)
360  Y(i, j) = ISC(yTemp[j * numRows + i]);
361  }
362  }
363 
365  INFO != 0, std::runtime_error, "Ifpack2::DenseContainer::solveBlock: "
366  "LAPACK's _GETRS (solve using LU factorization with partial pivoting) "
367  "failed with INFO = " << INFO << " != 0.");
368  }
369 }
370 
371 template<class MatrixType, class LocalScalarType>
372 std::ostream&
374 print (std::ostream& os) const
375 {
376  Teuchos::FancyOStream fos (Teuchos::rcpFromRef (os));
377  fos.setOutputToRootOnly (0);
378  this->describe (fos);
379  return os;
380 }
381 
382 template<class MatrixType, class LocalScalarType>
383 std::string
385 description () const
386 {
387  std::ostringstream oss;
388  oss << "Ifpack::DenseContainer: ";
389  if (this->isInitialized()) {
390  if (this->isComputed()) {
391  oss << "{status = initialized, computed";
392  }
393  else {
394  oss << "{status = initialized, not computed";
395  }
396  }
397  else {
398  oss << "{status = not initialized, not computed";
399  }
400 
401  oss << "}";
402  return oss.str();
403 }
404 
405 template<class MatrixType, class LocalScalarType>
406 void
409  const Teuchos::EVerbosityLevel verbLevel) const
410 {
411  using std::endl;
412  if(verbLevel==Teuchos::VERB_NONE) return;
413  os << "================================================================================" << endl;
414  os << "Ifpack2::DenseContainer" << endl;
415  for(int i = 0; i < this->numBlocks_; i++)
416  {
417  os << "Block " << i << " number of rows = " << this->blockSizes_[i] << endl;
418  }
419  os << "isInitialized() = " << this->IsInitialized_ << endl;
420  os << "isComputed() = " << this->IsComputed_ << endl;
421  os << "================================================================================" << endl;
422  os << endl;
423 }
424 
425 template<class MatrixType, class LocalScalarType>
427 {
428  diagBlocks_.clear();
429  scalars_.clear();
431 }
432 
433 template<class MatrixType, class LocalScalarType>
435 {
436  return "Dense";
437 }
438 
439 } // namespace Ifpack2
440 
441 // There's no need to instantiate for CrsMatrix too. All Ifpack2
442 // preconditioners can and should do dynamic casts if they need a type
443 // more specific than RowMatrix.
444 
445 #define IFPACK2_DENSECONTAINER_INSTANT(S,LO,GO,N) \
446  template class Ifpack2::DenseContainer< Tpetra::RowMatrix<S, LO, GO, N>, S >;
447 
448 #endif // IFPACK2_DENSECONTAINER_DEF_HPP
Teuchos::Array< LO > blockRows_
Local indices of the rows of the input matrix that belong to this block.
Definition: Ifpack2_Container_decl.hpp:294
Teuchos::Array< LO > blockSizes_
Number of rows in each block.
Definition: Ifpack2_Container_decl.hpp:296
int numBlocks_
The number of blocks (partitions) in the container.
Definition: Ifpack2_Container_decl.hpp:292
The implementation of the numerical features of Container (Jacobi, Gauss-Seidel, SGS). This class allows a custom scalar type (LocalScalarType) to be used for storing blocks and solving the block systems. Hiding this template parameter from the Container interface simplifies the BlockRelaxation and ContainerFactory classes.
Definition: Ifpack2_Container_decl.hpp:343
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
virtual void compute()
Extract the local diagonal block and prepare the solver.
Definition: Ifpack2_DenseContainer_def.hpp:114
LO scalarsPerRow_
Definition: Ifpack2_Container_decl.hpp:317
static std::string getName()
Get the name of this container type for Details::constructContainer()
Definition: Ifpack2_DenseContainer_def.hpp:434
DenseContainer(const Teuchos::RCP< const row_matrix_type > &matrix, const Teuchos::Array< Teuchos::Array< LO > > &partitions, const Teuchos::RCP< const import_type > &importer, bool pointIndexed)
Constructor.
Definition: Ifpack2_DenseContainer_def.hpp:61
void GETRF(const OrdinalType &m, const OrdinalType &n, ScalarType *A, const OrdinalType &lda, OrdinalType *IPIV, OrdinalType *info) const
Store and solve a local dense linear problem.
Definition: Ifpack2_DenseContainer_decl.hpp:103
virtual std::ostream & print(std::ostream &os) const
Print information about this object to the given output stream.
Definition: Ifpack2_DenseContainer_def.hpp:374
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object with some verbosity level to the given FancyOStream.
Definition: Ifpack2_DenseContainer_def.hpp:408
basic_FancyOStream & setOutputToRootOnly(const int rootRank)
void resize(size_type new_size, const value_type &x=value_type())
virtual ~DenseContainer()
Destructor (declared virtual for memory safety of derived classes).
Definition: Ifpack2_DenseContainer_def.hpp:93
void GETRS(const char &TRANS, const OrdinalType &n, const OrdinalType &nrhs, const ScalarType *A, const OrdinalType &lda, const OrdinalType *IPIV, ScalarType *B, const OrdinalType &ldb, OrdinalType *info) const
size_type size() const
virtual std::string description() const
A one-line description of this object.
Definition: Ifpack2_DenseContainer_def.hpp:385
virtual void initialize()
Do all set-up operations that only require matrix structure.
Definition: Ifpack2_DenseContainer_def.hpp:98