Ifpack2 Templated Preconditioning Package  Version 1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Ifpack2_DenseContainer_def.hpp
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 //@HEADER
41 */
42 
43 #ifndef IFPACK2_DENSECONTAINER_DEF_HPP
44 #define IFPACK2_DENSECONTAINER_DEF_HPP
45 
46 #include "Tpetra_CrsMatrix.hpp"
47 #include "Teuchos_LAPACK.hpp"
48 #include "Tpetra_Experimental_BlockMultiVector.hpp"
49 
50 #ifdef HAVE_MPI
51 # include <mpi.h>
53 #else
54 # include "Teuchos_DefaultSerialComm.hpp"
55 #endif // HAVE_MPI
56 
57 namespace Ifpack2 {
58 
59 template<class MatrixType, class LocalScalarType>
60 DenseContainer<MatrixType, LocalScalarType, true>::
61 DenseContainer (const Teuchos::RCP<const row_matrix_type>& matrix,
63  const Teuchos::RCP<const import_type>& importer,
64  int OverlapLevel,
65  scalar_type DampingFactor) :
66  Container<MatrixType> (matrix, partitions, importer, OverlapLevel,
67  DampingFactor),
68  scalars_ (nullptr),
69  scalarOffsets_ (this->numBlocks_)
70 {
71  using Teuchos::Array;
72  using Teuchos::ArrayView;
73  using Teuchos::RCP;
74  using Teuchos::rcp;
75  using Teuchos::ptr;
76  using Teuchos::toString;
77  typedef typename ArrayView<const local_ordinal_type>::size_type size_type;
79  !matrix->hasColMap(), std::invalid_argument, "Ifpack2::DenseContainer: "
80  "The constructor's input matrix must have a column Map.");
81 
82  //compute scalarOffsets_
83  global_ordinal_type totalScalars = 0;
84  for(local_ordinal_type i = 0; i < this->numBlocks_; i++)
85  {
86  scalarOffsets_[i] = totalScalars;
87  totalScalars += this->blockRows_[i] * this->blockRows_[i]
88  * this->bcrsBlockSize_ * this->bcrsBlockSize_;
89  }
90  scalars_ = new local_scalar_type[totalScalars];
91  for(int i = 0; i < this->numBlocks_; i++)
92  {
93  int nnodes = this->blockRows_[i];
94  int denseRows = nnodes * this->bcrsBlockSize_;
95  //create square dense matrix (stride is same as rows and cols)
96  diagBlocks_.emplace_back(Teuchos::View, scalars_ + scalarOffsets_[i], denseRows, denseRows, denseRows);
97  diagBlocks_[i].putScalar(0);
98  }
99 
100  ipiv_.resize(this->partitions_.size() * this->bcrsBlockSize_);
101 
102  for(int i = 0; i < this->numBlocks_; i++)
103  {
104  Teuchos::ArrayView<const local_ordinal_type> localRows = this->getLocalRows(i);
105  // Check whether the input set of local row indices is correct.
106  const map_type& rowMap = * (matrix->getRowMap ());
107  const size_type numRows = localRows.size ();
108  bool rowIndicesValid = true;
109  Array<local_ordinal_type> invalidLocalRowIndices;
110  for(size_type j = 0; j < numRows; j++) {
111  if(!rowMap.isNodeLocalElement(localRows[j])) {
112  rowIndicesValid = false;
113  invalidLocalRowIndices.push_back(localRows[j]);
114  break;
115  }
116  }
118  !rowIndicesValid, std::invalid_argument, "Ifpack2::DenseContainer: "
119  "On process " << rowMap.getComm()->getRank() << " of "
120  << rowMap.getComm()->getSize() << ", in the given set of local row "
121  "indices localRows = " << toString(localRows) << ", the following "
122  "entries are not valid local row indices on the calling process: "
123  << toString(invalidLocalRowIndices) << ".");
124  }
125  IsInitialized_ = false;
126  IsComputed_ = false;
127 }
128 
129 template<class MatrixType, class LocalScalarType>
130 DenseContainer<MatrixType, LocalScalarType, true>::
131 DenseContainer (const Teuchos::RCP<const row_matrix_type>& matrix,
132  const Teuchos::Array<local_ordinal_type>& localRows) :
133  Container<MatrixType>(matrix, localRows),
134  scalars_(nullptr)
135 {
136  using Teuchos::Array;
137  using Teuchos::ArrayView;
138  using Teuchos::RCP;
139  using Teuchos::rcp;
140  using Teuchos::toString;
141  typedef typename ArrayView<const local_ordinal_type>::size_type size_type;
143  !matrix->hasColMap(), std::invalid_argument, "Ifpack2::DenseContainer: "
144  "The constructor's input matrix must have a column Map.");
145  diagBlocks_.emplace_back(this->blockRows_[0] * this->bcrsBlockSize_,
146  this->blockRows_[0] * this->bcrsBlockSize_);
147  diagBlocks_[0].putScalar(0);
148 
149  ipiv_.resize(this->partitions_.size() * this->bcrsBlockSize_);
150 
151  for(int i = 0; i < this->numBlocks_; i++)
152  {
153  // Check whether the input set of local row indices is correct.
154  const map_type& rowMap = *(matrix->getRowMap());
155  const size_type numRows = localRows.size ();
156  bool rowIndicesValid = true;
157  Array<local_ordinal_type> invalidLocalRowIndices;
158  for(size_type j = 0; j < numRows; j++)
159  {
160  if(!rowMap.isNodeLocalElement(localRows[j]))
161  {
162  rowIndicesValid = false;
163  invalidLocalRowIndices.push_back(localRows[j]);
164  break;
165  }
166  }
168  !rowIndicesValid, std::invalid_argument, "Ifpack2::DenseContainer: "
169  "On process " << rowMap.getComm()->getRank() << " of "
170  << rowMap.getComm()->getSize() << ", in the given set of local row "
171  "indices localRows = " << toString (localRows) << ", the following "
172  "entries are not valid local row indices on the calling process: "
173  << toString(invalidLocalRowIndices) << ".");
174  }
175  // FIXME (mfh 25 Aug 2013) What if the matrix's row Map has a
176  // different index base than zero?
177  IsInitialized_ = false;
178  IsComputed_ = false;
179 }
180 
181 template<class MatrixType, class LocalScalarType>
182 DenseContainer<MatrixType, LocalScalarType, true>::~DenseContainer()
183 {
184  if(scalars_)
185  delete[] scalars_;
186 }
187 
188 template<class MatrixType, class LocalScalarType>
189 void DenseContainer<MatrixType, LocalScalarType, true>::
190 setParameters (const Teuchos::ParameterList& /* List */)
191 {
192  // the solver doesn't currently take any parameters
193 }
194 
195 template<class MatrixType, class LocalScalarType>
196 void
197 DenseContainer<MatrixType, LocalScalarType, true>::
198 initialize ()
199 {
200  using Teuchos::null;
201  using Teuchos::rcp;
202 
203  // We assume that if you called this method, you intend to recompute
204  // everything.
205  IsInitialized_ = false;
206  IsComputed_ = false;
207 
208  // Fill the diagonal block and LU permutation array with zeros.
209  for(int i = 0; i < this->numBlocks_; i++)
210  diagBlocks_[i].putScalar(Teuchos::ScalarTraits<local_scalar_type>::zero());
211  std::fill (ipiv_.begin (), ipiv_.end (), 0);
212  IsInitialized_ = true;
213 }
214 
215 template<class MatrixType, class LocalScalarType>
216 void
217 DenseContainer<MatrixType, LocalScalarType, true>::
218 compute ()
219 {
220 // FIXME: I am commenting this out because it breaks block CRS support
221 // TEUCHOS_TEST_FOR_EXCEPTION(
222 // static_cast<size_t> (ipiv_.size ()) != numRows_, std::logic_error,
223 // "Ifpack2::DenseContainer::compute: ipiv_ array has the wrong size. "
224 // "Please report this bug to the Ifpack2 developers.");
225 
226  IsComputed_ = false;
227  if (! this->isInitialized ()) {
228  this->initialize();
229  }
230 
231  // Extract the submatrix.
232  extract ();
233  factor (); // factor the submatrices
234 
235  IsComputed_ = true;
236 }
237 
238 template<class MatrixType, class LocalScalarType>
239 void
240 DenseContainer<MatrixType, LocalScalarType, true>::
241 factor ()
242 {
244  for(int i = 0; i < this->numBlocks_; i++)
245  {
246  int INFO = 0;
247  int* blockIpiv = ipiv_.getRawPtr() + this->partitionIndices_[i] * this->bcrsBlockSize_;
248  lapack.GETRF(diagBlocks_[i].numRows(),
249  diagBlocks_[i].numCols(),
250  diagBlocks_[i].values(),
251  diagBlocks_[i].stride(),
252  blockIpiv, &INFO);
253  // INFO < 0 is a bug.
255  INFO < 0, std::logic_error, "Ifpack2::DenseContainer::factor: "
256  "LAPACK's _GETRF (LU factorization with partial pivoting) was called "
257  "incorrectly. INFO = " << INFO << " < 0. "
258  "Please report this bug to the Ifpack2 developers.");
259  // INFO > 0 means the matrix is singular. This is probably an issue
260  // either with the choice of rows the rows we extracted, or with the
261  // input matrix itself.
263  INFO > 0, std::runtime_error, "Ifpack2::DenseContainer::factor: "
264  "LAPACK's _GETRF (LU factorization with partial pivoting) reports that the "
265  "computed U factor is exactly singular. U(" << INFO << "," << INFO << ") "
266  "(one-based index i) is exactly zero. This probably means that the input "
267  "matrix has a singular diagonal block.");
268  }
269 }
270 
271 template<class MatrixType, class LocalScalarType>
272 void
273 DenseContainer<MatrixType, LocalScalarType, true>::
274 applyImplBlockCrs (HostViewLocal& X,
275  HostViewLocal& Y,
276  int blockIndex,
277  int stride,
278  Teuchos::ETransp mode,
279  local_scalar_type alpha,
280  local_scalar_type beta) const
281 {
282  using Teuchos::ArrayRCP;
283  using Teuchos::Ptr;
284  using Teuchos::ptr;
285  using Teuchos::RCP;
286  using Teuchos::rcp;
287  using Teuchos::rcpFromRef;
288 
290  const size_t numRows = X.extent(0);
291  const size_t numVecs = X.extent(1);
292 
294  static_cast<size_t> (X.extent (0)) != static_cast<size_t> (diagBlocks_[blockIndex].numRows ()),
295  std::logic_error, "Ifpack2::DenseContainer::applyImpl: X and Y have "
296  "different number of rows than block matrix (" << X.extent(0) << " resp. "
297  << diagBlocks_[blockIndex].numRows() << "). Please report this bug to "
298  "the Ifpack2 developers.");
299 
300  if (alpha == STS::zero ()) { // don't need to solve the linear system
301  if (beta == STS::zero ()) {
302  // Use BLAS AXPY semantics for beta == 0: overwrite, clobbering
303  // any Inf or NaN values in Y (rather than multiplying them by
304  // zero, resulting in NaN values).
305  for(size_t i = 0; i < numRows; i++)
306  for(size_t j = 0; j < numVecs; j++)
307  Y(i, j) = STS::zero();
308  }
309  else { // beta != 0
310  for(size_t i = 0; i < numRows; i++)
311  for(size_t j = 0; j < numVecs; j++)
312  Y(i, j) = beta * (local_impl_scalar_type) Y(i, j);
313  }
314  }
315  else { // alpha != 0; must solve the linear system
317  // If beta is nonzero or Y is not constant stride, we have to use
318  // a temporary output multivector. It gets a (deep) copy of X,
319  // since GETRS overwrites its (multi)vector input with its output.
320  Ptr<HostViewLocal> Y_tmp;
321  bool deleteYT = false;
322  if (beta == STS::zero () ){
323  Kokkos::deep_copy(Y, X);
324  Y_tmp = ptr(&Y);
325  }
326  else {
327  Y_tmp = ptr (new HostViewLocal ("", X.extent(0), X.extent(1)));
328  Kokkos::deep_copy(*Y_tmp, X);
329  deleteYT = true;
330  }
331  local_scalar_type* const Y_ptr = (local_scalar_type*) Y_tmp->data();
332  int INFO = 0;
333  const char trans =
334  (mode == Teuchos::CONJ_TRANS ? 'C' : (mode == Teuchos::TRANS ? 'T' : 'N'));
335  int* blockIpiv = (int*) ipiv_.getRawPtr()
336  + this->partitionIndices_[blockIndex] * this->bcrsBlockSize_;
337  lapack.GETRS (trans,
338  diagBlocks_[blockIndex].numRows (),
339  numVecs,
340  diagBlocks_[blockIndex].values (),
341  diagBlocks_[blockIndex].stride (),
342  blockIpiv,
343  Y_ptr,
344  stride, &INFO);
346  INFO != 0, std::runtime_error, "Ifpack2::DenseContainer::applyImpl: "
347  "LAPACK's _GETRS (solve using LU factorization with partial pivoting) "
348  "failed with INFO = " << INFO << " != 0.");
349 
350  if (beta != STS::zero ()) {
351  for(size_t i = 0; i < Y.extent(0); i++)
352  {
353  for(size_t j = 0; j < Y.extent(1); j++)
354  {
355  Y(i, j) = beta * (local_impl_scalar_type) Y(i, j);
356  Y(i, j) += alpha * (*Y_tmp)(i, j);
357  }
358  }
359  }
360  if(deleteYT)
361  delete Y_tmp.get();
362  }
363 }
364 
365 template<class MatrixType, class LocalScalarType>
366 void
367 DenseContainer<MatrixType, LocalScalarType, true>::
368 applyImpl (HostViewLocal& X,
369  HostViewLocal& Y,
370  int blockIndex,
371  int stride,
372  Teuchos::ETransp mode,
373  local_scalar_type alpha,
374  local_scalar_type beta) const
375 {
376  using Teuchos::ArrayRCP;
377  using Teuchos::Ptr;
378  using Teuchos::ptr;
379  using Teuchos::RCP;
380  using Teuchos::rcp;
381  using Teuchos::rcpFromRef;
382 
384  X.extent (0) != Y.extent (0),
385  std::logic_error, "Ifpack2::DenseContainer::applyImpl: X and Y have "
386  "incompatible dimensions (" << X.extent (0) << " resp. "
387  << Y.extent (0) << "). Please report this bug to "
388  "the Ifpack2 developers.");
389 
391  X.extent (1) != Y.extent(1),
392  std::logic_error, "Ifpack2::DenseContainer::applyImpl: X and Y have "
393  "incompatible numbers of vectors (" << X.extent (1) << " resp. "
394  << Y.extent (1) << "). Please report this bug to "
395  "the Ifpack2 developers.");
396 
397  if(this->hasBlockCrs_) {
398  applyImplBlockCrs(X,Y,blockIndex,stride,mode,alpha,beta);
399  return;
400  }
401 
403  size_t numVecs = X.extent(1);
404  if(alpha == STS::zero()) { // don't need to solve the linear system
405  if(beta == STS::zero()) {
406  // Use BLAS AXPY semantics for beta == 0: overwrite, clobbering
407  // any Inf or NaN values in Y (rather than multiplying them by
408  // zero, resulting in NaN values).
409  for(size_t i = 0; i < Y.extent(0); i++)
410  {
411  for(size_t j = 0; j < Y.extent(1); j++)
412  Y(i, j) = STS::zero();
413  }
414  }
415  else // beta != 0
416  for(size_t i = 0; i < Y.extent(0); i++)
417  {
418  for(size_t j = 0; j < Y.extent(1); j++)
419  Y(i, j) = beta * (local_impl_scalar_type) Y(i, j);
420  }
421  }
422  else { // alpha != 0; must solve the linear system
424  // If beta is nonzero or Y is not constant stride, we have to use
425  // a temporary output multivector. It gets a (deep) copy of X,
426  // since GETRS overwrites its (multi)vector input with its output.
427  Ptr<HostViewLocal> Y_tmp;
428  bool deleteYT = false;
429  if (beta == STS::zero () ){
430  Kokkos::deep_copy (Y, X);
431  Y_tmp = ptr (&Y);
432  }
433  else {
434  Y_tmp = ptr (new HostViewLocal ("", Y.extent(0), Y.extent(1)));
435  Kokkos::deep_copy(*Y_tmp, X);
436  deleteYT = true;
437  }
438  local_scalar_type* Y_ptr = (local_scalar_type*) Y_tmp->data();
439  int INFO = 0;
440  int* blockIpiv = (int*) ipiv_.getRawPtr() + this->partitionIndices_[blockIndex] * this->bcrsBlockSize_;
441  const char trans =
442  (mode == Teuchos::CONJ_TRANS ? 'C' : (mode == Teuchos::TRANS ? 'T' : 'N'));
443  lapack.GETRS (trans,
444  diagBlocks_[blockIndex].numRows (),
445  numVecs,
446  diagBlocks_[blockIndex].values (),
447  diagBlocks_[blockIndex].stride (),
448  blockIpiv,
449  Y_ptr,
450  stride, &INFO);
452  INFO != 0, std::runtime_error, "Ifpack2::DenseContainer::applyImpl: "
453  "LAPACK's _GETRS (solve using LU factorization with partial pivoting) "
454  "failed with INFO = " << INFO << " != 0.");
455 
456  if (beta != STS::zero ()) {
457  for(size_t i = 0; i < Y.extent(0); i++)
458  {
459  for(size_t j = 0; j < Y.extent(1); j++)
460  Y(i, j) = Y(i, j) * (local_impl_scalar_type) beta + (local_impl_scalar_type) alpha * (*Y_tmp)(i, j);
461  }
462  }
463  if(deleteYT)
464  delete Y_tmp.get();
465  }
466 }
467 
468 template<class MatrixType, class LocalScalarType>
469 void
470 DenseContainer<MatrixType, LocalScalarType, true>::
471 applyBlockCrs (HostView& XIn,
472  HostView& YIn,
473  int blockIndex,
474  int stride,
475  Teuchos::ETransp mode,
476  scalar_type alpha,
477  scalar_type beta) const
478 {
479  using Teuchos::ArrayView;
480  using Teuchos::ArrayRCP;
481  using Teuchos::as;
482  using Teuchos::RCP;
483  using Teuchos::rcp;
484 
485  const size_t numRows = this->blockRows_[blockIndex];
486 
487  // The local operator might have a different Scalar type than
488  // MatrixType. This means that we might have to convert X and Y to
489  // the Tpetra::MultiVector specialization that the local operator
490  // wants. This class' X_ and Y_ internal fields are of the right
491  // type for the local operator, so we can use those as targets.
492 
493  const char prefix[] = "Ifpack2::DenseContainer::weightedApply: ";
495  ! IsComputed_, std::runtime_error, prefix << "You must have called the "
496  "compute() method before you may call this method. You may call "
497  "apply() as many times as you want after calling compute() once, "
498  "but you must have called compute() at least once first.");
499  const size_t numVecs = XIn.extent (1);
501  numVecs != YIn.extent (1), std::runtime_error,
502  prefix << "X and Y have different numbers of vectors (columns). X has "
503  << XIn.extent (1) << ", but Y has " << YIn.extent (1) << ".");
504 
505  if (numVecs == 0) {
506  return; // done! nothing to do
507  }
508 
509  // The local operator works on a permuted subset of the local parts
510  // of X and Y. The subset and permutation are defined by the index
511  // array returned by getLocalRows(). If the permutation is trivial
512  // and the subset is exactly equal to the local indices, then we
513  // could use the local parts of X and Y exactly, without needing to
514  // permute. Otherwise, we have to use temporary storage to permute
515  // X and Y. For now, we always use temporary storage.
516  //
517  // Create temporary permuted versions of the input and output.
518  // (Re)allocate X_ and/or Y_ only if necessary. We'll use them to
519  // store the permuted versions of X resp. Y. Note that X_local has
520  // the domain Map of the operator, which may be a permuted subset of
521  // the local Map corresponding to X.getMap(). Similarly, Y_local
522  // has the range Map of the operator, which may be a permuted subset
523  // of the local Map corresponding to Y.getMap(). numRows_ here
524  // gives the number of rows in the row Map of the local Inverse_
525  // operator.
526  //
527  // FIXME (mfh 20 Aug 2013) There might be an implicit assumption
528  // here that the row Map and the range Map of that operator are
529  // the same.
530  //
531  // FIXME (mfh 20 Aug 2013) This "local permutation" functionality
532  // really belongs in Tpetra.
533 
534  if(X_local.size() == 0)
535  {
536  //create all X_local and Y_local managed Views at once, are
537  //reused in subsequent apply() calls
538  for(int i = 0; i < this->numBlocks_; i++)
539  {
540  X_local.emplace_back("", this->blockRows_[i] * this->bcrsBlockSize_, numVecs);
541  }
542  for(int i = 0; i < this->numBlocks_; i++)
543  {
544  Y_local.emplace_back("", this->blockRows_[i] * this->bcrsBlockSize_, numVecs);
545  }
546  }
547  HostViewLocal& XOut = X_local[blockIndex];
548  HostViewLocal& YOut = Y_local[blockIndex];
549 
550  ArrayView<const local_ordinal_type> localRows = this->getLocalRows(blockIndex);
551  // Gather x
552  for (size_t j = 0; j < numVecs; ++j) {
553  for (size_t i = 0; i < numRows; ++i) {
554  const size_t i_perm = localRows[i];
555  for (int k = 0; k < this->bcrsBlockSize_; ++k)
556  XOut(i*this->bcrsBlockSize_+k, j) = XIn(i_perm*this->bcrsBlockSize_+k, j);
557  }
558  }
559 
560  // We must gather the contents of the output multivector Y even on
561  // input to applyImpl(), since the inverse operator might use it as
562  // an initial guess for a linear solve. We have no way of knowing
563  // whether it does or does not.
564 
565  // gather Y
566  for (size_t j = 0; j < numVecs; ++j) {
567  for (size_t i = 0; i < numRows; ++i) {
568  const size_t i_perm = localRows[i];
569  for (int k = 0; k < this->bcrsBlockSize_; ++k)
570  YOut(i*this->bcrsBlockSize_+k, j) = YIn(i_perm*this->bcrsBlockSize_+k, j);
571  }
572  }
573 
574  // Apply the local operator:
575  // Y_local := beta*Y_local + alpha*M^{-1}*X_local
576  this->applyImpl (XOut, YOut, blockIndex, stride, mode, as<local_scalar_type>(alpha),
577  as<local_scalar_type>(beta));
578 
579  // Scatter the permuted subset output vector Y_local back into the
580  // original output multivector Y.
581  for(size_t j = 0; j < numVecs; ++j) {
582  for(size_t i = 0; i < numRows; ++i) {
583  const size_t i_perm = localRows[i];
584  for(int k = 0; k < this->bcrsBlockSize_; ++k)
585  YIn(i_perm*this->bcrsBlockSize_+k, j) = YOut(i*this->bcrsBlockSize_+k, j);
586  }
587  }
588 }
589 
590 template<class MatrixType, class LocalScalarType>
591 void
592 DenseContainer<MatrixType, LocalScalarType, true>::
593 apply (HostView& X,
594  HostView& Y,
595  int blockIndex,
596  int stride,
597  Teuchos::ETransp mode,
598  scalar_type alpha,
599  scalar_type beta) const
600 {
601  using Teuchos::ArrayView;
602  using Teuchos::as;
603  using Teuchos::RCP;
604  using Teuchos::rcp;
605 
606  // if we have a block CRS matrix, call the appropriate method
607  if(this->hasBlockCrs_) {
608  applyBlockCrs(X,Y,blockIndex,stride,mode,alpha,beta);
609  return;
610  }
611  const size_t numVecs = X.extent(1);
612 
613  // The local operator might have a different Scalar type than
614  // MatrixType. This means that we might have to convert X and Y to
615  // the Tpetra::MultiVector specialization that the local operator
616  // wants. This class' X_ and Y_ internal fields are of the right
617  // type for the local operator, so we can use those as targets.
618 
619  const char prefix[] = "Ifpack2::DenseContainer::weightedApply: ";
621  ! IsComputed_, std::runtime_error, prefix << "You must have called the "
622  "compute() method before you may call this method. You may call "
623  "apply() as many times as you want after calling compute() once, "
624  "but you must have called compute() at least once first.");
626  X.extent (1) != Y.extent (1), std::runtime_error,
627  prefix << "X and Y have different numbers of vectors (columns). X has "
628  << X.extent (1) << ", but Y has " << Y.extent (1) << ".");
629 
630  if (numVecs == 0) {
631  return; // done! nothing to do
632  }
633 
634  // The local operator works on a permuted subset of the local parts
635  // of X and Y. The subset and permutation are defined by the index
636  // array returned by getLocalRows(). If the permutation is trivial
637  // and the subset is exactly equal to the local indices, then we
638  // could use the local parts of X and Y exactly, without needing to
639  // permute. Otherwise, we have to use temporary storage to permute
640  // X and Y. For now, we always use temporary storage.
641  //
642  // Create temporary permuted versions of the input and output.
643  // (Re)allocate X_ and/or Y_ only if necessary. We'll use them to
644  // store the permuted versions of X resp. Y. Note that X_local has
645  // the domain Map of the operator, which may be a permuted subset of
646  // the local Map corresponding to X.getMap(). Similarly, Y_local
647  // has the range Map of the operator, which may be a permuted subset
648  // of the local Map corresponding to Y.getMap(). numRows_ here
649  // gives the number of rows in the row Map of the local Inverse_
650  // operator.
651  //
652  // FIXME (mfh 20 Aug 2013) There might be an implicit assumption
653  // here that the row Map and the range Map of that operator are
654  // the same.
655  //
656  // FIXME (mfh 20 Aug 2013) This "local permutation" functionality
657  // really belongs in Tpetra.
658 
659  if(X_local.size() == 0)
660  {
661  //create all X_local and Y_local managed Views at once, are
662  //reused in subsequent apply() calls
663  for(int i = 0; i < this->numBlocks_; i++)
664  {
665  X_local.emplace_back("", this->blockRows_[i], numVecs);
666  }
667  for(int i = 0; i < this->numBlocks_; i++)
668  {
669  Y_local.emplace_back("", this->blockRows_[i], numVecs);
670  }
671  }
672 
673  const ArrayView<const local_ordinal_type> localRows = this->getLocalRows(blockIndex);
674 
675  Details::MultiVectorLocalGatherScatter<mv_type, local_mv_type> mvgs;
676  mvgs.gatherViewToView (X_local[blockIndex], X, localRows);
677 
678  // We must gather the contents of the output multivector Y even on
679  // input to applyImpl(), since the inverse operator might use it as
680  // an initial guess for a linear solve. We have no way of knowing
681  // whether it does or does not.
682 
683  mvgs.gatherViewToView (Y_local[blockIndex], Y, localRows);
684 
685  // Apply the local operator:
686  // Y_local := beta*Y_local + alpha*M^{-1}*X_local
687  this->applyImpl (X_local[blockIndex], Y_local[blockIndex], blockIndex, stride, mode,
688  as<local_scalar_type>(alpha), as<local_scalar_type>(beta));
689 
690  // Scatter the permuted subset output vector Y_local back into the
691  // original output multivector Y.
692  mvgs.scatterViewToView (Y, Y_local[blockIndex], localRows);
693 }
694 
695 template<class MatrixType, class LocalScalarType>
696 void DenseContainer<MatrixType, LocalScalarType, true>::
697 weightedApply (HostView& X,
698  HostView& Y,
699  HostView& D,
700  int blockIndex,
701  int stride,
702  Teuchos::ETransp mode,
703  scalar_type alpha,
704  scalar_type beta) const
705 {
706  using Teuchos::ArrayRCP;
707  using Teuchos::ArrayView;
708  using Teuchos::Range1D;
709  using Teuchos::Ptr;
710  using Teuchos::ptr;
711  using Teuchos::RCP;
712  using Teuchos::rcp;
713  using Teuchos::rcp_const_cast;
714  using std::endl;
716 
717  // The local operator template parameter might have a different
718  // Scalar type than MatrixType. This means that we might have to
719  // convert X and Y to the Tpetra::MultiVector specialization that
720  // the local operator wants. This class' X_ and Y_ internal fields
721  // are of the right type for the local operator, so we can use those
722  // as targets.
723 
724  const char prefix[] = "Ifpack2::DenseContainer::weightedApply: ";
726  ! IsComputed_, std::runtime_error, prefix << "You must have called the "
727  "compute() method before you may call this method. You may call "
728  "weightedApply() as many times as you want after calling compute() once, "
729  "but you must have called compute() at least once first.");
730 
731  const size_t numVecs = X.extent(1);
732 
734  X.extent(1) != Y.extent(1), std::runtime_error,
735  prefix << "X and Y have different numbers of vectors (columns). X has "
736  << X.extent(1) << ", but Y has " << Y.extent(1) << ".");
737 
738  if(numVecs == 0) {
739  return; // done! nothing to do
740  }
741 
742  const size_t numRows = this->blockRows_[blockIndex];
743 
744  // The local operator works on a permuted subset of the local parts
745  // of X and Y. The subset and permutation are defined by the index
746  // array returned by getLocalRows(). If the permutation is trivial
747  // and the subset is exactly equal to the local indices, then we
748  // could use the local parts of X and Y exactly, without needing to
749  // permute. Otherwise, we have to use temporary storage to permute
750  // X and Y. For now, we always use temporary storage.
751  //
752  // Create temporary permuted versions of the input and output.
753  // (Re)allocate X_ and/or Y_ only if necessary. We'll use them to
754  // store the permuted versions of X resp. Y. Note that X_local has
755  // the domain Map of the operator, which may be a permuted subset of
756  // the local Map corresponding to X.getMap(). Similarly, Y_local
757  // has the range Map of the operator, which may be a permuted subset
758  // of the local Map corresponding to Y.getMap(). numRows_ here
759  // gives the number of rows in the row Map of the local operator.
760  //
761  // FIXME (mfh 20 Aug 2013) There might be an implicit assumption
762  // here that the row Map and the range Map of that operator are
763  // the same.
764  //
765  // FIXME (mfh 20 Aug 2013) This "local permutation" functionality
766  // really belongs in Tpetra.
767 
768  if(X_local.size() == 0)
769  {
770  //create all X_local and Y_local managed Views at once, are
771  //reused in subsequent apply() calls
772  for(int i = 0; i < this->numBlocks_; i++)
773  {
774  X_local.emplace_back("", this->blockRows_[i], numVecs);
775  }
776  for(int i = 0; i < this->numBlocks_; i++)
777  {
778  Y_local.emplace_back("", this->blockRows_[i], numVecs);
779  }
780  }
781 
782  ArrayView<const local_ordinal_type> localRows = this->getLocalRows(blockIndex);
783 
784  Details::MultiVectorLocalGatherScatter<mv_type, local_mv_type> mvgs;
785  mvgs.gatherViewToView (X_local[blockIndex], X, localRows);
786  // We must gather the output multivector Y even on input to
787  // applyImpl(), since the local operator might use it as an initial
788  // guess for a linear solve. We have no way of knowing whether it
789  // does or does not.
790 
791  mvgs.gatherViewToView (Y_local[blockIndex], Y, localRows);
792 
793  // Apply the diagonal scaling D to the input X. It's our choice
794  // whether the result has the original input Map of X, or the
795  // permuted subset Map of X_local. If the latter, we also need to
796  // gather D into the permuted subset Map. We choose the latter, to
797  // save memory and computation. Thus, we do the following:
798  //
799  // 1. Gather D into a temporary vector D_local.
800  // 2. Create a temporary X_scaled to hold diag(D_local) * X_local.
801  // 3. Compute X_scaled := diag(D_loca) * X_local.
802 
803  HostViewLocal D_local("", numRows, 1);
804  mvgs.gatherViewToView (D_local, D, localRows);
805  HostViewLocal X_scaled("", numRows, numVecs);
806  for(size_t j = 0; j < numVecs; j++)
807  for(size_t i = 0; i < numRows; i++)
808  X_scaled(i, j) = X_local[blockIndex](i, j) * D_local(i, 0);
809 
810  // Y_temp will hold the result of M^{-1}*X_scaled. If beta == 0, we
811  // can write the result of Inverse_->apply() directly to Y_local, so
812  // Y_temp may alias Y_local. Otherwise, if beta != 0, we need
813  // temporary storage for M^{-1}*X_scaled, so Y_temp must be
814  // different than Y_local.
815  Ptr<HostViewLocal> Y_temp;
816  bool deleteYT = false;
817  if(beta == STS::zero())
818  {
819  Y_temp = ptr(&Y_local[blockIndex]);
820  } else {
821  Y_temp = ptr(new HostViewLocal("", numRows, numVecs));
822  deleteYT = true;
823  }
824 
825  // Apply the local operator: Y_temp := M^{-1} * X_scaled
826  this->applyImpl (X_scaled, *Y_temp, blockIndex, stride, mode, STS::one(), STS::zero());
827  // Y_local := beta * Y_local + alpha * diag(D_local) * Y_temp.
828  //
829  // Note that we still use the permuted subset scaling D_local here,
830  // because Y_temp has the same permuted subset Map. That's good, in
831  // fact, because it's a subset: less data to read and multiply.
832  for(size_t j = 0; j < numVecs; j++)
833  for(size_t i = 0; i < numRows; i++)
834  Y_local[blockIndex](i, j) = Y_local[blockIndex](i, j) * (local_impl_scalar_type) beta + (local_impl_scalar_type) alpha * (*Y_temp)(i, j) * D_local(i, 0);
835 
836  if(deleteYT)
837  delete Y_temp.get();
838 
839  // Copy the permuted subset output vector Y_local into the original
840  // output multivector Y.
841  mvgs.scatterViewToView (Y, Y_local[blockIndex], localRows);
842 }
843 
844 template<class MatrixType, class LocalScalarType>
845 std::ostream&
846 DenseContainer<MatrixType, LocalScalarType, true>::
847 print (std::ostream& os) const
848 {
849  Teuchos::FancyOStream fos (Teuchos::rcpFromRef (os));
850  fos.setOutputToRootOnly (0);
851  this->describe (fos);
852  return os;
853 }
854 
855 template<class MatrixType, class LocalScalarType>
856 std::string
857 DenseContainer<MatrixType, LocalScalarType, true>::
858 description () const
859 {
860  std::ostringstream oss;
861  oss << "Ifpack::DenseContainer: ";
862  if (isInitialized()) {
863  if (isComputed()) {
864  oss << "{status = initialized, computed";
865  }
866  else {
867  oss << "{status = initialized, not computed";
868  }
869  }
870  else {
871  oss << "{status = not initialized, not computed";
872  }
873 
874  oss << "}";
875  return oss.str();
876 }
877 
878 template<class MatrixType, class LocalScalarType>
879 void
880 DenseContainer<MatrixType, LocalScalarType, true>::
881 describe (Teuchos::FancyOStream& os,
882  const Teuchos::EVerbosityLevel verbLevel) const
883 {
884  using std::endl;
885  if(verbLevel==Teuchos::VERB_NONE) return;
886  os << "================================================================================" << endl;
887  os << "Ifpack2::DenseContainer" << endl;
888  for(int i = 0; i < this->numBlocks_; i++)
889  {
890  os << "Block " << i << " number of rows = " << this->blockRows_[i] << endl;
891  }
892  os << "isInitialized() = " << IsInitialized_ << endl;
893  os << "isComputed() = " << IsComputed_ << endl;
894  os << "================================================================================" << endl;
895  os << endl;
896 }
897 
898 
899 template<class MatrixType, class LocalScalarType>
900 void
901 DenseContainer<MatrixType, LocalScalarType, true>::
902 extractBlockCrs ()
903 {
904  using Teuchos::Array;
905  using Teuchos::ArrayView;
906  using Teuchos::toString;
907  auto& A = this->inputMatrix_;
908  const size_t inputMatrixNumRows = A->getNodeNumRows();
909  // We only use the rank of the calling process and the number of MPI
910  // processes for generating error messages. Extraction itself is
911  // entirely local to each participating MPI process.
912  const int myRank = A->getRowMap ()->getComm ()->getRank ();
913  const int numProcs = A->getRowMap ()->getComm ()->getSize ();
914 
915  // Sanity check that the local row indices to extract fall within
916  // the valid range of local row indices for the input matrix.
917  for(int i = 0; i < this->numBlocks_; ++i) {
918  ArrayView<const local_ordinal_type> localRows = this->getLocalRows(i);
919  for(local_ordinal_type j = 0; j < this->blockRows_[i]; ++j) {
921  localRows[j] < 0 ||
922  static_cast<size_t>(localRows[j]) >= inputMatrixNumRows,
923  std::runtime_error, "Ifpack2::DenseContainer::extract: On process " <<
924  myRank << " of " << numProcs << ", localRows[j=" << j << "] = " <<
925  localRows[j] << ", which is out of the valid range of local row indices "
926  "indices [0, " << (inputMatrixNumRows - 1) << "] for the input matrix.");
927  }
928  }
929 
930  // Convert the local row indices we want into local column indices.
931  // For every local row ii_local = localRows[i] we take, we also want
932  // to take the corresponding column. To find the corresponding
933  // column, we use the row Map to convert the local row index
934  // ii_local into a global index ii_global, and then use the column
935  // Map to convert ii_global into a local column index jj_local. If
936  // the input matrix doesn't have a column Map, we need to be using
937  // global indices anyway...
938 
939  // We use the domain Map to exclude off-process global entries.
940  auto globalRowMap = A->getRowMap ();
941  auto globalColMap = A->getColMap ();
942  auto globalDomMap = A->getDomainMap ();
943 
944  for(int blockIndex = 0; blockIndex < this->numBlocks_; blockIndex++)
945  {
946  const local_ordinal_type numRows_ = this->blockRows_[blockIndex];
947  Teuchos::ArrayView<const local_ordinal_type> localRows = this->getLocalRows(blockIndex);
948  bool rowIndsValid = true;
949  bool colIndsValid = true;
950  Array<local_ordinal_type> localCols(numRows_);
951  // For error messages, collect the sets of invalid row indices and
952  // invalid column indices. They are otherwise not useful.
953  Array<local_ordinal_type> invalidLocalRowInds;
954  Array<global_ordinal_type> invalidGlobalColInds;
955  for (local_ordinal_type i = 0; i < numRows_; i++)
956  {
957  // ii_local is the (local) row index we want to look up.
958  const local_ordinal_type ii_local = localRows[i];
959  // Find the global index jj_global corresponding to ii_local.
960  // Global indices are the same (rather, are required to be the
961  // same) in all three Maps, which is why we use jj (suggesting a
962  // column index, which is how we will use it below).
963  const global_ordinal_type jj_global = globalRowMap->getGlobalElement(ii_local);
965  {
966  // If ii_local is not a local index in the row Map on the
967  // calling process, that means localRows is incorrect. We've
968  // already checked for this in the constructor, but we might as
969  // well check again here, since it's cheap to do so (just an
970  // integer comparison, since we need jj_global anyway).
971  rowIndsValid = false;
972  invalidLocalRowInds.push_back(ii_local);
973  break;
974  }
975  // Exclude "off-process" entries: that is, those in the column Map
976  // on this process that are not in the domain Map on this process.
977  if(globalDomMap->isNodeGlobalElement(jj_global))
978  {
979  // jj_global is not an off-process entry. Look up its local
980  // index in the column Map; we want to extract this column index
981  // from the input matrix. If jj_global is _not_ in the column
982  // Map on the calling process, that could mean that the column
983  // in question is empty on this process. That would be bad for
984  // solving linear systems with the extract submatrix. We could
985  // solve the resulting singular linear systems in a minimum-norm
986  // least-squares sense, but for now we simply raise an exception.
987  const local_ordinal_type jj_local = globalColMap->getLocalElement(jj_global);
989  {
990  colIndsValid = false;
991  invalidGlobalColInds.push_back(jj_global);
992  break;
993  }
994  localCols[i] = jj_local;
995  }
996  }
998  !rowIndsValid, std::logic_error, "Ifpack2::DenseContainer::extract: "
999  "On process " << myRank << ", at least one row index in the set of local "
1000  "row indices given to the constructor is not a valid local row index in "
1001  "the input matrix's row Map on this process. This should be impossible "
1002  "because the constructor checks for this case. Here is the complete set "
1003  "of invalid local row indices: " << toString(invalidLocalRowInds) << ". "
1004  "Please report this bug to the Ifpack2 developers.");
1006  !colIndsValid, std::runtime_error, "Ifpack2::DenseContainer::extract: "
1007  "On process " << myRank << ", "
1008  "At least one row index in the set of row indices given to the constructor "
1009  "does not have a corresponding column index in the input matrix's column "
1010  "Map. This probably means that the column(s) in question is/are empty on "
1011  "this process, which would make the submatrix to extract structurally "
1012  "singular. Here is the compete set of invalid global column indices: "
1013  << toString(invalidGlobalColInds) << ".");
1014 
1015  diagBlocks_[blockIndex].putScalar(Teuchos::ScalarTraits<local_scalar_type>::zero());
1016 
1017  const size_t maxNumEntriesInRow = A->getNodeMaxNumRowEntries();
1018  Array<local_ordinal_type> ind(maxNumEntriesInRow);
1019 
1020  const local_ordinal_type INVALID = Teuchos::OrdinalTraits<local_ordinal_type>::invalid();
1021 
1022  Array<scalar_type> val(maxNumEntriesInRow * this->bcrsBlockSize_ * this->bcrsBlockSize_);
1023  for(local_ordinal_type i = 0; i < numRows_; i++)
1024  {
1025  const local_ordinal_type localRow = localRows[i];
1026  size_t numEntries;
1027  A->getLocalRowCopy(localRow, ind(), val(), numEntries);
1028 
1029  for(size_t k = 0; k < numEntries; k++)
1030  {
1031  const local_ordinal_type localCol = ind[k];
1032  // Skip off-process elements
1033  //
1034  // FIXME (mfh 24 Aug 2013) This assumes the following:
1035  //
1036  // 1. The column and row Maps begin with the same set of
1037  // on-process entries, in the same order. That is,
1038  // on-process row and column indices are the same.
1039  // 2. All off-process indices in the column Map of the input
1040  // matrix occur after that initial set.
1041  if(localCol >= 0 && static_cast<size_t> (localCol) < inputMatrixNumRows)
1042  {
1043  // for local column IDs, look for each ID in the list
1044  // of columns hosted by this object
1045  local_ordinal_type jj = INVALID;
1046  for(local_ordinal_type kk = 0; kk < numRows_; kk++)
1047  {
1048  if(localRows[kk] == localCol)
1049  jj = kk;
1050  }
1051  if(jj != INVALID)
1052  {
1053  // copy entire diagonal block
1054  for(local_ordinal_type c = 0; c < this->bcrsBlockSize_; c++)
1055  {
1056  for(local_ordinal_type r = 0; r < this->bcrsBlockSize_; r++)
1057  diagBlocks_[blockIndex](this->bcrsBlockSize_ * i + r,
1058  this->bcrsBlockSize_ * jj + c)
1059  = val[k * (this->bcrsBlockSize_ * this->bcrsBlockSize_)
1060  + (r + this->bcrsBlockSize_ * c)];
1061  }
1062  }
1063  }
1064  }
1065  }
1066  }
1067 }
1068 
1069 
1070 template<class MatrixType, class LocalScalarType>
1071 void
1072 DenseContainer<MatrixType, LocalScalarType, true>::
1073 extract ()
1074 {
1075  using Teuchos::Array;
1076  using Teuchos::ArrayView;
1077  using Teuchos::toString;
1078  auto& A = *this->inputMatrix_;
1079  const size_t inputMatrixNumRows = A.getNodeNumRows();
1080  // We only use the rank of the calling process and the number of MPI
1081  // processes for generating error messages. Extraction itself is
1082  // entirely local to each participating MPI process.
1083  const int myRank = A.getRowMap ()->getComm ()->getRank ();
1084  const int numProcs = A.getRowMap ()->getComm ()->getSize ();
1085 
1086  for(int blockIndex = 0; blockIndex < this->numBlocks_; blockIndex++)
1087  {
1088  local_ordinal_type numRows_ = this->blockRows_[blockIndex];
1089  // If this is a block CRS matrix, call the appropriate function
1090  if(this->hasBlockCrs_)
1091  {
1092  extractBlockCrs();
1093  return;
1094  }
1095 
1096  // Sanity check that the local row indices to extract fall within
1097  // the valid range of local row indices for the input matrix.
1098  ArrayView<const local_ordinal_type> localRows = this->getLocalRows(blockIndex);
1099  for(local_ordinal_type j = 0; j < numRows_; j++)
1100  {
1102  localRows[j] < 0 ||
1103  static_cast<size_t> (localRows[j]) >= inputMatrixNumRows,
1104  std::runtime_error, "Ifpack2::DenseContainer::extract: On process " <<
1105  myRank << " of " << numProcs << ", localRows[j=" << j << "] = " <<
1106  localRows[j] << ", which is out of the valid range of local row indices "
1107  "indices [0, " << (inputMatrixNumRows - 1) << "] for the input matrix.");
1108  }
1109 
1110  // Convert the local row indices we want into local column indices.
1111  // For every local row ii_local = localRows[i] we take, we also want
1112  // to take the corresponding column. To find the corresponding
1113  // column, we use the row Map to convert the local row index
1114  // ii_local into a global index ii_global, and then use the column
1115  // Map to convert ii_global into a local column index jj_local. If
1116  // the input matrix doesn't have a column Map, we need to be using
1117  // global indices anyway...
1118 
1119  // We use the domain Map to exclude off-process global entries.
1120  const map_type& globalRowMap = * (A.getRowMap ());
1121  const map_type& globalColMap = * (A.getColMap ());
1122  const map_type& globalDomMap = * (A.getDomainMap ());
1123 
1124  bool rowIndsValid = true;
1125  bool colIndsValid = true;
1126  Array<local_ordinal_type> localCols(numRows_);
1127  // For error messages, collect the sets of invalid row indices and
1128  // invalid column indices. They are otherwise not useful.
1129  Array<local_ordinal_type> invalidLocalRowInds;
1130  Array<global_ordinal_type> invalidGlobalColInds;
1131  for(local_ordinal_type i = 0; i < numRows_; i++)
1132  {
1133  // ii_local is the (local) row index we want to look up.
1134  const local_ordinal_type ii_local = localRows[i];
1135  // Find the global index jj_global corresponding to ii_local.
1136  // Global indices are the same (rather, are required to be the
1137  // same) in all three Maps, which is why we use jj (suggesting a
1138  // column index, which is how we will use it below).
1139  const global_ordinal_type jj_global = globalRowMap.getGlobalElement(ii_local);
1141  {
1142  // If ii_local is not a local index in the row Map on the
1143  // calling process, that means localRows is incorrect. We've
1144  // already checked for this in the constructor, but we might as
1145  // well check again here, since it's cheap to do so (just an
1146  // integer comparison, since we need jj_global anyway).
1147  rowIndsValid = false;
1148  invalidLocalRowInds.push_back(ii_local);
1149  break;
1150  }
1151  // Exclude "off-process" entries: that is, those in the column Map
1152  // on this process that are not in the domain Map on this process.
1153  if(globalDomMap.isNodeGlobalElement(jj_global))
1154  {
1155  // jj_global is not an off-process entry. Look up its local
1156  // index in the column Map; we want to extract this column index
1157  // from the input matrix. If jj_global is _not_ in the column
1158  // Map on the calling process, that could mean that the column
1159  // in question is empty on this process. That would be bad for
1160  // solving linear systems with the extract submatrix. We could
1161  // solve the resulting singular linear systems in a minimum-norm
1162  // least-squares sense, but for now we simply raise an exception.
1163  const local_ordinal_type jj_local = globalColMap.getLocalElement(jj_global);
1165  {
1166  colIndsValid = false;
1167  invalidGlobalColInds.push_back(jj_global);
1168  break;
1169  }
1170  localCols[i] = jj_local;
1171  }
1172  }
1174  !rowIndsValid, std::logic_error, "Ifpack2::DenseContainer::extract: "
1175  "On process " << myRank << ", at least one row index in the set of local "
1176  "row indices given to the constructor is not a valid local row index in "
1177  "the input matrix's row Map on this process. This should be impossible "
1178  "because the constructor checks for this case. Here is the complete set "
1179  "of invalid local row indices: " << toString(invalidLocalRowInds) << ". "
1180  "Please report this bug to the Ifpack2 developers.");
1182  !colIndsValid, std::runtime_error, "Ifpack2::DenseContainer::extract: "
1183  "On process " << myRank << ", "
1184  "At least one row index in the set of row indices given to the constructor "
1185  "does not have a corresponding column index in the input matrix's column "
1186  "Map. This probably means that the column(s) in question is/are empty on "
1187  "this process, which would make the submatrix to extract structurally "
1188  "singular. Here is the compete set of invalid global column indices: "
1189  << toString(invalidGlobalColInds) << ".");
1190 
1191  diagBlocks_[blockIndex].putScalar(Teuchos::ScalarTraits<local_scalar_type>::zero());
1192 
1193  const size_t maxNumEntriesInRow = A.getNodeMaxNumRowEntries();
1194  Array<local_ordinal_type> ind(maxNumEntriesInRow);
1195 
1196  const local_ordinal_type INVALID = Teuchos::OrdinalTraits<local_ordinal_type>::invalid();
1197 
1198  Array<scalar_type> val(maxNumEntriesInRow);
1199  for (local_ordinal_type i = 0; i < numRows_; i++)
1200  {
1201  const local_ordinal_type localRow = localRows[i];
1202  size_t numEntries;
1203  A.getLocalRowCopy(localRow, ind(), val(), numEntries);
1204  for (size_t k = 0; k < numEntries; ++k)
1205  {
1206  const local_ordinal_type localCol = ind[k];
1207  // Skip off-process elements
1208  //
1209  // FIXME (mfh 24 Aug 2013) This assumes the following:
1210  //
1211  // 1. The column and row Maps begin with the same set of
1212  // on-process entries, in the same order. That is,
1213  // on-process row and column indices are the same.
1214  // 2. All off-process indices in the column Map of the input
1215  // matrix occur after that initial set.
1216  if(localCol >= 0 && static_cast<size_t> (localCol) < inputMatrixNumRows)
1217  {
1218  // for local column IDs, look for each ID in the list
1219  // of columns hosted by this object
1220  local_ordinal_type jj = INVALID;
1221  for(local_ordinal_type kk = 0; kk < numRows_; kk++)
1222  {
1223  if(localRows[kk] == localCol)
1224  jj = kk;
1225  }
1226  if(jj != INVALID)
1227  diagBlocks_[blockIndex](i, jj) += val[k]; // ???
1228  }
1229  }
1230  }
1231  }
1232 }
1233 
1234 template<class MatrixType, class LocalScalarType>
1235 void DenseContainer<MatrixType, LocalScalarType, true>::clearBlocks()
1236 {
1237  std::vector<Teuchos::SerialDenseMatrix<int, local_scalar_type>> empty1;
1238  std::swap(diagBlocks_, empty1);
1239  Teuchos::Array<int> empty2;
1240  Teuchos::swap(ipiv_, empty2);
1241  std::vector<HostViewLocal> empty3;
1242  std::swap(X_local, empty3);
1243  std::vector<HostViewLocal> empty4;
1244  std::swap(Y_local, empty4);
1245  Container<MatrixType>::clearBlocks();
1246 }
1247 
1248 template<class MatrixType, class LocalScalarType>
1249 std::string DenseContainer<MatrixType, LocalScalarType, true>::getName()
1250 {
1251  return "Dense";
1252 }
1253 
1254 template<class MatrixType, class LocalScalarType>
1255 DenseContainer<MatrixType, LocalScalarType, false>::
1256 DenseContainer (const Teuchos::RCP<const row_matrix_type>& matrix,
1258  const Teuchos::RCP<const import_type>& importer,
1259  int OverlapLevel,
1260  scalar_type DampingFactor) :
1261  Container<MatrixType> (matrix, partitions, importer, OverlapLevel,
1262  DampingFactor)
1263 {
1265  (true, std::logic_error, "Ifpack2::DenseContainer: Not implemented for "
1266  "LocalScalarType = " << Teuchos::TypeNameTraits<LocalScalarType>::name ()
1267  << ".");
1268 }
1269 
1270 template<class MatrixType, class LocalScalarType>
1271 DenseContainer<MatrixType, LocalScalarType, false>::
1272 DenseContainer (const Teuchos::RCP<const row_matrix_type>& matrix,
1273  const Teuchos::Array<local_ordinal_type>& localRows) :
1274  Container<MatrixType>(matrix, localRows)
1275 {
1277  (true, std::logic_error, "Ifpack2::DenseContainer: Not implemented for "
1278  "LocalScalarType = " << Teuchos::TypeNameTraits<LocalScalarType>::name ()
1279  << ".");
1280 }
1281 
1282 template<class MatrixType, class LocalScalarType>
1283 DenseContainer<MatrixType, LocalScalarType, false>::~DenseContainer() {}
1284 
1285 template<class MatrixType, class LocalScalarType>
1286 void DenseContainer<MatrixType, LocalScalarType, false>::
1287 setParameters (const Teuchos::ParameterList& /* List */) {}
1288 
1289 template<class MatrixType, class LocalScalarType>
1290 void DenseContainer<MatrixType, LocalScalarType, false>::initialize() {}
1291 
1292 template<class MatrixType, class LocalScalarType>
1293 void DenseContainer<MatrixType, LocalScalarType, false>::compute() {}
1294 
1295 template<class MatrixType, class LocalScalarType>
1296 void DenseContainer<MatrixType, LocalScalarType, false>::factor() {}
1297 
1298 template<class MatrixType, class LocalScalarType>
1299 void DenseContainer<MatrixType, LocalScalarType, false>::
1300 applyImplBlockCrs (HostViewLocal& X,
1301  HostViewLocal& Y,
1302  int blockIndex,
1303  int stride,
1304  Teuchos::ETransp mode,
1305  local_scalar_type alpha,
1306  local_scalar_type beta) const {}
1307 
1308 template<class MatrixType, class LocalScalarType>
1309 void DenseContainer<MatrixType, LocalScalarType, false>::
1310 applyImpl (HostViewLocal& X,
1311  HostViewLocal& Y,
1312  int blockIndex,
1313  int stride,
1314  Teuchos::ETransp mode,
1315  local_scalar_type alpha,
1316  local_scalar_type beta) const {}
1317 
1318 template<class MatrixType, class LocalScalarType>
1319 void DenseContainer<MatrixType, LocalScalarType, false>::
1320 applyBlockCrs (HostView& XIn,
1321  HostView& YIn,
1322  int blockIndex,
1323  int stride,
1324  Teuchos::ETransp mode,
1325  scalar_type alpha,
1326  scalar_type beta) const {}
1327 
1328 template<class MatrixType, class LocalScalarType>
1329 void DenseContainer<MatrixType, LocalScalarType, false>::
1330 apply (HostView& X,
1331  HostView& Y,
1332  int blockIndex,
1333  int stride,
1334  Teuchos::ETransp mode,
1335  scalar_type alpha,
1336  scalar_type beta) const {}
1337 
1338 template<class MatrixType, class LocalScalarType>
1339 void DenseContainer<MatrixType, LocalScalarType, false>::
1340 weightedApply (HostView& X,
1341  HostView& Y,
1342  HostView& D,
1343  int blockIndex,
1344  int stride,
1345  Teuchos::ETransp mode,
1346  scalar_type alpha,
1347  scalar_type beta) const {}
1348 
1349 template<class MatrixType, class LocalScalarType>
1350 std::ostream& DenseContainer<MatrixType, LocalScalarType, false>::
1351 print (std::ostream& os) const
1352 {
1353  return os;
1354 }
1355 
1356 template<class MatrixType, class LocalScalarType>
1357 std::string DenseContainer<MatrixType, LocalScalarType, false>::
1358 description () const
1359 {
1360  return "";
1361 }
1362 
1363 template<class MatrixType, class LocalScalarType>
1364 void DenseContainer<MatrixType, LocalScalarType, false>::
1365 describe (Teuchos::FancyOStream& os,
1366  const Teuchos::EVerbosityLevel verbLevel) const {}
1367 
1368 template<class MatrixType, class LocalScalarType>
1369 void DenseContainer<MatrixType, LocalScalarType, false>::
1370 extractBlockCrs () {}
1371 
1372 template<class MatrixType, class LocalScalarType>
1373 void DenseContainer<MatrixType, LocalScalarType, false>::
1374 extract () {}
1375 
1376 template<class MatrixType, class LocalScalarType>
1377 void DenseContainer<MatrixType, LocalScalarType, false>::clearBlocks() {}
1378 
1379 template<class MatrixType, class LocalScalarType>
1380 std::string DenseContainer<MatrixType, LocalScalarType, false>::getName()
1381 {
1382  return "";
1383 }
1384 
1385 } // namespace Ifpack2
1386 
1387 // There's no need to instantiate for CrsMatrix too. All Ifpack2
1388 // preconditioners can and should do dynamic casts if they need a type
1389 // more specific than RowMatrix.
1390 
1391 #define IFPACK2_DENSECONTAINER_INSTANT(S,LO,GO,N) \
1392  template class Ifpack2::DenseContainer< Tpetra::RowMatrix<S, LO, GO, N>, S >;
1393 
1394 #endif // IFPACK2_DENSECONTAINER_DEF_HPP
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
size_type size() const
void GETRF(const OrdinalType &m, const OrdinalType &n, ScalarType *A, const OrdinalType &lda, OrdinalType *IPIV, OrdinalType *info) const
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
TEUCHOSCORE_LIB_DLL_EXPORT std::string toString(const EVerbosityLevel verbLevel)
void GETRS(const char &TRANS, const OrdinalType &n, const OrdinalType &nrhs, const ScalarType *A, const OrdinalType &lda, const OrdinalType *IPIV, ScalarType *B, const OrdinalType &ldb, OrdinalType *info) const
size_type size() const
TypeTo as(const TypeFrom &t)
std::string toString(const T &t)