Ifpack2 Templated Preconditioning Package  Version 1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Ifpack2_SparseContainer_def.hpp
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 //@HEADER
41 */
42 
43 #ifndef IFPACK2_SPARSECONTAINER_DEF_HPP
44 #define IFPACK2_SPARSECONTAINER_DEF_HPP
45 
47 #ifdef HAVE_MPI
48 #include <mpi.h>
50 #else
51 #include "Teuchos_DefaultSerialComm.hpp"
52 #endif
54 
55 namespace Ifpack2 {
56 
57 //==============================================================================
58 template<class MatrixType, class InverseType>
61  const Teuchos::Array<Teuchos::Array<LO> >& partitions,
63  bool pointIndexed) :
64  ContainerImpl<MatrixType, InverseScalar> (matrix, partitions, pointIndexed),
65 #ifdef HAVE_MPI
66  localComm_ (Teuchos::rcp (new Teuchos::MpiComm<int> (MPI_COMM_SELF)))
67 #else
68  localComm_ (Teuchos::rcp (new Teuchos::SerialComm<int> ()))
69 #endif // HAVE_MPI
70 {}
71 
72 //==============================================================================
73 template<class MatrixType, class InverseType>
76 
77 //==============================================================================
78 template<class MatrixType, class InverseType>
80 {
81  List_ = List;
82 }
83 
84 //==============================================================================
85 template<class MatrixType, class InverseType>
87 {
88  // We assume that if you called this method, you intend to recompute
89  // everything. Thus, we release references to all the internal
90  // objects. We do this first to save memory. (In an RCP
91  // assignment, the right-hand side and left-hand side coexist before
92  // the left-hand side's reference count gets updated.)
94  Teuchos::TimeMonitor::getNewCounter ("Ifpack2::SparseContainer::initialize");
95  Teuchos::TimeMonitor timeMon (*timer);
96 
97  //Will create the diagonal blocks and their inverses
98  //in extract()
99  diagBlocks_.assign(this->numBlocks_, Teuchos::null);
100  Inverses_.assign(this->numBlocks_, Teuchos::null);
101 
102  // Extract the submatrices.
103  this->extractGraph();
104 
105  // Initialize the inverse operator.
106  for(int i = 0; i < this->numBlocks_; i++)
107  {
108  Inverses_[i]->setParameters(List_);
109  Inverses_[i]->initialize ();
110  }
111 
112  this->IsInitialized_ = true;
113  this->IsComputed_ = false;
114 }
115 
116 //==============================================================================
117 template<class MatrixType, class InverseType>
119 {
121  Teuchos::TimeMonitor::getNewCounter ("Ifpack2::SparseContainer::compute");
122  Teuchos::TimeMonitor timeMon (*timer);
123 
124  this->IsComputed_ = false;
125  if (!this->isInitialized ()) {
126  this->initialize ();
127  }
128 
129  // Extract the submatrices values
130  this->extractValues();
131 
132  // Compute the inverse operator.
133  for(int i = 0; i < this->numBlocks_; i++) {
134  Inverses_[i]->compute ();
135  }
136 
137  this->IsComputed_ = true;
138 }
139 
140 //==============================================================================
141 template<class MatrixType, class InverseType>
143 {
144  for(auto inv : Inverses_)
145  delete inv.get();
146  Inverses_.clear();
147  diagBlocks_.clear();
149 }
150 
151 //==============================================================================
152 template<class MatrixType, class InverseType>
154 solveBlockMV(const inverse_mv_type& X,
155  inverse_mv_type& Y,
156  int blockIndex,
157  Teuchos::ETransp mode,
158  InverseScalar alpha,
159  InverseScalar beta) const
160 {
162  Inverses_[blockIndex]->getDomainMap()->getLocalNumElements() != X.getLocalLength(),
163  std::logic_error, "Ifpack2::SparseContainer::apply: Inverse_ "
164  "operator and X have incompatible dimensions (" <<
165  Inverses_[blockIndex]->getDomainMap()->getLocalNumElements() << " resp. "
166  << X.getLocalLength() << "). Please report this bug to "
167  "the Ifpack2 developers.");
169  Inverses_[blockIndex]->getRangeMap()->getLocalNumElements() != Y.getLocalLength(),
170  std::logic_error, "Ifpack2::SparseContainer::apply: Inverse_ "
171  "operator and Y have incompatible dimensions (" <<
172  Inverses_[blockIndex]->getRangeMap()->getLocalNumElements() << " resp. "
173  << Y.getLocalLength() << "). Please report this bug to "
174  "the Ifpack2 developers.");
175  Inverses_[blockIndex]->apply(X, Y, mode, alpha, beta);
176 }
177 
178 template<class MatrixType, class InverseType>
180 apply (ConstHostView X,
181  HostView Y,
182  int blockIndex,
183  Teuchos::ETransp mode,
184  SC alpha,
185  SC beta) const
186 {
187  using Teuchos::ArrayView;
188  using Teuchos::as;
189 
190  // The InverseType template parameter might have different template
191  // parameters (Scalar, LO, GO, and/or Node) than MatrixType. For
192  // example, MatrixType (a global object) might use a bigger GO
193  // (global ordinal) type than InverseType (which deals with the
194  // diagonal block, a local object). This means that we might have
195  // to convert X and Y to the Tpetra::MultiVector specialization that
196  // InverseType wants. This class' X_ and Y_ internal fields are of
197  // the right type for InverseType, so we can use those as targets.
199  Teuchos::TimeMonitor::getNewCounter ("Ifpack2::SparseContainer::apply");
200  Teuchos::TimeMonitor timeMon (*timer);
201 
202 
203  // Tpetra::MultiVector specialization corresponding to InverseType.
205  size_t numVecs = X.extent(1);
206 
208  ! this->IsComputed_, std::runtime_error, "Ifpack2::SparseContainer::apply: "
209  "You must have called the compute() method before you may call apply(). "
210  "You may call the apply() method as many times as you want after calling "
211  "compute() once, but you must have called compute() at least once.");
213  X.extent(1) != Y.extent(1), std::runtime_error,
214  "Ifpack2::SparseContainer::apply: X and Y have different numbers of "
215  "vectors. X has " << X.extent(1)
216  << ", but Y has " << Y.extent(1) << ".");
217 
218  if (numVecs == 0) {
219  return; // done! nothing to do
220  }
221 
222  const LO numRows = this->blockSizes_[blockIndex];
223 
224  // The operator Inverse_ works on a permuted subset of the local
225  // parts of X and Y. The subset and permutation are defined by the
226  // index array returned by getBlockRows(). If the permutation is
227  // trivial and the subset is exactly equal to the local indices,
228  // then we could use the local parts of X and Y exactly, without
229  // needing to permute. Otherwise, we have to use temporary storage
230  // to permute X and Y. For now, we always use temporary storage.
231  //
232  // Create temporary permuted versions of the input and output.
233  // (Re)allocate X_ and/or Y_ only if necessary. We'll use them to
234  // store the permuted versions of X resp. Y. Note that X_local has
235  // the domain Map of the operator, which may be a permuted subset of
236  // the local Map corresponding to X.getMap(). Similarly, Y_local
237  // has the range Map of the operator, which may be a permuted subset
238  // of the local Map corresponding to Y.getMap(). numRows here
239  // gives the number of rows in the row Map of the local Inverse_
240  // operator.
241  //
242  // FIXME (mfh 20 Aug 2013) There might be an implicit assumption
243  // here that the row Map and the range Map of that operator are
244  // the same.
245  //
246  // FIXME (mfh 20 Aug 2013) This "local permutation" functionality
247  // really belongs in Tpetra.
248 
249  if(invX.size() == 0)
250  {
251  for(LO i = 0; i < this->numBlocks_; i++)
252  invX.emplace_back(Inverses_[i]->getDomainMap(), numVecs);
253  for(LO i = 0; i < this->numBlocks_; i++)
254  invY.emplace_back(Inverses_[i]->getDomainMap(), numVecs);
255  }
256  inverse_mv_type& X_local = invX[blockIndex];
258  X_local.getLocalLength() != size_t(numRows * this->scalarsPerRow_), std::logic_error,
259  "Ifpack2::SparseContainer::apply: "
260  "X_local has length " << X_local.getLocalLength() << ", which does "
261  "not match numRows = " << numRows * this->scalarsPerRow_ << ". Please report this bug to "
262  "the Ifpack2 developers.");
263  const ArrayView<const LO> blockRows = this->getBlockRows(blockIndex);
264  if(this->scalarsPerRow_ == 1)
265  mvgs.gatherMVtoView(X_local, X, blockRows);
266  else
267  mvgs.gatherMVtoViewBlock(X_local, X, blockRows, this->scalarsPerRow_);
268 
269  // We must gather the output multivector Y even on input to
270  // Inverse_->apply(), since the Inverse_ operator might use it as an
271  // initial guess for a linear solve. We have no way of knowing
272  // whether it does or does not.
273 
274  inverse_mv_type& Y_local = invY[blockIndex];
276  Y_local.getLocalLength () != size_t(numRows * this->scalarsPerRow_), std::logic_error,
277  "Ifpack2::SparseContainer::apply: "
278  "Y_local has length " << Y_local.getLocalLength () << ", which does "
279  "not match numRows = " << numRows * this->scalarsPerRow_ << ". Please report this bug to "
280  "the Ifpack2 developers.");
281 
282  if(this->scalarsPerRow_ == 1)
283  mvgs.gatherMVtoView(Y_local, Y, blockRows);
284  else
285  mvgs.gatherMVtoViewBlock(Y_local, Y, blockRows, this->scalarsPerRow_);
286 
287  // Apply the local operator:
288  // Y_local := beta*Y_local + alpha*M^{-1}*X_local
289  this->solveBlockMV(X_local, Y_local, blockIndex, mode,
290  InverseScalar(alpha), InverseScalar(beta));
291 
292 
293  // Scatter the permuted subset output vector Y_local back into the
294  // original output multivector Y.
295  if(this->scalarsPerRow_ == 1)
296  mvgs.scatterMVtoView(Y, Y_local, blockRows);
297  else
298  mvgs.scatterMVtoViewBlock(Y, Y_local, blockRows, this->scalarsPerRow_);
299 }
300 
301 //==============================================================================
302 template<class MatrixType, class InverseType>
304 weightedApply (ConstHostView X,
305  HostView Y,
306  ConstHostView D,
307  int blockIndex,
308  Teuchos::ETransp mode,
309  SC alpha,
310  SC beta) const
311 {
312  using Teuchos::ArrayView;
313  using Teuchos::Range1D;
314  using std::cerr;
315  using std::endl;
317 
318  // The InverseType template parameter might have different template
319  // parameters (Scalar, LO, GO, and/or Node) than MatrixType. For
320  // example, MatrixType (a global object) might use a bigger GO
321  // (global ordinal) type than InverseType (which deals with the
322  // diagonal block, a local object). This means that we might have
323  // to convert X and Y to the Tpetra::MultiVector specialization that
324  // InverseType wants. This class' X_ and Y_ internal fields are of
325  // the right type for InverseType, so we can use those as targets.
326 
327  // Tpetra::Vector specialization corresponding to InverseType.
328  typedef Tpetra::Vector<InverseScalar, InverseLocalOrdinal, InverseGlobalOrdinal, InverseNode> inverse_vector_type;
329 
331  const size_t numVecs = X.extent(1);
332 
334  ! this->IsComputed_, std::runtime_error, "Ifpack2::SparseContainer::"
335  "weightedApply: You must have called the compute() method before you may "
336  "call apply(). You may call the apply() method as many times as you want "
337  "after calling compute() once, but you must have called compute() at least "
338  "once.");
340  X.extent(1) != Y.extent(1), std::runtime_error,
341  "Ifpack2::SparseContainer::weightedApply: X and Y have different numbers "
342  "of vectors. X has " << X.extent(1) << ", but Y has "
343  << Y.extent(1) << ".");
344 
345  //bmk 7-2019: BlockRelaxation already checked this, but if that changes...
347  this->scalarsPerRow_ > 1, std::logic_error, "Ifpack2::SparseContainer::weightedApply: "
348  "Use of block rows isn't allowed in overlapping Jacobi (the only method that uses weightedApply");
349 
350  if (numVecs == 0) {
351  return; // done! nothing to do
352  }
353 
354  // The operator Inverse_ works on a permuted subset of the local
355  // parts of X and Y. The subset and permutation are defined by the
356  // index array returned by getBlockRows(). If the permutation is
357  // trivial and the subset is exactly equal to the local indices,
358  // then we could use the local parts of X and Y exactly, without
359  // needing to permute. Otherwise, we have to use temporary storage
360  // to permute X and Y. For now, we always use temporary storage.
361  //
362  // Create temporary permuted versions of the input and output.
363  // (Re)allocate X_ and/or Y_ only if necessary. We'll use them to
364  // store the permuted versions of X resp. Y. Note that X_local has
365  // the domain Map of the operator, which may be a permuted subset of
366  // the local Map corresponding to X.getMap(). Similarly, Y_local
367  // has the range Map of the operator, which may be a permuted subset
368  // of the local Map corresponding to Y.getMap(). numRows here
369  // gives the number of rows in the row Map of the local Inverse_
370  // operator.
371  //
372  // FIXME (mfh 20 Aug 2013) There might be an implicit assumption
373  // here that the row Map and the range Map of that operator are
374  // the same.
375  //
376  // FIXME (mfh 20 Aug 2013) This "local permutation" functionality
377  // really belongs in Tpetra.
378  const LO numRows = this->blockSizes_[blockIndex];
379 
380  if(invX.size() == 0)
381  {
382  for(LO i = 0; i < this->numBlocks_; i++)
383  invX.emplace_back(Inverses_[i]->getDomainMap(), numVecs);
384  for(LO i = 0; i < this->numBlocks_; i++)
385  invY.emplace_back(Inverses_[i]->getDomainMap(), numVecs);
386  }
387  inverse_mv_type& X_local = invX[blockIndex];
388  const ArrayView<const LO> blockRows = this->getBlockRows(blockIndex);
389  mvgs.gatherMVtoView(X_local, X, blockRows);
390 
391  // We must gather the output multivector Y even on input to
392  // Inverse_->apply(), since the Inverse_ operator might use it as an
393  // initial guess for a linear solve. We have no way of knowing
394  // whether it does or does not.
395 
396  inverse_mv_type Y_local = invY[blockIndex];
397  TEUCHOS_TEST_FOR_EXCEPTION(
398  Y_local.getLocalLength() != size_t(numRows), std::logic_error,
399  "Ifpack2::SparseContainer::weightedApply: "
400  "Y_local has length " << X_local.getLocalLength() << ", which does "
401  "not match numRows = " << numRows << ". Please report this bug to "
402  "the Ifpack2 developers.");
403  mvgs.gatherMVtoView(Y_local, Y, blockRows);
404 
405  // Apply the diagonal scaling D to the input X. It's our choice
406  // whether the result has the original input Map of X, or the
407  // permuted subset Map of X_local. If the latter, we also need to
408  // gather D into the permuted subset Map. We choose the latter, to
409  // save memory and computation. Thus, we do the following:
410  //
411  // 1. Gather D into a temporary vector D_local.
412  // 2. Create a temporary X_scaled to hold diag(D_local) * X_local.
413  // 3. Compute X_scaled := diag(D_loca) * X_local.
414 
415  inverse_vector_type D_local(Inverses_[blockIndex]->getDomainMap());
416  TEUCHOS_TEST_FOR_EXCEPTION(
417  D_local.getLocalLength() != size_t(this->blockSizes_[blockIndex]), std::logic_error,
418  "Ifpack2::SparseContainer::weightedApply: "
419  "D_local has length " << X_local.getLocalLength () << ", which does "
420  "not match numRows = " << this->blockSizes_[blockIndex] << ". Please report this bug to "
421  "the Ifpack2 developers.");
422  mvgs.gatherMVtoView(D_local, D, blockRows);
423  inverse_mv_type X_scaled(Inverses_[blockIndex]->getDomainMap(), numVecs);
424  X_scaled.elementWiseMultiply(STS::one(), D_local, X_local, STS::zero());
425 
426  // Y_temp will hold the result of M^{-1}*X_scaled. If beta == 0, we
427  // can write the result of Inverse_->apply() directly to Y_local, so
428  // Y_temp may alias Y_local. Otherwise, if beta != 0, we need
429  // temporary storage for M^{-1}*X_scaled, so Y_temp must be
430  // different than Y_local.
431  inverse_mv_type* Y_temp;
432  if (InverseScalar(beta) == STS::zero ()) {
433  Y_temp = &Y_local;
434  } else {
435  Y_temp = new inverse_mv_type(Inverses_[blockIndex]->getRangeMap(), numVecs);
436  }
437  // Apply the local operator: Y_temp := M^{-1} * X_scaled
438  Inverses_[blockIndex]->apply(X_scaled, *Y_temp, mode);
439  // Y_local := beta * Y_local + alpha * diag(D_local) * Y_tmp.
440  //
441  // Note that we still use the permuted subset scaling D_local here,
442  // because Y_temp has the same permuted subset Map. That's good, in
443  // fact, because it's a subset: less data to read and multiply.
444  Y_local.elementWiseMultiply(alpha, D_local, *Y_temp, beta);
445  if(Y_temp != &Y_local)
446  delete Y_temp;
447  // Copy the permuted subset output vector Y_local into the original
448  // output multivector Y.
449  mvgs.scatterMVtoView(Y, Y_local, blockRows);
450 }
451 
452 //==============================================================================
453 template<class MatrixType, class InverseType>
454 std::ostream& SparseContainer<MatrixType,InverseType>::print(std::ostream& os) const
455 {
456  Teuchos::FancyOStream fos(Teuchos::rcp(&os,false));
457  fos.setOutputToRootOnly(0);
458  describe(fos);
459  return(os);
460 }
461 
462 //==============================================================================
463 template<class MatrixType, class InverseType>
465 {
466  std::ostringstream oss;
467  oss << "\"Ifpack2::SparseContainer\": {";
468  if (this->isInitialized()) {
469  if (this->isComputed()) {
470  oss << "status = initialized, computed";
471  }
472  else {
473  oss << "status = initialized, not computed";
474  }
475  }
476  else {
477  oss << "status = not initialized, not computed";
478  }
479  for(int i = 0; i < this->numBlocks_; i++)
480  {
481  oss << ", Block Inverse " << i << ": {";
482  oss << Inverses_[i]->description();
483  oss << "}";
484  }
485  oss << "}";
486  return oss.str();
487 }
488 
489 //==============================================================================
490 template<class MatrixType, class InverseType>
492 {
493  using std::endl;
494  if(verbLevel==Teuchos::VERB_NONE) return;
495  os << "================================================================================" << endl;
496  os << "Ifpack2::SparseContainer" << endl;
497  for(int i = 0; i < this->numBlocks_; i++)
498  {
499  os << "Block " << i << " rows: = " << this->blockSizes_[i] << endl;
500  }
501  os << "isInitialized() = " << this->IsInitialized_ << endl;
502  os << "isComputed() = " << this->IsComputed_ << endl;
503  os << "================================================================================" << endl;
504  os << endl;
505 }
506 
507 //==============================================================================
508 template<class MatrixType, class InverseType>
510 extract ()
511 {
512  using Teuchos::Array;
513  using Teuchos::ArrayView;
514  using Teuchos::RCP;
515  const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
516  //To extract diagonal blocks, need to translate local rows to local columns.
517  //Strategy: make a lookup table that translates local cols in the matrix to offsets in blockRows_:
518  //blockOffsets_[b] <= offset < blockOffsets_[b+1]: tests whether the column is in block b.
519  //offset - blockOffsets_[b]: gives the column within block b.
520  //
521  //This provides the block and col within a block in O(1).
522  Teuchos::Array<InverseGlobalOrdinal> indicesToInsert;
523  Teuchos::Array<InverseScalar> valuesToInsert;
524  if(this->scalarsPerRow_ > 1)
525  {
526  Array<LO> colToBlockOffset(this->inputBlockMatrix_->getLocalNumCols(), INVALID);
527  for(int i = 0; i < this->numBlocks_; i++)
528  {
529  //Get the interval where block i is defined in blockRows_
530  LO blockStart = this->blockOffsets_[i];
531  LO blockSize = this->blockSizes_[i];
532  LO blockPointSize = this->blockSizes_[i] * this->scalarsPerRow_;
533  LO blockEnd = blockStart + blockSize;
534  ArrayView<const LO> blockRows = this->getBlockRows(i);
535  //Set the lookup table entries for the columns appearing in block i.
536  //If OverlapLevel_ > 0, then this may overwrite values for previous blocks, but
537  //this is OK. The values updated here are only needed to process block i's entries.
538  for(LO j = 0; j < blockSize; j++)
539  {
540  LO localCol = this->translateRowToCol(blockRows[j]);
541  colToBlockOffset[localCol] = blockStart + j;
542  }
543  //First, count the number of entries in each row of block i
544  //(to allocate it with StaticProfile)
545  Array<size_t> rowEntryCounts(blockPointSize, 0);
546  //blockRow counts the BlockCrs LIDs that are going into this block
547  //Rows are inserted into the CrsMatrix in sequential order
548  using inds_type = typename block_crs_matrix_type::local_inds_host_view_type;
549  using vals_type = typename block_crs_matrix_type::values_host_view_type;
550  for(LO blockRow = 0; blockRow < blockSize; blockRow++)
551  {
552  //get a raw view of the whole block row
553  inds_type indices;
554  vals_type values;
555  LO inputRow = this->blockRows_[blockStart + blockRow];
556  this->inputBlockMatrix_->getLocalRowView(inputRow, indices, values);
557  LO numEntries = (LO) indices.size();
558  for(LO br = 0; br < this->bcrsBlockSize_; br++)
559  {
560  for(LO k = 0; k < numEntries; k++)
561  {
562  LO colOffset = colToBlockOffset[indices[k]];
563  if(blockStart <= colOffset && colOffset < blockEnd)
564  {
565  rowEntryCounts[blockRow * this->bcrsBlockSize_ + br] += this->bcrsBlockSize_;
566  }
567  }
568  }
569  }
570  //now that row sizes are known, can allocate the diagonal matrix
571  RCP<InverseMap> tempMap(new InverseMap(blockPointSize, 0, this->localComm_));
572  diagBlocks_[i] = rcp(new InverseCrs(tempMap, rowEntryCounts));
573  Inverses_[i] = rcp(new InverseType(diagBlocks_[i]));
574  //insert the actual entries, one row at a time
575  for(LO blockRow = 0; blockRow < blockSize; blockRow++)
576  {
577  //get a raw view of the whole block row
578  inds_type indices;
579  vals_type values;
580  LO inputRow = this->blockRows_[blockStart + blockRow];
581  this->inputBlockMatrix_->getLocalRowView(inputRow, indices, values);
582  LO numEntries = (LO) indices.size();
583  for(LO br = 0; br < this->bcrsBlockSize_; br++)
584  {
585  indicesToInsert.clear();
586  valuesToInsert.clear();
587  for(LO k = 0; k < numEntries; k++)
588  {
589  LO colOffset = colToBlockOffset[indices[k]];
590  if(blockStart <= colOffset && colOffset < blockEnd)
591  {
592  LO blockCol = colOffset - blockStart;
593  //bc iterates over the columns in (block) entry k
594  for(LO bc = 0; bc < this->bcrsBlockSize_; bc++)
595  {
596  indicesToInsert.push_back(blockCol * this->bcrsBlockSize_ + bc);
597  valuesToInsert.push_back(values[k * this->bcrsBlockSize_ * this->bcrsBlockSize_ + bc * this->bcrsBlockSize_ + br]);
598  }
599  }
600  }
601  InverseGlobalOrdinal rowToInsert = blockRow * this->bcrsBlockSize_ + br;
602  if(indicesToInsert.size())
603  diagBlocks_[i]->insertGlobalValues(rowToInsert, indicesToInsert(), valuesToInsert());
604  }
605  }
606  diagBlocks_[i]->fillComplete();
607  }
608  }
609  else
610  {
611  //get the mapping from point-indexed matrix columns to offsets in blockRows_
612  //(this includes regular CrsMatrix columns, in which case scalarsPerRow_ == 1)
613  Array<LO> colToBlockOffset(this->inputMatrix_->getLocalNumCols() * this->bcrsBlockSize_, INVALID);
614  for(int i = 0; i < this->numBlocks_; i++)
615  {
616  //Get the interval where block i is defined in blockRows_
617  LO blockStart = this->blockOffsets_[i];
618  LO blockSize = this->blockSizes_[i];
619  LO blockEnd = blockStart + blockSize;
620  ArrayView<const LO> blockRows = this->getBlockRows(i);
621  //Set the lookup table entries for the columns appearing in block i.
622  //If OverlapLevel_ > 0, then this may overwrite values for previous blocks, but
623  //this is OK. The values updated here are only needed to process block i's entries.
624  for(LO j = 0; j < blockSize; j++)
625  {
626  //translateRowToCol will return the corresponding split column
627  LO localCol = this->translateRowToCol(blockRows[j]);
628  colToBlockOffset[localCol] = blockStart + j;
629  }
630  Teuchos::Array<size_t> rowEntryCounts(blockSize, 0);
631  for(LO j = 0; j < blockSize; j++)
632  {
633  rowEntryCounts[j] = this->getInputRowView(this->blockRows_[blockStart + j]).size();
634  }
635  RCP<InverseMap> tempMap(new InverseMap(blockSize, 0, this->localComm_));
636  diagBlocks_[i] = rcp(new InverseCrs(tempMap, rowEntryCounts));
637  Inverses_[i] = rcp(new InverseType(diagBlocks_[i]));
638  for(LO blockRow = 0; blockRow < blockSize; blockRow++)
639  {
640  valuesToInsert.clear();
641  indicesToInsert.clear();
642  //get a view of the split row
643  LO inputSplitRow = this->blockRows_[blockStart + blockRow];
644  auto rowView = this->getInputRowView(inputSplitRow);
645  for(size_t k = 0; k < rowView.size(); k++)
646  {
647  LO colOffset = colToBlockOffset[rowView.ind(k)];
648  if(blockStart <= colOffset && colOffset < blockEnd)
649  {
650  LO blockCol = colOffset - blockStart;
651  indicesToInsert.push_back(blockCol);
652  valuesToInsert.push_back(rowView.val(k));
653  }
654  }
655  if(indicesToInsert.size())
656  diagBlocks_[i]->insertGlobalValues(blockRow, indicesToInsert(), valuesToInsert());
657  }
658  diagBlocks_[i]->fillComplete ();
659  }
660  }
661 }
662 
663 //==============================================================================
664 template<class MatrixType, class InverseType>
665 void SparseContainer<MatrixType,InverseType>::
666 extractGraph ()
667 {
668  using Teuchos::Array;
669  using Teuchos::ArrayView;
670  using Teuchos::RCP;
671  const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
672  //To extract diagonal blocks, need to translate local rows to local columns.
673  //Strategy: make a lookup table that translates local cols in the matrix to offsets in blockRows_:
674  //blockOffsets_[b] <= offset < blockOffsets_[b+1]: tests whether the column is in block b.
675  //offset - blockOffsets_[b]: gives the column within block b.
676  //
677  //This provides the block and col within a block in O(1).
679  Teuchos::TimeMonitor::getNewCounter ("Ifpack2::SparseContainer::extractGraph");
680  Teuchos::TimeMonitor timeMon (*timer);
681 
682  Teuchos::Array<InverseGlobalOrdinal> indicesToInsert;
683  if(this->scalarsPerRow_ > 1)
684  {
685  Array<LO> colToBlockOffset(this->inputBlockMatrix_->getLocalNumCols(), INVALID);
686  for(int i = 0; i < this->numBlocks_; i++)
687  {
688  //Get the interval where block i is defined in blockRows_
689  LO blockStart = this->blockOffsets_[i];
690  LO blockSize = this->blockSizes_[i];
691  LO blockPointSize = this->blockSizes_[i] * this->scalarsPerRow_;
692  LO blockEnd = blockStart + blockSize;
693  ArrayView<const LO> blockRows = this->getBlockRows(i);
694  //Set the lookup table entries for the columns appearing in block i.
695  //If OverlapLevel_ > 0, then this may overwrite values for previous blocks, but
696  //this is OK. The values updated here are only needed to process block i's entries.
697  for(LO j = 0; j < blockSize; j++)
698  {
699  LO localCol = this->translateRowToCol(blockRows[j]);
700  colToBlockOffset[localCol] = blockStart + j;
701  }
702  //First, count the number of entries in each row of block i
703  //(to allocate it with StaticProfile)
704  Array<size_t> rowEntryCounts(blockPointSize, 0);
705  //blockRow counts the BlockCrs LIDs that are going into this block
706  //Rows are inserted into the CrsMatrix in sequential order
707  using inds_type = typename block_crs_matrix_type::local_inds_host_view_type;
708  for(LO blockRow = 0; blockRow < blockSize; blockRow++)
709  {
710  //get a raw view of the whole block row
711  inds_type indices;
712  LO inputRow = this->blockRows_[blockStart + blockRow];
713  this->inputBlockMatrix_->getGraph()->getLocalRowView(inputRow, indices);
714  LO numEntries = (LO) indices.size();
715  for(LO br = 0; br < this->bcrsBlockSize_; br++)
716  {
717  for(LO k = 0; k < numEntries; k++)
718  {
719  LO colOffset = colToBlockOffset[indices[k]];
720  if(blockStart <= colOffset && colOffset < blockEnd)
721  {
722  rowEntryCounts[blockRow * this->bcrsBlockSize_ + br] += this->bcrsBlockSize_;
723  }
724  }
725  }
726  }
727  //now that row sizes are known, can allocate the diagonal matrix
728  RCP<InverseMap> tempMap(new InverseMap(blockPointSize, 0, this->localComm_));
729  auto diagGraph = rcp(new InverseGraph(tempMap, rowEntryCounts));
730  //insert the actual entries, one row at a time
731  for(LO blockRow = 0; blockRow < blockSize; blockRow++)
732  {
733  //get a raw view of the whole block row
734  inds_type indices;
735  LO inputRow = this->blockRows_[blockStart + blockRow];
736  this->inputBlockMatrix_->getGraph()->getLocalRowView(inputRow, indices);
737  LO numEntries = (LO) indices.size();
738  for(LO br = 0; br < this->bcrsBlockSize_; br++)
739  {
740  indicesToInsert.clear();
741  for(LO k = 0; k < numEntries; k++)
742  {
743  LO colOffset = colToBlockOffset[indices[k]];
744  if(blockStart <= colOffset && colOffset < blockEnd)
745  {
746  LO blockCol = colOffset - blockStart;
747  //bc iterates over the columns in (block) entry k
748  for(LO bc = 0; bc < this->bcrsBlockSize_; bc++)
749  {
750  indicesToInsert.push_back(blockCol * this->bcrsBlockSize_ + bc);
751  }
752  }
753  }
754  InverseGlobalOrdinal rowToInsert = blockRow * this->bcrsBlockSize_ + br;
755  if(indicesToInsert.size())
756  diagGraph->insertGlobalIndices(rowToInsert, indicesToInsert());
757  }
758  }
759  diagGraph->fillComplete();
760 
761  // create matrix block
762  diagBlocks_[i] = rcp(new InverseCrs(diagGraph));
763  Inverses_[i] = rcp(new InverseType(diagBlocks_[i]));
764  }
765  }
766  else
767  {
768  //get the mapping from point-indexed matrix columns to offsets in blockRows_
769  //(this includes regular CrsMatrix columns, in which case scalarsPerRow_ == 1)
770  Array<LO> colToBlockOffset(this->inputMatrix_->getLocalNumCols() * this->bcrsBlockSize_, INVALID);
771  for(int i = 0; i < this->numBlocks_; i++)
772  {
773  //Get the interval where block i is defined in blockRows_
774  LO blockStart = this->blockOffsets_[i];
775  LO blockSize = this->blockSizes_[i];
776  LO blockEnd = blockStart + blockSize;
777  ArrayView<const LO> blockRows = this->getBlockRows(i);
778  //Set the lookup table entries for the columns appearing in block i.
779  //If OverlapLevel_ > 0, then this may overwrite values for previous blocks, but
780  //this is OK. The values updated here are only needed to process block i's entries.
781  for(LO j = 0; j < blockSize; j++)
782  {
783  //translateRowToCol will return the corresponding split column
784  LO localCol = this->translateRowToCol(blockRows[j]);
785  colToBlockOffset[localCol] = blockStart + j;
786  }
787  Teuchos::Array<size_t> rowEntryCounts(blockSize, 0);
788  for(LO j = 0; j < blockSize; j++)
789  {
790  rowEntryCounts[j] = this->getInputRowView(this->blockRows_[blockStart + j]).size();
791  }
792  RCP<InverseMap> tempMap(new InverseMap(blockSize, 0, this->localComm_));
793  auto diagGraph = rcp(new InverseGraph(tempMap, rowEntryCounts));
794  for(LO blockRow = 0; blockRow < blockSize; blockRow++)
795  {
796  indicesToInsert.clear();
797  //get a view of the split row
798  LO inputSplitRow = this->blockRows_[blockStart + blockRow];
799  auto rowView = this->getInputRowView(inputSplitRow);
800  for(size_t k = 0; k < rowView.size(); k++)
801  {
802  LO colOffset = colToBlockOffset[rowView.ind(k)];
803  if(blockStart <= colOffset && colOffset < blockEnd)
804  {
805  LO blockCol = colOffset - blockStart;
806  indicesToInsert.push_back(blockCol);
807  }
808  }
809  if(indicesToInsert.size())
810  diagGraph->insertGlobalIndices(blockRow, indicesToInsert());
811  }
812  diagGraph->fillComplete();
813 
814  // create matrix block
815  diagBlocks_[i] = rcp(new InverseCrs(diagGraph));
816  Inverses_[i] = rcp(new InverseType(diagBlocks_[i]));
817  }
818  }
819 }
820 
821 //==============================================================================
822 template<class MatrixType, class InverseType>
823 void SparseContainer<MatrixType,InverseType>::
824 extractValues ()
825 {
826  using Teuchos::Array;
827  using Teuchos::ArrayView;
828  using Teuchos::RCP;
829  const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
830  //To extract diagonal blocks, need to translate local rows to local columns.
831  //Strategy: make a lookup table that translates local cols in the matrix to offsets in blockRows_:
832  //blockOffsets_[b] <= offset < blockOffsets_[b+1]: tests whether the column is in block b.
833  //offset - blockOffsets_[b]: gives the column within block b.
834  //
835  //This provides the block and col within a block in O(1).
837  Teuchos::TimeMonitor::getNewCounter ("Ifpack2::SparseContainer::extractValues");
838  Teuchos::TimeMonitor timeMon (*timer);
839 
840  Teuchos::Array<InverseGlobalOrdinal> indicesToInsert;
841  Teuchos::Array<InverseScalar> valuesToInsert;
842  if(this->scalarsPerRow_ > 1)
843  {
844  Array<LO> colToBlockOffset(this->inputBlockMatrix_->getLocalNumCols(), INVALID);
845  for(int i = 0; i < this->numBlocks_; i++)
846  {
847  //Get the interval where block i is defined in blockRows_
848  LO blockStart = this->blockOffsets_[i];
849  LO blockSize = this->blockSizes_[i];
850  LO blockEnd = blockStart + blockSize;
851  ArrayView<const LO> blockRows = this->getBlockRows(i);
852  //Set the lookup table entries for the columns appearing in block i.
853  //If OverlapLevel_ > 0, then this may overwrite values for previous blocks, but
854  //this is OK. The values updated here are only needed to process block i's entries.
855  for(LO j = 0; j < blockSize; j++)
856  {
857  LO localCol = this->translateRowToCol(blockRows[j]);
858  colToBlockOffset[localCol] = blockStart + j;
859  }
860  using inds_type = typename block_crs_matrix_type::local_inds_host_view_type;
861  using vals_type = typename block_crs_matrix_type::values_host_view_type;
862  //insert the actual entries, one row at a time
863  diagBlocks_[i]->resumeFill();
864  for(LO blockRow = 0; blockRow < blockSize; blockRow++)
865  {
866  //get a raw view of the whole block row
867  inds_type indices;
868  vals_type values;
869  LO inputRow = this->blockRows_[blockStart + blockRow];
870  this->inputBlockMatrix_->getLocalRowView(inputRow, indices, values);
871  LO numEntries = (LO) indices.size();
872  for(LO br = 0; br < this->bcrsBlockSize_; br++)
873  {
874  indicesToInsert.clear();
875  valuesToInsert.clear();
876  for(LO k = 0; k < numEntries; k++)
877  {
878  LO colOffset = colToBlockOffset[indices[k]];
879  if(blockStart <= colOffset && colOffset < blockEnd)
880  {
881  LO blockCol = colOffset - blockStart;
882  //bc iterates over the columns in (block) entry k
883  for(LO bc = 0; bc < this->bcrsBlockSize_; bc++)
884  {
885  indicesToInsert.push_back(blockCol * this->bcrsBlockSize_ + bc);
886  valuesToInsert.push_back(values[k * this->bcrsBlockSize_ * this->bcrsBlockSize_ + bc * this->bcrsBlockSize_ + br]);
887  }
888  }
889  }
890  InverseGlobalOrdinal rowToInsert = blockRow * this->bcrsBlockSize_ + br;
891  if(indicesToInsert.size())
892  diagBlocks_[i]->replaceGlobalValues(rowToInsert, indicesToInsert(), valuesToInsert());
893  }
894  }
895  diagBlocks_[i]->fillComplete();
896  }
897  }
898  else
899  {
900  //get the mapping from point-indexed matrix columns to offsets in blockRows_
901  //(this includes regular CrsMatrix columns, in which case scalarsPerRow_ == 1)
902  Array<LO> colToBlockOffset(this->inputMatrix_->getLocalNumCols() * this->bcrsBlockSize_, INVALID);
903  for(int i = 0; i < this->numBlocks_; i++)
904  {
905  //Get the interval where block i is defined in blockRows_
906  LO blockStart = this->blockOffsets_[i];
907  LO blockSize = this->blockSizes_[i];
908  LO blockEnd = blockStart + blockSize;
909  ArrayView<const LO> blockRows = this->getBlockRows(i);
910  //Set the lookup table entries for the columns appearing in block i.
911  //If OverlapLevel_ > 0, then this may overwrite values for previous blocks, but
912  //this is OK. The values updated here are only needed to process block i's entries.
913  for(LO j = 0; j < blockSize; j++)
914  {
915  //translateRowToCol will return the corresponding split column
916  LO localCol = this->translateRowToCol(blockRows[j]);
917  colToBlockOffset[localCol] = blockStart + j;
918  }
919  diagBlocks_[i]->resumeFill();
920  for(LO blockRow = 0; blockRow < blockSize; blockRow++)
921  {
922  valuesToInsert.clear();
923  indicesToInsert.clear();
924  //get a view of the split row
925  LO inputSplitRow = this->blockRows_[blockStart + blockRow];
926  auto rowView = this->getInputRowView(inputSplitRow);
927  for(size_t k = 0; k < rowView.size(); k++)
928  {
929  LO colOffset = colToBlockOffset[rowView.ind(k)];
930  if(blockStart <= colOffset && colOffset < blockEnd)
931  {
932  LO blockCol = colOffset - blockStart;
933  indicesToInsert.push_back(blockCol);
934  valuesToInsert.push_back(rowView.val(k));
935  }
936  }
937  if(indicesToInsert.size())
938  diagBlocks_[i]->replaceGlobalValues(blockRow, indicesToInsert, valuesToInsert);
939  }
940  diagBlocks_[i]->fillComplete ();
941  }
942  }
943 }
944 
945 template<typename MatrixType, typename InverseType>
947 {
948  typedef ILUT<Tpetra::RowMatrix<SC, LO, GO, NO> > ILUTInverse;
949 #ifdef HAVE_IFPACK2_AMESOS2
951  if(std::is_same<InverseType, ILUTInverse>::value)
952  {
953  return "SparseILUT";
954  }
955  else if(std::is_same<InverseType, AmesosInverse>::value)
956  {
957  return "SparseAmesos";
958  }
959  else
960  {
961  throw std::logic_error("InverseType for SparseContainer must be Ifpack2::ILUT or Details::Amesos2Wrapper");
962  }
963 #else
964  // Macros can't have commas in their arguments, so we have to
965  // compute the bool first argument separately.
966  constexpr bool inverseTypeIsILUT = std::is_same<InverseType, ILUTInverse>::value;
967  TEUCHOS_TEST_FOR_EXCEPTION(! inverseTypeIsILUT, std::logic_error,
968  "InverseType for SparseContainer must be Ifpack2::ILUT<ROW>");
969  return "SparseILUT"; //the only supported sparse container specialization if no Amesos2
970 #endif
971 }
972 
973 } // namespace Ifpack2
974 
975 // For ETI
976 #include "Ifpack2_ILUT.hpp"
977 #ifdef HAVE_IFPACK2_AMESOS2
978 #include "Ifpack2_Details_Amesos2Wrapper.hpp"
979 #endif
980 
981 // There's no need to instantiate for CrsMatrix too. All Ifpack2
982 // preconditioners can and should do dynamic casts if they need a type
983 // more specific than RowMatrix.
984 
985 #ifdef HAVE_IFPACK2_AMESOS2
986 # define IFPACK2_SPARSECONTAINER_INSTANT(S,LO,GO,N) \
987  template class Ifpack2::SparseContainer< Tpetra::RowMatrix<S, LO, GO, N>, \
988  Ifpack2::ILUT<Tpetra::RowMatrix<S,LO,GO,N> > >; \
989  template class Ifpack2::SparseContainer< Tpetra::RowMatrix<S, LO, GO, N>, \
990  Ifpack2::Details::Amesos2Wrapper<Tpetra::RowMatrix<S,LO,GO,N> > >;
991 #else
992 # define IFPACK2_SPARSECONTAINER_INSTANT(S,LO,GO,N) \
993  template class Ifpack2::SparseContainer< Tpetra::RowMatrix<S,LO,GO,N>, \
994  Ifpack2::ILUT<Tpetra::RowMatrix<S, LO, GO, N> > >;
995 #endif
996 #endif // IFPACK2_SPARSECONTAINER_DEF_HPP
static std::string getName()
Get the name of this container type for Details::constructContainer()
Definition: Ifpack2_SparseContainer_def.hpp:946
SparseContainer(const Teuchos::RCP< const row_matrix_type > &matrix, const Teuchos::Array< Teuchos::Array< LO > > &partitions, const Teuchos::RCP< const import_type > &importer, bool pointIndexed)
Constructor.
Definition: Ifpack2_SparseContainer_def.hpp:60
Store and solve a local sparse linear problem.
Definition: Ifpack2_SparseContainer_decl.hpp:134
static RCP< Time > getNewCounter(const std::string &name)
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object with some verbosity level to the given FancyOStream.
Definition: Ifpack2_SparseContainer_def.hpp:491
The implementation of the numerical features of Container (Jacobi, Gauss-Seidel, SGS). This class allows a custom scalar type (LocalScalarType) to be used for storing blocks and solving the block systems. Hiding this template parameter from the Container interface simplifies the BlockRelaxation and ContainerFactory classes.
Definition: Ifpack2_Container_decl.hpp:343
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
virtual void initialize()
Do all set-up operations that only require matrix structure.
Definition: Ifpack2_SparseContainer_def.hpp:86
ILUT (incomplete LU factorization with threshold) of a Tpetra sparse matrix.
Definition: Ifpack2_ILUT_decl.hpp:93
virtual std::ostream & print(std::ostream &os) const
Print information about this object to the given output stream.
Definition: Ifpack2_SparseContainer_def.hpp:454
virtual std::string description() const
A one-line description of this object.
Definition: Ifpack2_SparseContainer_def.hpp:464
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Wrapper class for direct solvers in Amesos2.
Definition: Ifpack2_Details_Amesos2Wrapper_decl.hpp:102
typename mv_type::dual_view_type::t_host HostView
Definition: Ifpack2_Container_decl.hpp:139
virtual void apply(ConstHostView X, HostView Y, int blockIndex, Teuchos::ETransp mode=Teuchos::NO_TRANS, SC alpha=Teuchos::ScalarTraits< SC >::one(), SC beta=Teuchos::ScalarTraits< SC >::zero()) const
Compute Y := alpha * M^{-1} X + beta*Y.
Definition: Ifpack2_SparseContainer_def.hpp:180
basic_FancyOStream & setOutputToRootOnly(const int rootRank)
virtual void weightedApply(ConstHostView X, HostView Y, ConstHostView W, int blockIndex, Teuchos::ETransp mode=Teuchos::NO_TRANS, SC alpha=Teuchos::ScalarTraits< SC >::one(), SC beta=Teuchos::ScalarTraits< SC >::zero()) const
Compute Y := alpha * diag(D) * M^{-1} (diag(D) * X) + beta*Y.
Definition: Ifpack2_SparseContainer_def.hpp:304
void push_back(const value_type &x)
virtual ~SparseContainer()
Destructor (declared virtual for memory safety of derived classes).
Definition: Ifpack2_SparseContainer_def.hpp:75
size_type size() const
TypeTo as(const TypeFrom &t)
virtual void setParameters(const Teuchos::ParameterList &List)
Set all necessary parameters.
Definition: Ifpack2_SparseContainer_def.hpp:79
virtual void compute()
Initialize and compute all blocks.
Definition: Ifpack2_SparseContainer_def.hpp:118
Ifpack2::SparseContainer class declaration.
void clearBlocks()
Definition: Ifpack2_SparseContainer_def.hpp:142
Implementation detail of Ifpack2::Container subclasses.
Definition: Ifpack2_Details_MultiVectorLocalGatherScatter.hpp:85