Ifpack2 Templated Preconditioning Package  Version 1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Ifpack2_SparseContainer_def.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4 //
5 // Copyright 2009 NTESS and the Ifpack2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef IFPACK2_SPARSECONTAINER_DEF_HPP
11 #define IFPACK2_SPARSECONTAINER_DEF_HPP
12 
14 #ifdef HAVE_MPI
15 #include <mpi.h>
17 #else
18 #include "Teuchos_DefaultSerialComm.hpp"
19 #endif
21 
22 namespace Ifpack2 {
23 
24 //==============================================================================
25 template<class MatrixType, class InverseType>
28  const Teuchos::Array<Teuchos::Array<LO> >& partitions,
30  bool pointIndexed) :
31  ContainerImpl<MatrixType, InverseScalar> (matrix, partitions, pointIndexed),
32 #ifdef HAVE_MPI
33  localComm_ (Teuchos::rcp (new Teuchos::MpiComm<int> (MPI_COMM_SELF)))
34 #else
35  localComm_ (Teuchos::rcp (new Teuchos::SerialComm<int> ()))
36 #endif // HAVE_MPI
37 {}
38 
39 //==============================================================================
40 template<class MatrixType, class InverseType>
43 
44 //==============================================================================
45 template<class MatrixType, class InverseType>
47 {
48  List_ = List;
49 }
50 
51 //==============================================================================
52 template<class MatrixType, class InverseType>
54 {
55  // We assume that if you called this method, you intend to recompute
56  // everything. Thus, we release references to all the internal
57  // objects. We do this first to save memory. (In an RCP
58  // assignment, the right-hand side and left-hand side coexist before
59  // the left-hand side's reference count gets updated.)
61  Teuchos::TimeMonitor::getNewCounter ("Ifpack2::SparseContainer::initialize");
62  Teuchos::TimeMonitor timeMon (*timer);
63 
64  //Will create the diagonal blocks and their inverses
65  //in extract()
66  diagBlocks_.assign(this->numBlocks_, Teuchos::null);
67  Inverses_.assign(this->numBlocks_, Teuchos::null);
68 
69  // Extract the submatrices.
70  this->extractGraph();
71 
72  // Initialize the inverse operator.
73  for(int i = 0; i < this->numBlocks_; i++)
74  {
75  Inverses_[i]->setParameters(List_);
76  Inverses_[i]->initialize ();
77  }
78 
79  this->IsInitialized_ = true;
80  this->IsComputed_ = false;
81 }
82 
83 //==============================================================================
84 template<class MatrixType, class InverseType>
86 {
88  Teuchos::TimeMonitor::getNewCounter ("Ifpack2::SparseContainer::compute");
89  Teuchos::TimeMonitor timeMon (*timer);
90 
91  this->IsComputed_ = false;
92  if (!this->isInitialized ()) {
93  this->initialize ();
94  }
95 
96  // Extract the submatrices values
97  this->extractValues();
98 
99  // Compute the inverse operator.
100  for(int i = 0; i < this->numBlocks_; i++) {
101  Inverses_[i]->compute ();
102  }
103 
104  this->IsComputed_ = true;
105 }
106 
107 //==============================================================================
108 template<class MatrixType, class InverseType>
110 {
111  for(auto inv : Inverses_)
112  delete inv.get();
113  Inverses_.clear();
114  diagBlocks_.clear();
116 }
117 
118 //==============================================================================
119 template<class MatrixType, class InverseType>
121 solveBlockMV(const inverse_mv_type& X,
122  inverse_mv_type& Y,
123  int blockIndex,
124  Teuchos::ETransp mode,
125  InverseScalar alpha,
126  InverseScalar beta) const
127 {
129  Inverses_[blockIndex]->getDomainMap()->getLocalNumElements() != X.getLocalLength(),
130  std::logic_error, "Ifpack2::SparseContainer::apply: Inverse_ "
131  "operator and X have incompatible dimensions (" <<
132  Inverses_[blockIndex]->getDomainMap()->getLocalNumElements() << " resp. "
133  << X.getLocalLength() << "). Please report this bug to "
134  "the Ifpack2 developers.");
136  Inverses_[blockIndex]->getRangeMap()->getLocalNumElements() != Y.getLocalLength(),
137  std::logic_error, "Ifpack2::SparseContainer::apply: Inverse_ "
138  "operator and Y have incompatible dimensions (" <<
139  Inverses_[blockIndex]->getRangeMap()->getLocalNumElements() << " resp. "
140  << Y.getLocalLength() << "). Please report this bug to "
141  "the Ifpack2 developers.");
142  Inverses_[blockIndex]->apply(X, Y, mode, alpha, beta);
143 }
144 
145 template<class MatrixType, class InverseType>
147 apply (ConstHostView X,
148  HostView Y,
149  int blockIndex,
150  Teuchos::ETransp mode,
151  SC alpha,
152  SC beta) const
153 {
154  using Teuchos::ArrayView;
155  using Teuchos::as;
156 
157  // The InverseType template parameter might have different template
158  // parameters (Scalar, LO, GO, and/or Node) than MatrixType. For
159  // example, MatrixType (a global object) might use a bigger GO
160  // (global ordinal) type than InverseType (which deals with the
161  // diagonal block, a local object). This means that we might have
162  // to convert X and Y to the Tpetra::MultiVector specialization that
163  // InverseType wants. This class' X_ and Y_ internal fields are of
164  // the right type for InverseType, so we can use those as targets.
166  Teuchos::TimeMonitor::getNewCounter ("Ifpack2::SparseContainer::apply");
167  Teuchos::TimeMonitor timeMon (*timer);
168 
169 
170  // Tpetra::MultiVector specialization corresponding to InverseType.
172  size_t numVecs = X.extent(1);
173 
175  ! this->IsComputed_, std::runtime_error, "Ifpack2::SparseContainer::apply: "
176  "You must have called the compute() method before you may call apply(). "
177  "You may call the apply() method as many times as you want after calling "
178  "compute() once, but you must have called compute() at least once.");
180  X.extent(1) != Y.extent(1), std::runtime_error,
181  "Ifpack2::SparseContainer::apply: X and Y have different numbers of "
182  "vectors. X has " << X.extent(1)
183  << ", but Y has " << Y.extent(1) << ".");
184 
185  if (numVecs == 0) {
186  return; // done! nothing to do
187  }
188 
189  const LO numRows = this->blockSizes_[blockIndex];
190 
191  // The operator Inverse_ works on a permuted subset of the local
192  // parts of X and Y. The subset and permutation are defined by the
193  // index array returned by getBlockRows(). If the permutation is
194  // trivial and the subset is exactly equal to the local indices,
195  // then we could use the local parts of X and Y exactly, without
196  // needing to permute. Otherwise, we have to use temporary storage
197  // to permute X and Y. For now, we always use temporary storage.
198  //
199  // Create temporary permuted versions of the input and output.
200  // (Re)allocate X_ and/or Y_ only if necessary. We'll use them to
201  // store the permuted versions of X resp. Y. Note that X_local has
202  // the domain Map of the operator, which may be a permuted subset of
203  // the local Map corresponding to X.getMap(). Similarly, Y_local
204  // has the range Map of the operator, which may be a permuted subset
205  // of the local Map corresponding to Y.getMap(). numRows here
206  // gives the number of rows in the row Map of the local Inverse_
207  // operator.
208  //
209  // FIXME (mfh 20 Aug 2013) There might be an implicit assumption
210  // here that the row Map and the range Map of that operator are
211  // the same.
212  //
213  // FIXME (mfh 20 Aug 2013) This "local permutation" functionality
214  // really belongs in Tpetra.
215 
216  if(invX.size() == 0)
217  {
218  for(LO i = 0; i < this->numBlocks_; i++)
219  invX.emplace_back(Inverses_[i]->getDomainMap(), numVecs);
220  for(LO i = 0; i < this->numBlocks_; i++)
221  invY.emplace_back(Inverses_[i]->getDomainMap(), numVecs);
222  }
223  inverse_mv_type& X_local = invX[blockIndex];
225  X_local.getLocalLength() != size_t(numRows * this->scalarsPerRow_), std::logic_error,
226  "Ifpack2::SparseContainer::apply: "
227  "X_local has length " << X_local.getLocalLength() << ", which does "
228  "not match numRows = " << numRows * this->scalarsPerRow_ << ". Please report this bug to "
229  "the Ifpack2 developers.");
230  const ArrayView<const LO> blockRows = this->getBlockRows(blockIndex);
231  if(this->scalarsPerRow_ == 1)
232  mvgs.gatherMVtoView(X_local, X, blockRows);
233  else
234  mvgs.gatherMVtoViewBlock(X_local, X, blockRows, this->scalarsPerRow_);
235 
236  // We must gather the output multivector Y even on input to
237  // Inverse_->apply(), since the Inverse_ operator might use it as an
238  // initial guess for a linear solve. We have no way of knowing
239  // whether it does or does not.
240 
241  inverse_mv_type& Y_local = invY[blockIndex];
243  Y_local.getLocalLength () != size_t(numRows * this->scalarsPerRow_), std::logic_error,
244  "Ifpack2::SparseContainer::apply: "
245  "Y_local has length " << Y_local.getLocalLength () << ", which does "
246  "not match numRows = " << numRows * this->scalarsPerRow_ << ". Please report this bug to "
247  "the Ifpack2 developers.");
248 
249  if(this->scalarsPerRow_ == 1)
250  mvgs.gatherMVtoView(Y_local, Y, blockRows);
251  else
252  mvgs.gatherMVtoViewBlock(Y_local, Y, blockRows, this->scalarsPerRow_);
253 
254  // Apply the local operator:
255  // Y_local := beta*Y_local + alpha*M^{-1}*X_local
256  this->solveBlockMV(X_local, Y_local, blockIndex, mode,
257  InverseScalar(alpha), InverseScalar(beta));
258 
259 
260  // Scatter the permuted subset output vector Y_local back into the
261  // original output multivector Y.
262  if(this->scalarsPerRow_ == 1)
263  mvgs.scatterMVtoView(Y, Y_local, blockRows);
264  else
265  mvgs.scatterMVtoViewBlock(Y, Y_local, blockRows, this->scalarsPerRow_);
266 }
267 
268 //==============================================================================
269 template<class MatrixType, class InverseType>
271 weightedApply (ConstHostView X,
272  HostView Y,
273  ConstHostView D,
274  int blockIndex,
275  Teuchos::ETransp mode,
276  SC alpha,
277  SC beta) const
278 {
279  using Teuchos::ArrayView;
280  using Teuchos::Range1D;
281  using std::cerr;
282  using std::endl;
284 
285  // The InverseType template parameter might have different template
286  // parameters (Scalar, LO, GO, and/or Node) than MatrixType. For
287  // example, MatrixType (a global object) might use a bigger GO
288  // (global ordinal) type than InverseType (which deals with the
289  // diagonal block, a local object). This means that we might have
290  // to convert X and Y to the Tpetra::MultiVector specialization that
291  // InverseType wants. This class' X_ and Y_ internal fields are of
292  // the right type for InverseType, so we can use those as targets.
293 
294  // Tpetra::Vector specialization corresponding to InverseType.
295  typedef Tpetra::Vector<InverseScalar, InverseLocalOrdinal, InverseGlobalOrdinal, InverseNode> inverse_vector_type;
296 
298  const size_t numVecs = X.extent(1);
299 
301  ! this->IsComputed_, std::runtime_error, "Ifpack2::SparseContainer::"
302  "weightedApply: You must have called the compute() method before you may "
303  "call apply(). You may call the apply() method as many times as you want "
304  "after calling compute() once, but you must have called compute() at least "
305  "once.");
307  X.extent(1) != Y.extent(1), std::runtime_error,
308  "Ifpack2::SparseContainer::weightedApply: X and Y have different numbers "
309  "of vectors. X has " << X.extent(1) << ", but Y has "
310  << Y.extent(1) << ".");
311 
312  //bmk 7-2019: BlockRelaxation already checked this, but if that changes...
314  this->scalarsPerRow_ > 1, std::logic_error, "Ifpack2::SparseContainer::weightedApply: "
315  "Use of block rows isn't allowed in overlapping Jacobi (the only method that uses weightedApply");
316 
317  if (numVecs == 0) {
318  return; // done! nothing to do
319  }
320 
321  // The operator Inverse_ works on a permuted subset of the local
322  // parts of X and Y. The subset and permutation are defined by the
323  // index array returned by getBlockRows(). If the permutation is
324  // trivial and the subset is exactly equal to the local indices,
325  // then we could use the local parts of X and Y exactly, without
326  // needing to permute. Otherwise, we have to use temporary storage
327  // to permute X and Y. For now, we always use temporary storage.
328  //
329  // Create temporary permuted versions of the input and output.
330  // (Re)allocate X_ and/or Y_ only if necessary. We'll use them to
331  // store the permuted versions of X resp. Y. Note that X_local has
332  // the domain Map of the operator, which may be a permuted subset of
333  // the local Map corresponding to X.getMap(). Similarly, Y_local
334  // has the range Map of the operator, which may be a permuted subset
335  // of the local Map corresponding to Y.getMap(). numRows here
336  // gives the number of rows in the row Map of the local Inverse_
337  // operator.
338  //
339  // FIXME (mfh 20 Aug 2013) There might be an implicit assumption
340  // here that the row Map and the range Map of that operator are
341  // the same.
342  //
343  // FIXME (mfh 20 Aug 2013) This "local permutation" functionality
344  // really belongs in Tpetra.
345  const LO numRows = this->blockSizes_[blockIndex];
346 
347  if(invX.size() == 0)
348  {
349  for(LO i = 0; i < this->numBlocks_; i++)
350  invX.emplace_back(Inverses_[i]->getDomainMap(), numVecs);
351  for(LO i = 0; i < this->numBlocks_; i++)
352  invY.emplace_back(Inverses_[i]->getDomainMap(), numVecs);
353  }
354  inverse_mv_type& X_local = invX[blockIndex];
355  const ArrayView<const LO> blockRows = this->getBlockRows(blockIndex);
356  mvgs.gatherMVtoView(X_local, X, blockRows);
357 
358  // We must gather the output multivector Y even on input to
359  // Inverse_->apply(), since the Inverse_ operator might use it as an
360  // initial guess for a linear solve. We have no way of knowing
361  // whether it does or does not.
362 
363  inverse_mv_type Y_local = invY[blockIndex];
364  TEUCHOS_TEST_FOR_EXCEPTION(
365  Y_local.getLocalLength() != size_t(numRows), std::logic_error,
366  "Ifpack2::SparseContainer::weightedApply: "
367  "Y_local has length " << X_local.getLocalLength() << ", which does "
368  "not match numRows = " << numRows << ". Please report this bug to "
369  "the Ifpack2 developers.");
370  mvgs.gatherMVtoView(Y_local, Y, blockRows);
371 
372  // Apply the diagonal scaling D to the input X. It's our choice
373  // whether the result has the original input Map of X, or the
374  // permuted subset Map of X_local. If the latter, we also need to
375  // gather D into the permuted subset Map. We choose the latter, to
376  // save memory and computation. Thus, we do the following:
377  //
378  // 1. Gather D into a temporary vector D_local.
379  // 2. Create a temporary X_scaled to hold diag(D_local) * X_local.
380  // 3. Compute X_scaled := diag(D_loca) * X_local.
381 
382  inverse_vector_type D_local(Inverses_[blockIndex]->getDomainMap());
383  TEUCHOS_TEST_FOR_EXCEPTION(
384  D_local.getLocalLength() != size_t(this->blockSizes_[blockIndex]), std::logic_error,
385  "Ifpack2::SparseContainer::weightedApply: "
386  "D_local has length " << X_local.getLocalLength () << ", which does "
387  "not match numRows = " << this->blockSizes_[blockIndex] << ". Please report this bug to "
388  "the Ifpack2 developers.");
389  mvgs.gatherMVtoView(D_local, D, blockRows);
390  inverse_mv_type X_scaled(Inverses_[blockIndex]->getDomainMap(), numVecs);
391  X_scaled.elementWiseMultiply(STS::one(), D_local, X_local, STS::zero());
392 
393  // Y_temp will hold the result of M^{-1}*X_scaled. If beta == 0, we
394  // can write the result of Inverse_->apply() directly to Y_local, so
395  // Y_temp may alias Y_local. Otherwise, if beta != 0, we need
396  // temporary storage for M^{-1}*X_scaled, so Y_temp must be
397  // different than Y_local.
398  inverse_mv_type* Y_temp;
399  if (InverseScalar(beta) == STS::zero ()) {
400  Y_temp = &Y_local;
401  } else {
402  Y_temp = new inverse_mv_type(Inverses_[blockIndex]->getRangeMap(), numVecs);
403  }
404  // Apply the local operator: Y_temp := M^{-1} * X_scaled
405  Inverses_[blockIndex]->apply(X_scaled, *Y_temp, mode);
406  // Y_local := beta * Y_local + alpha * diag(D_local) * Y_tmp.
407  //
408  // Note that we still use the permuted subset scaling D_local here,
409  // because Y_temp has the same permuted subset Map. That's good, in
410  // fact, because it's a subset: less data to read and multiply.
411  Y_local.elementWiseMultiply(alpha, D_local, *Y_temp, beta);
412  if(Y_temp != &Y_local)
413  delete Y_temp;
414  // Copy the permuted subset output vector Y_local into the original
415  // output multivector Y.
416  mvgs.scatterMVtoView(Y, Y_local, blockRows);
417 }
418 
419 //==============================================================================
420 template<class MatrixType, class InverseType>
421 std::ostream& SparseContainer<MatrixType,InverseType>::print(std::ostream& os) const
422 {
423  Teuchos::FancyOStream fos(Teuchos::rcp(&os,false));
424  fos.setOutputToRootOnly(0);
425  describe(fos);
426  return(os);
427 }
428 
429 //==============================================================================
430 template<class MatrixType, class InverseType>
432 {
433  std::ostringstream oss;
434  oss << "\"Ifpack2::SparseContainer\": {";
435  if (this->isInitialized()) {
436  if (this->isComputed()) {
437  oss << "status = initialized, computed";
438  }
439  else {
440  oss << "status = initialized, not computed";
441  }
442  }
443  else {
444  oss << "status = not initialized, not computed";
445  }
446  for(int i = 0; i < this->numBlocks_; i++)
447  {
448  oss << ", Block Inverse " << i << ": {";
449  oss << Inverses_[i]->description();
450  oss << "}";
451  }
452  oss << "}";
453  return oss.str();
454 }
455 
456 //==============================================================================
457 template<class MatrixType, class InverseType>
459 {
460  using std::endl;
461  if(verbLevel==Teuchos::VERB_NONE) return;
462  os << "================================================================================" << endl;
463  os << "Ifpack2::SparseContainer" << endl;
464  for(int i = 0; i < this->numBlocks_; i++)
465  {
466  os << "Block " << i << " rows: = " << this->blockSizes_[i] << endl;
467  }
468  os << "isInitialized() = " << this->IsInitialized_ << endl;
469  os << "isComputed() = " << this->IsComputed_ << endl;
470  os << "================================================================================" << endl;
471  os << endl;
472 }
473 
474 //==============================================================================
475 template<class MatrixType, class InverseType>
477 extract ()
478 {
479  using Teuchos::Array;
480  using Teuchos::ArrayView;
481  using Teuchos::RCP;
482  const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
483  //To extract diagonal blocks, need to translate local rows to local columns.
484  //Strategy: make a lookup table that translates local cols in the matrix to offsets in blockRows_:
485  //blockOffsets_[b] <= offset < blockOffsets_[b+1]: tests whether the column is in block b.
486  //offset - blockOffsets_[b]: gives the column within block b.
487  //
488  //This provides the block and col within a block in O(1).
489  Teuchos::Array<InverseGlobalOrdinal> indicesToInsert;
490  Teuchos::Array<InverseScalar> valuesToInsert;
491  if(this->scalarsPerRow_ > 1)
492  {
493  Array<LO> colToBlockOffset(this->inputBlockMatrix_->getLocalNumCols(), INVALID);
494  for(int i = 0; i < this->numBlocks_; i++)
495  {
496  //Get the interval where block i is defined in blockRows_
497  LO blockStart = this->blockOffsets_[i];
498  LO blockSize = this->blockSizes_[i];
499  LO blockPointSize = this->blockSizes_[i] * this->scalarsPerRow_;
500  LO blockEnd = blockStart + blockSize;
501  ArrayView<const LO> blockRows = this->getBlockRows(i);
502  //Set the lookup table entries for the columns appearing in block i.
503  //If OverlapLevel_ > 0, then this may overwrite values for previous blocks, but
504  //this is OK. The values updated here are only needed to process block i's entries.
505  for(LO j = 0; j < blockSize; j++)
506  {
507  LO localCol = this->translateRowToCol(blockRows[j]);
508  colToBlockOffset[localCol] = blockStart + j;
509  }
510  //First, count the number of entries in each row of block i
511  //(to allocate it with StaticProfile)
512  Array<size_t> rowEntryCounts(blockPointSize, 0);
513  //blockRow counts the BlockCrs LIDs that are going into this block
514  //Rows are inserted into the CrsMatrix in sequential order
515  using inds_type = typename block_crs_matrix_type::local_inds_host_view_type;
516  using vals_type = typename block_crs_matrix_type::values_host_view_type;
517  for(LO blockRow = 0; blockRow < blockSize; blockRow++)
518  {
519  //get a raw view of the whole block row
520  inds_type indices;
521  vals_type values;
522  LO inputRow = this->blockRows_[blockStart + blockRow];
523  this->inputBlockMatrix_->getLocalRowView(inputRow, indices, values);
524  LO numEntries = (LO) indices.size();
525  for(LO br = 0; br < this->bcrsBlockSize_; br++)
526  {
527  for(LO k = 0; k < numEntries; k++)
528  {
529  LO colOffset = colToBlockOffset[indices[k]];
530  if(blockStart <= colOffset && colOffset < blockEnd)
531  {
532  rowEntryCounts[blockRow * this->bcrsBlockSize_ + br] += this->bcrsBlockSize_;
533  }
534  }
535  }
536  }
537  //now that row sizes are known, can allocate the diagonal matrix
538  RCP<InverseMap> tempMap(new InverseMap(blockPointSize, 0, this->localComm_));
539  diagBlocks_[i] = rcp(new InverseCrs(tempMap, rowEntryCounts));
540  Inverses_[i] = rcp(new InverseType(diagBlocks_[i]));
541  //insert the actual entries, one row at a time
542  for(LO blockRow = 0; blockRow < blockSize; blockRow++)
543  {
544  //get a raw view of the whole block row
545  inds_type indices;
546  vals_type values;
547  LO inputRow = this->blockRows_[blockStart + blockRow];
548  this->inputBlockMatrix_->getLocalRowView(inputRow, indices, values);
549  LO numEntries = (LO) indices.size();
550  for(LO br = 0; br < this->bcrsBlockSize_; br++)
551  {
552  indicesToInsert.clear();
553  valuesToInsert.clear();
554  for(LO k = 0; k < numEntries; k++)
555  {
556  LO colOffset = colToBlockOffset[indices[k]];
557  if(blockStart <= colOffset && colOffset < blockEnd)
558  {
559  LO blockCol = colOffset - blockStart;
560  //bc iterates over the columns in (block) entry k
561  for(LO bc = 0; bc < this->bcrsBlockSize_; bc++)
562  {
563  indicesToInsert.push_back(blockCol * this->bcrsBlockSize_ + bc);
564  valuesToInsert.push_back(values[k * this->bcrsBlockSize_ * this->bcrsBlockSize_ + bc * this->bcrsBlockSize_ + br]);
565  }
566  }
567  }
568  InverseGlobalOrdinal rowToInsert = blockRow * this->bcrsBlockSize_ + br;
569  if(indicesToInsert.size())
570  diagBlocks_[i]->insertGlobalValues(rowToInsert, indicesToInsert(), valuesToInsert());
571  }
572  }
573  diagBlocks_[i]->fillComplete();
574  }
575  }
576  else
577  {
578  //get the mapping from point-indexed matrix columns to offsets in blockRows_
579  //(this includes regular CrsMatrix columns, in which case scalarsPerRow_ == 1)
580  Array<LO> colToBlockOffset(this->inputMatrix_->getLocalNumCols() * this->bcrsBlockSize_, INVALID);
581  for(int i = 0; i < this->numBlocks_; i++)
582  {
583  //Get the interval where block i is defined in blockRows_
584  LO blockStart = this->blockOffsets_[i];
585  LO blockSize = this->blockSizes_[i];
586  LO blockEnd = blockStart + blockSize;
587  ArrayView<const LO> blockRows = this->getBlockRows(i);
588  //Set the lookup table entries for the columns appearing in block i.
589  //If OverlapLevel_ > 0, then this may overwrite values for previous blocks, but
590  //this is OK. The values updated here are only needed to process block i's entries.
591  for(LO j = 0; j < blockSize; j++)
592  {
593  //translateRowToCol will return the corresponding split column
594  LO localCol = this->translateRowToCol(blockRows[j]);
595  colToBlockOffset[localCol] = blockStart + j;
596  }
597  Teuchos::Array<size_t> rowEntryCounts(blockSize, 0);
598  for(LO j = 0; j < blockSize; j++)
599  {
600  rowEntryCounts[j] = this->getInputRowView(this->blockRows_[blockStart + j]).size();
601  }
602  RCP<InverseMap> tempMap(new InverseMap(blockSize, 0, this->localComm_));
603  diagBlocks_[i] = rcp(new InverseCrs(tempMap, rowEntryCounts));
604  Inverses_[i] = rcp(new InverseType(diagBlocks_[i]));
605  for(LO blockRow = 0; blockRow < blockSize; blockRow++)
606  {
607  valuesToInsert.clear();
608  indicesToInsert.clear();
609  //get a view of the split row
610  LO inputSplitRow = this->blockRows_[blockStart + blockRow];
611  auto rowView = this->getInputRowView(inputSplitRow);
612  for(size_t k = 0; k < rowView.size(); k++)
613  {
614  LO colOffset = colToBlockOffset[rowView.ind(k)];
615  if(blockStart <= colOffset && colOffset < blockEnd)
616  {
617  LO blockCol = colOffset - blockStart;
618  indicesToInsert.push_back(blockCol);
619  valuesToInsert.push_back(rowView.val(k));
620  }
621  }
622  if(indicesToInsert.size())
623  diagBlocks_[i]->insertGlobalValues(blockRow, indicesToInsert(), valuesToInsert());
624  }
625  diagBlocks_[i]->fillComplete ();
626  }
627  }
628 }
629 
630 //==============================================================================
631 template<class MatrixType, class InverseType>
632 void SparseContainer<MatrixType,InverseType>::
633 extractGraph ()
634 {
635  using Teuchos::Array;
636  using Teuchos::ArrayView;
637  using Teuchos::RCP;
638  const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
639  //To extract diagonal blocks, need to translate local rows to local columns.
640  //Strategy: make a lookup table that translates local cols in the matrix to offsets in blockRows_:
641  //blockOffsets_[b] <= offset < blockOffsets_[b+1]: tests whether the column is in block b.
642  //offset - blockOffsets_[b]: gives the column within block b.
643  //
644  //This provides the block and col within a block in O(1).
646  Teuchos::TimeMonitor::getNewCounter ("Ifpack2::SparseContainer::extractGraph");
647  Teuchos::TimeMonitor timeMon (*timer);
648 
649  Teuchos::Array<InverseGlobalOrdinal> indicesToInsert;
650  if(this->scalarsPerRow_ > 1)
651  {
652  Array<LO> colToBlockOffset(this->inputBlockMatrix_->getLocalNumCols(), INVALID);
653  for(int i = 0; i < this->numBlocks_; i++)
654  {
655  //Get the interval where block i is defined in blockRows_
656  LO blockStart = this->blockOffsets_[i];
657  LO blockSize = this->blockSizes_[i];
658  LO blockPointSize = this->blockSizes_[i] * this->scalarsPerRow_;
659  LO blockEnd = blockStart + blockSize;
660  ArrayView<const LO> blockRows = this->getBlockRows(i);
661  //Set the lookup table entries for the columns appearing in block i.
662  //If OverlapLevel_ > 0, then this may overwrite values for previous blocks, but
663  //this is OK. The values updated here are only needed to process block i's entries.
664  for(LO j = 0; j < blockSize; j++)
665  {
666  LO localCol = this->translateRowToCol(blockRows[j]);
667  colToBlockOffset[localCol] = blockStart + j;
668  }
669  //First, count the number of entries in each row of block i
670  //(to allocate it with StaticProfile)
671  Array<size_t> rowEntryCounts(blockPointSize, 0);
672  //blockRow counts the BlockCrs LIDs that are going into this block
673  //Rows are inserted into the CrsMatrix in sequential order
674  using inds_type = typename block_crs_matrix_type::local_inds_host_view_type;
675  for(LO blockRow = 0; blockRow < blockSize; blockRow++)
676  {
677  //get a raw view of the whole block row
678  inds_type indices;
679  LO inputRow = this->blockRows_[blockStart + blockRow];
680  this->inputBlockMatrix_->getGraph()->getLocalRowView(inputRow, indices);
681  LO numEntries = (LO) indices.size();
682  for(LO br = 0; br < this->bcrsBlockSize_; br++)
683  {
684  for(LO k = 0; k < numEntries; k++)
685  {
686  LO colOffset = colToBlockOffset[indices[k]];
687  if(blockStart <= colOffset && colOffset < blockEnd)
688  {
689  rowEntryCounts[blockRow * this->bcrsBlockSize_ + br] += this->bcrsBlockSize_;
690  }
691  }
692  }
693  }
694  //now that row sizes are known, can allocate the diagonal matrix
695  RCP<InverseMap> tempMap(new InverseMap(blockPointSize, 0, this->localComm_));
696  auto diagGraph = rcp(new InverseGraph(tempMap, rowEntryCounts));
697  //insert the actual entries, one row at a time
698  for(LO blockRow = 0; blockRow < blockSize; blockRow++)
699  {
700  //get a raw view of the whole block row
701  inds_type indices;
702  LO inputRow = this->blockRows_[blockStart + blockRow];
703  this->inputBlockMatrix_->getGraph()->getLocalRowView(inputRow, indices);
704  LO numEntries = (LO) indices.size();
705  for(LO br = 0; br < this->bcrsBlockSize_; br++)
706  {
707  indicesToInsert.clear();
708  for(LO k = 0; k < numEntries; k++)
709  {
710  LO colOffset = colToBlockOffset[indices[k]];
711  if(blockStart <= colOffset && colOffset < blockEnd)
712  {
713  LO blockCol = colOffset - blockStart;
714  //bc iterates over the columns in (block) entry k
715  for(LO bc = 0; bc < this->bcrsBlockSize_; bc++)
716  {
717  indicesToInsert.push_back(blockCol * this->bcrsBlockSize_ + bc);
718  }
719  }
720  }
721  InverseGlobalOrdinal rowToInsert = blockRow * this->bcrsBlockSize_ + br;
722  if(indicesToInsert.size())
723  diagGraph->insertGlobalIndices(rowToInsert, indicesToInsert());
724  }
725  }
726  diagGraph->fillComplete();
727 
728  // create matrix block
729  diagBlocks_[i] = rcp(new InverseCrs(diagGraph));
730  Inverses_[i] = rcp(new InverseType(diagBlocks_[i]));
731  }
732  }
733  else
734  {
735  //get the mapping from point-indexed matrix columns to offsets in blockRows_
736  //(this includes regular CrsMatrix columns, in which case scalarsPerRow_ == 1)
737  Array<LO> colToBlockOffset(this->inputMatrix_->getLocalNumCols() * this->bcrsBlockSize_, INVALID);
738  for(int i = 0; i < this->numBlocks_; i++)
739  {
740  //Get the interval where block i is defined in blockRows_
741  LO blockStart = this->blockOffsets_[i];
742  LO blockSize = this->blockSizes_[i];
743  LO blockEnd = blockStart + blockSize;
744  ArrayView<const LO> blockRows = this->getBlockRows(i);
745  //Set the lookup table entries for the columns appearing in block i.
746  //If OverlapLevel_ > 0, then this may overwrite values for previous blocks, but
747  //this is OK. The values updated here are only needed to process block i's entries.
748  for(LO j = 0; j < blockSize; j++)
749  {
750  //translateRowToCol will return the corresponding split column
751  LO localCol = this->translateRowToCol(blockRows[j]);
752  colToBlockOffset[localCol] = blockStart + j;
753  }
754  Teuchos::Array<size_t> rowEntryCounts(blockSize, 0);
755  for(LO j = 0; j < blockSize; j++)
756  {
757  rowEntryCounts[j] = this->getInputRowView(this->blockRows_[blockStart + j]).size();
758  }
759  RCP<InverseMap> tempMap(new InverseMap(blockSize, 0, this->localComm_));
760  auto diagGraph = rcp(new InverseGraph(tempMap, rowEntryCounts));
761  for(LO blockRow = 0; blockRow < blockSize; blockRow++)
762  {
763  indicesToInsert.clear();
764  //get a view of the split row
765  LO inputSplitRow = this->blockRows_[blockStart + blockRow];
766  auto rowView = this->getInputRowView(inputSplitRow);
767  for(size_t k = 0; k < rowView.size(); k++)
768  {
769  LO colOffset = colToBlockOffset[rowView.ind(k)];
770  if(blockStart <= colOffset && colOffset < blockEnd)
771  {
772  LO blockCol = colOffset - blockStart;
773  indicesToInsert.push_back(blockCol);
774  }
775  }
776  if(indicesToInsert.size())
777  diagGraph->insertGlobalIndices(blockRow, indicesToInsert());
778  }
779  diagGraph->fillComplete();
780 
781  // create matrix block
782  diagBlocks_[i] = rcp(new InverseCrs(diagGraph));
783  Inverses_[i] = rcp(new InverseType(diagBlocks_[i]));
784  }
785  }
786 }
787 
788 //==============================================================================
789 template<class MatrixType, class InverseType>
790 void SparseContainer<MatrixType,InverseType>::
791 extractValues ()
792 {
793  using Teuchos::Array;
794  using Teuchos::ArrayView;
795  using Teuchos::RCP;
796  const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
797  //To extract diagonal blocks, need to translate local rows to local columns.
798  //Strategy: make a lookup table that translates local cols in the matrix to offsets in blockRows_:
799  //blockOffsets_[b] <= offset < blockOffsets_[b+1]: tests whether the column is in block b.
800  //offset - blockOffsets_[b]: gives the column within block b.
801  //
802  //This provides the block and col within a block in O(1).
804  Teuchos::TimeMonitor::getNewCounter ("Ifpack2::SparseContainer::extractValues");
805  Teuchos::TimeMonitor timeMon (*timer);
806 
807  Teuchos::Array<InverseGlobalOrdinal> indicesToInsert;
808  Teuchos::Array<InverseScalar> valuesToInsert;
809  if(this->scalarsPerRow_ > 1)
810  {
811  Array<LO> colToBlockOffset(this->inputBlockMatrix_->getLocalNumCols(), INVALID);
812  for(int i = 0; i < this->numBlocks_; i++)
813  {
814  //Get the interval where block i is defined in blockRows_
815  LO blockStart = this->blockOffsets_[i];
816  LO blockSize = this->blockSizes_[i];
817  LO blockEnd = blockStart + blockSize;
818  ArrayView<const LO> blockRows = this->getBlockRows(i);
819  //Set the lookup table entries for the columns appearing in block i.
820  //If OverlapLevel_ > 0, then this may overwrite values for previous blocks, but
821  //this is OK. The values updated here are only needed to process block i's entries.
822  for(LO j = 0; j < blockSize; j++)
823  {
824  LO localCol = this->translateRowToCol(blockRows[j]);
825  colToBlockOffset[localCol] = blockStart + j;
826  }
827  using inds_type = typename block_crs_matrix_type::local_inds_host_view_type;
828  using vals_type = typename block_crs_matrix_type::values_host_view_type;
829  //insert the actual entries, one row at a time
830  diagBlocks_[i]->resumeFill();
831  for(LO blockRow = 0; blockRow < blockSize; blockRow++)
832  {
833  //get a raw view of the whole block row
834  inds_type indices;
835  vals_type values;
836  LO inputRow = this->blockRows_[blockStart + blockRow];
837  this->inputBlockMatrix_->getLocalRowView(inputRow, indices, values);
838  LO numEntries = (LO) indices.size();
839  for(LO br = 0; br < this->bcrsBlockSize_; br++)
840  {
841  indicesToInsert.clear();
842  valuesToInsert.clear();
843  for(LO k = 0; k < numEntries; k++)
844  {
845  LO colOffset = colToBlockOffset[indices[k]];
846  if(blockStart <= colOffset && colOffset < blockEnd)
847  {
848  LO blockCol = colOffset - blockStart;
849  //bc iterates over the columns in (block) entry k
850  for(LO bc = 0; bc < this->bcrsBlockSize_; bc++)
851  {
852  indicesToInsert.push_back(blockCol * this->bcrsBlockSize_ + bc);
853  valuesToInsert.push_back(values[k * this->bcrsBlockSize_ * this->bcrsBlockSize_ + bc * this->bcrsBlockSize_ + br]);
854  }
855  }
856  }
857  InverseGlobalOrdinal rowToInsert = blockRow * this->bcrsBlockSize_ + br;
858  if(indicesToInsert.size())
859  diagBlocks_[i]->replaceGlobalValues(rowToInsert, indicesToInsert(), valuesToInsert());
860  }
861  }
862  diagBlocks_[i]->fillComplete();
863  }
864  }
865  else
866  {
867  //get the mapping from point-indexed matrix columns to offsets in blockRows_
868  //(this includes regular CrsMatrix columns, in which case scalarsPerRow_ == 1)
869  Array<LO> colToBlockOffset(this->inputMatrix_->getLocalNumCols() * this->bcrsBlockSize_, INVALID);
870  for(int i = 0; i < this->numBlocks_; i++)
871  {
872  //Get the interval where block i is defined in blockRows_
873  LO blockStart = this->blockOffsets_[i];
874  LO blockSize = this->blockSizes_[i];
875  LO blockEnd = blockStart + blockSize;
876  ArrayView<const LO> blockRows = this->getBlockRows(i);
877  //Set the lookup table entries for the columns appearing in block i.
878  //If OverlapLevel_ > 0, then this may overwrite values for previous blocks, but
879  //this is OK. The values updated here are only needed to process block i's entries.
880  for(LO j = 0; j < blockSize; j++)
881  {
882  //translateRowToCol will return the corresponding split column
883  LO localCol = this->translateRowToCol(blockRows[j]);
884  colToBlockOffset[localCol] = blockStart + j;
885  }
886  diagBlocks_[i]->resumeFill();
887  for(LO blockRow = 0; blockRow < blockSize; blockRow++)
888  {
889  valuesToInsert.clear();
890  indicesToInsert.clear();
891  //get a view of the split row
892  LO inputSplitRow = this->blockRows_[blockStart + blockRow];
893  auto rowView = this->getInputRowView(inputSplitRow);
894  for(size_t k = 0; k < rowView.size(); k++)
895  {
896  LO colOffset = colToBlockOffset[rowView.ind(k)];
897  if(blockStart <= colOffset && colOffset < blockEnd)
898  {
899  LO blockCol = colOffset - blockStart;
900  indicesToInsert.push_back(blockCol);
901  valuesToInsert.push_back(rowView.val(k));
902  }
903  }
904  if(indicesToInsert.size())
905  diagBlocks_[i]->replaceGlobalValues(blockRow, indicesToInsert, valuesToInsert);
906  }
907  diagBlocks_[i]->fillComplete ();
908  }
909  }
910 }
911 
912 template<typename MatrixType, typename InverseType>
914 {
915  typedef ILUT<Tpetra::RowMatrix<SC, LO, GO, NO> > ILUTInverse;
916 #ifdef HAVE_IFPACK2_AMESOS2
918  if(std::is_same<InverseType, ILUTInverse>::value)
919  {
920  return "SparseILUT";
921  }
922  else if(std::is_same<InverseType, AmesosInverse>::value)
923  {
924  return "SparseAmesos";
925  }
926  else
927  {
928  throw std::logic_error("InverseType for SparseContainer must be Ifpack2::ILUT or Details::Amesos2Wrapper");
929  }
930 #else
931  // Macros can't have commas in their arguments, so we have to
932  // compute the bool first argument separately.
933  constexpr bool inverseTypeIsILUT = std::is_same<InverseType, ILUTInverse>::value;
934  TEUCHOS_TEST_FOR_EXCEPTION(! inverseTypeIsILUT, std::logic_error,
935  "InverseType for SparseContainer must be Ifpack2::ILUT<ROW>");
936  return "SparseILUT"; //the only supported sparse container specialization if no Amesos2
937 #endif
938 }
939 
940 } // namespace Ifpack2
941 
942 // For ETI
943 #include "Ifpack2_ILUT.hpp"
944 #ifdef HAVE_IFPACK2_AMESOS2
945 #include "Ifpack2_Details_Amesos2Wrapper.hpp"
946 #endif
947 
948 // There's no need to instantiate for CrsMatrix too. All Ifpack2
949 // preconditioners can and should do dynamic casts if they need a type
950 // more specific than RowMatrix.
951 
952 #ifdef HAVE_IFPACK2_AMESOS2
953 # define IFPACK2_SPARSECONTAINER_INSTANT(S,LO,GO,N) \
954  template class Ifpack2::SparseContainer< Tpetra::RowMatrix<S, LO, GO, N>, \
955  Ifpack2::ILUT<Tpetra::RowMatrix<S,LO,GO,N> > >; \
956  template class Ifpack2::SparseContainer< Tpetra::RowMatrix<S, LO, GO, N>, \
957  Ifpack2::Details::Amesos2Wrapper<Tpetra::RowMatrix<S,LO,GO,N> > >;
958 #else
959 # define IFPACK2_SPARSECONTAINER_INSTANT(S,LO,GO,N) \
960  template class Ifpack2::SparseContainer< Tpetra::RowMatrix<S,LO,GO,N>, \
961  Ifpack2::ILUT<Tpetra::RowMatrix<S, LO, GO, N> > >;
962 #endif
963 #endif // IFPACK2_SPARSECONTAINER_DEF_HPP
static std::string getName()
Get the name of this container type for Details::constructContainer()
Definition: Ifpack2_SparseContainer_def.hpp:913
SparseContainer(const Teuchos::RCP< const row_matrix_type > &matrix, const Teuchos::Array< Teuchos::Array< LO > > &partitions, const Teuchos::RCP< const import_type > &importer, bool pointIndexed)
Constructor.
Definition: Ifpack2_SparseContainer_def.hpp:27
Store and solve a local sparse linear problem.
Definition: Ifpack2_SparseContainer_decl.hpp:101
static RCP< Time > getNewCounter(const std::string &name)
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object with some verbosity level to the given FancyOStream.
Definition: Ifpack2_SparseContainer_def.hpp:458
The implementation of the numerical features of Container (Jacobi, Gauss-Seidel, SGS). This class allows a custom scalar type (LocalScalarType) to be used for storing blocks and solving the block systems. Hiding this template parameter from the Container interface simplifies the BlockRelaxation and ContainerFactory classes.
Definition: Ifpack2_Container_decl.hpp:310
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
virtual void initialize()
Do all set-up operations that only require matrix structure.
Definition: Ifpack2_SparseContainer_def.hpp:53
ILUT (incomplete LU factorization with threshold) of a Tpetra sparse matrix.
Definition: Ifpack2_ILUT_decl.hpp:60
virtual std::ostream & print(std::ostream &os) const
Print information about this object to the given output stream.
Definition: Ifpack2_SparseContainer_def.hpp:421
virtual std::string description() const
A one-line description of this object.
Definition: Ifpack2_SparseContainer_def.hpp:431
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Wrapper class for direct solvers in Amesos2.
Definition: Ifpack2_Details_Amesos2Wrapper_decl.hpp:69
typename mv_type::dual_view_type::t_host HostView
Definition: Ifpack2_Container_decl.hpp:106
virtual void apply(ConstHostView X, HostView Y, int blockIndex, Teuchos::ETransp mode=Teuchos::NO_TRANS, SC alpha=Teuchos::ScalarTraits< SC >::one(), SC beta=Teuchos::ScalarTraits< SC >::zero()) const
Compute Y := alpha * M^{-1} X + beta*Y.
Definition: Ifpack2_SparseContainer_def.hpp:147
basic_FancyOStream & setOutputToRootOnly(const int rootRank)
virtual void weightedApply(ConstHostView X, HostView Y, ConstHostView W, int blockIndex, Teuchos::ETransp mode=Teuchos::NO_TRANS, SC alpha=Teuchos::ScalarTraits< SC >::one(), SC beta=Teuchos::ScalarTraits< SC >::zero()) const
Compute Y := alpha * diag(D) * M^{-1} (diag(D) * X) + beta*Y.
Definition: Ifpack2_SparseContainer_def.hpp:271
void push_back(const value_type &x)
virtual ~SparseContainer()
Destructor (declared virtual for memory safety of derived classes).
Definition: Ifpack2_SparseContainer_def.hpp:42
size_type size() const
TypeTo as(const TypeFrom &t)
virtual void setParameters(const Teuchos::ParameterList &List)
Set all necessary parameters.
Definition: Ifpack2_SparseContainer_def.hpp:46
virtual void compute()
Initialize and compute all blocks.
Definition: Ifpack2_SparseContainer_def.hpp:85
Ifpack2::SparseContainer class declaration.
void clearBlocks()
Definition: Ifpack2_SparseContainer_def.hpp:109
Implementation detail of Ifpack2::Container subclasses.
Definition: Ifpack2_Details_MultiVectorLocalGatherScatter.hpp:52