Ifpack2 Templated Preconditioning Package  Version 1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Ifpack2_Container.hpp
Go to the documentation of this file.
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 //@HEADER
41 */
42 
43 #ifndef IFPACK2_CONTAINER_HPP
44 #define IFPACK2_CONTAINER_HPP
45 
48 
49 #include "Ifpack2_ConfigDefs.hpp"
50 #include "Tpetra_RowMatrix.hpp"
51 #include "Teuchos_Describable.hpp"
52 #include <Ifpack2_Partitioner.hpp>
53 #include <Tpetra_Map.hpp>
54 #include <Tpetra_Experimental_BlockCrsMatrix_decl.hpp>
56 #include <Teuchos_Time.hpp>
57 #include <iostream>
58 #include <type_traits>
59 
60 #ifndef DOXYGEN_SHOULD_SKIP_THIS
61 namespace Teuchos {
62  // Forward declaration to avoid include.
63  class ParameterList;
64 } // namespace Teuchos
65 #endif // DOXYGEN_SHOULD_SKIP_THIS
66 
67 namespace Ifpack2 {
68 
113 template<class MatrixType>
116 
117 protected:
118  typedef typename MatrixType::scalar_type scalar_type;
119  typedef typename MatrixType::local_ordinal_type local_ordinal_type;
120  typedef typename MatrixType::global_ordinal_type global_ordinal_type;
121  typedef typename MatrixType::node_type node_type;
122  typedef Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type> mv_type;
123  typedef Tpetra::Vector<scalar_type, local_ordinal_type, global_ordinal_type, node_type> vector_type;
124  typedef Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type> map_type;
126  typedef Tpetra::Import<local_ordinal_type, global_ordinal_type, node_type> import_type;
128  typedef Tpetra::Experimental::BlockCrsMatrix<scalar_type, local_ordinal_type, global_ordinal_type, node_type> block_crs_matrix_type;
129  typedef Tpetra::RowMatrix<scalar_type, local_ordinal_type, global_ordinal_type, node_type> row_matrix_type;
130 
131  static_assert(std::is_same<MatrixType, row_matrix_type>::value,
132  "Ifpack2::Container: Please use MatrixType = Tpetra::RowMatrix.");
133 
135  typedef typename Kokkos::Details::ArithTraits<scalar_type>::val_type impl_scalar_type;
137 
138 public:
139  typedef typename mv_type::dual_view_type::t_host HostView;
140 
151  const Teuchos::RCP<const import_type>& importer,
152  int OverlapLevel,
153  scalar_type DampingFactor) :
154  inputMatrix_ (matrix),
155  OverlapLevel_ (OverlapLevel),
156  DampingFactor_ (DampingFactor),
157  Importer_ (importer)
158  {
159  using Teuchos::Ptr;
160  using Teuchos::RCP;
161  using Teuchos::Array;
162  using Teuchos::ArrayView;
163  using Teuchos::Comm;
164  // typedef Tpetra::Import<local_ordinal_type, global_ordinal_type, node_type> import_type; // unused
165  NumLocalRows_ = inputMatrix_->getNodeNumRows();
166  NumGlobalRows_ = inputMatrix_->getGlobalNumRows();
167  NumGlobalNonzeros_ = inputMatrix_->getGlobalNumEntries();
168  IsParallel_ = inputMatrix_->getRowMap()->getComm()->getSize() != 1;
169  auto global_bcrs =
170  Teuchos::rcp_dynamic_cast<const block_crs_matrix_type>(inputMatrix_);
171  hasBlockCrs_ = !global_bcrs.is_null();
172  if(hasBlockCrs_)
173  bcrsBlockSize_ = global_bcrs->getBlockSize();
174  else
175  bcrsBlockSize_ = 1;
176  setBlockSizes(partitions);
177  }
178 
191  const Teuchos::Array<local_ordinal_type>& localRows) :
192  inputMatrix_ (matrix),
193  numBlocks_ (1),
194  partitions_ (localRows.size()),
195  blockRows_ (1),
196  partitionIndices_ (1),
197  OverlapLevel_ (0),
198  DampingFactor_ (STS::one()),
199  Importer_ (Teuchos::null)
200  {
201  NumLocalRows_ = inputMatrix_->getNodeNumRows();
202  NumGlobalRows_ = inputMatrix_->getGlobalNumRows();
203  NumGlobalNonzeros_ = inputMatrix_->getGlobalNumEntries();
204  IsParallel_ = inputMatrix_->getRowMap()->getComm()->getSize() > 1;
205  blockRows_[0] = localRows.size();
206  partitionIndices_[0] = 0;
207  const block_crs_matrix_type* global_bcrs =
208  Teuchos::rcp_dynamic_cast<const block_crs_matrix_type>(inputMatrix_).get();
209  hasBlockCrs_ = global_bcrs;
210  if(hasBlockCrs_)
211  bcrsBlockSize_ = global_bcrs->getBlockSize();
212  else
213  bcrsBlockSize_ = 1;
214  for(local_ordinal_type i = 0; i < localRows.size(); i++)
215  partitions_[i] = localRows[i];
216  }
217 
219  virtual ~Container() {};
220 
237  {
239  (&partitions_[partitionIndices_[blockIndex]], blockRows_[blockIndex]);
240  }
241 
250  virtual void initialize () = 0;
251 
254  {
255  //First, create a grand total of all the rows in all the blocks
256  //Note: If partitioner allowed overlap, this could be greater than the # of local rows
257  local_ordinal_type totalBlockRows = 0;
258  numBlocks_ = partitions.size();
261  for(int i = 0; i < numBlocks_; i++)
262  {
263  local_ordinal_type rowsInBlock = partitions[i].size();
264  blockRows_[i] = rowsInBlock;
265  partitionIndices_[i] = totalBlockRows;
266  totalBlockRows += rowsInBlock;
267  }
268  partitions_.resize(totalBlockRows);
269  //set partitions_: each entry is the partition/block of the row
270  local_ordinal_type iter = 0;
271  for(int i = 0; i < numBlocks_; i++)
272  {
273  for(int j = 0; j < blockRows_[i]; j++)
274  {
275  partitions_[iter++] = partitions[i][j];
276  }
277  }
278  }
279 
280  void getMatDiag() const
281  {
282  if(Diag_.is_null())
283  {
284  Diag_ = rcp(new vector_type(inputMatrix_->getDomainMap()));
285  inputMatrix_->getLocalDiagCopy(*Diag_);
286  }
287  }
288 
294  virtual void compute () = 0;
295 
296  void DoJacobi(HostView& X, HostView& Y, int stride) const;
297  void DoOverlappingJacobi(HostView& X, HostView& Y, HostView& W, int stride) const;
298  void DoGaussSeidel(HostView& X, HostView& Y, HostView& Y2, int stride) const;
299  void DoSGS(HostView& X, HostView& Y, HostView& Y2, int stride) const;
300 
302  virtual void setParameters (const Teuchos::ParameterList& List) = 0;
303 
305  virtual bool isInitialized () const = 0;
306 
308  virtual bool isComputed () const = 0;
309 
322  virtual void
323  apply (HostView& X,
324  HostView& Y,
325  int blockIndex,
326  int stride,
328  scalar_type alpha = Teuchos::ScalarTraits<scalar_type>::one(),
329  scalar_type beta = Teuchos::ScalarTraits<scalar_type>::zero()) const = 0;
330 
333  // The underlying container implements the splitting <tt>A = D + R</tt>. Only
334  // it can have efficient access to D and R, as these are constructed in the
335  // symbolic and numeric phases.
336  //
337  // This is the first performance-portable implementation of a block
338  // relaxation, and it is supported currently only by BlockTriDiContainer.
339  virtual void applyInverseJacobi (const mv_type& /* X */, mv_type& /* Y */,
340  bool /* zeroStartingSolution = false */,
341  int /* numSweeps = 1 */) const
342  { TEUCHOS_TEST_FOR_EXCEPT_MSG(true, "Not implemented."); }
343 
345  void applyMV (mv_type& X, mv_type& Y) const
346  {
347 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE
348  HostView XView = X.template getLocalView<Kokkos::HostSpace>();
349  HostView YView = Y.template getLocalView<Kokkos::HostSpace>();
350 #else
351  HostView XView = X.getLocalViewHost();
352  HostView YView = Y.getLocalViewHost();
353 #endif
354  this->apply (XView, YView, 0, X.getStride());
355  }
356 
377  virtual void
378  weightedApply (HostView& X,
379  HostView& Y,
380  HostView& W,
381  int blockIndex,
382  int stride,
384  scalar_type alpha = Teuchos::ScalarTraits<scalar_type>::one(),
385  scalar_type beta = Teuchos::ScalarTraits<scalar_type>::zero()) const = 0;
386 
388  void weightedApplyMV (mv_type& X,
389  mv_type& Y,
390  vector_type& W)
391  {
392 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE
393  HostView XView = X.template getLocalView<Kokkos::HostSpace>();
394  HostView YView = Y.template getLocalView<Kokkos::HostSpace>();
395  HostView WView = W.template getLocalView<Kokkos::HostSpace>();
396 #else
397  HostView XView = X.getLocalViewHost();
398  HostView YView = Y.getLocalViewHost();
399  HostView WView = W.getLocalViewHost();
400 #endif
401  weightedApply (XView, YView, WView, 0, X.getStride());
402  }
403 
404  virtual void clearBlocks();
405 
407  virtual std::ostream& print (std::ostream& os) const = 0;
408 
411  static std::string getName()
412  {
413  return "Generic";
414  }
415 
416 protected:
419 
423  Teuchos::Array<local_ordinal_type> partitions_; //size: total # of local rows (in all local blocks)
435  scalar_type DampingFactor_;
439  local_ordinal_type NumLocalRows_;
441  global_ordinal_type NumGlobalRows_;
443  global_ordinal_type NumGlobalNonzeros_;
448 };
449 
451 template <class MatrixType>
452 inline std::ostream&
453 operator<< (std::ostream& os, const Ifpack2::Container<MatrixType>& obj)
454 {
455  return obj.print (os);
456 }
457 
458 template <class MatrixType>
459 void Container<MatrixType>::DoJacobi(HostView& X, HostView& Y, int stride) const
460 {
461  const scalar_type one = STS::one();
462  // Note: Flop counts copied naively from Ifpack.
463  // use partitions_ and blockRows_
464  size_t numVecs = X.extent(1);
465  // Non-overlapping Jacobi
466  for (local_ordinal_type i = 0; i < numBlocks_; i++)
467  {
468  // may happen that a partition is empty
469  if(blockRows_[i] != 1 || hasBlockCrs_)
470  {
471  if(blockRows_[i] == 0 )
472  continue;
473  apply(X, Y, i, stride, Teuchos::NO_TRANS, DampingFactor_, one);
474  }
475  else // singleton, can't access Containers_[i] as it was never filled and may be null.
476  {
477  local_ordinal_type LRID = partitions_[partitionIndices_[i]];
478  getMatDiag();
479 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE
480  HostView diagView = Diag_->template getLocalView<Kokkos::HostSpace>();
481 #else
482  HostView diagView = Diag_->getLocalViewHost();
483 #endif
484  impl_scalar_type d = (impl_scalar_type) one / diagView(LRID, 0);
485  for(size_t nv = 0; nv < numVecs; nv++)
486  {
487  impl_scalar_type x = X(LRID, nv);
488  Y(LRID, nv) = x * d;
489  }
490  }
491  }
492 }
493 
494 template <class MatrixType>
495 void Container<MatrixType>::DoOverlappingJacobi(HostView& X, HostView& Y, HostView& W, int stride) const
496 {
497  // Overlapping Jacobi
498  for(local_ordinal_type i = 0; i < numBlocks_; i++)
499  {
500  // may happen that a partition is empty
501  if(blockRows_[i] == 0)
502  continue;
503  if(blockRows_[i] != 1)
504  weightedApply(X, Y, W, i, stride, Teuchos::NO_TRANS, DampingFactor_, STS::one());
505  }
506 }
507 
508 template<class MatrixType>
509 void Container<MatrixType>::DoGaussSeidel(HostView& X, HostView& Y, HostView& Y2, int stride) const
510 {
511  using Teuchos::Array;
512  using Teuchos::ArrayRCP;
513  using Teuchos::ArrayView;
514  using Teuchos::Ptr;
515  using Teuchos::RCP;
516  using Teuchos::rcp;
517  using Teuchos::rcpFromRef;
518  // Note: Flop counts copied naively from Ifpack.
519  const scalar_type one = STS::one();
520  const size_t Length = inputMatrix_->getNodeMaxNumRowEntries();
521  auto numVecs = X.extent(1);
522  Array<scalar_type> Values;
523  Array<local_ordinal_type> Indices;
524  Indices.resize(Length);
525 
526  Values.resize(bcrsBlockSize_ * bcrsBlockSize_ * Length); //note: if A was not a BlockCRS, bcrsBlockSize_ is 1
527  // I think I decided I need two extra vectors:
528  // One to store the sum of the corrections (initialized to zero)
529  // One to store the temporary residual (doesn't matter if it is zeroed or not)
530  // My apologies for making the names clear and meaningful. (X=RHS, Y=guess?! Nice.)
531  HostView Resid("", X.extent(0), X.extent(1));
532  for(local_ordinal_type i = 0; i < numBlocks_; i++)
533  {
534  if(blockRows_[i] > 1 || hasBlockCrs_)
535  {
536  if (blockRows_[i] == 0)
537  continue;
538  // update from previous block
539  ArrayView<const local_ordinal_type> localRows = getLocalRows (i);
540  const size_t localNumRows = blockRows_[i];
541  for(size_t j = 0; j < localNumRows; j++)
542  {
543  const local_ordinal_type LID = localRows[j]; // Containers_[i]->ID (j);
544  size_t NumEntries;
545  inputMatrix_->getLocalRowCopy(LID, Indices(), Values(), NumEntries);
546  for(size_t m = 0; m < numVecs; m++)
547  {
548  if(hasBlockCrs_)
549  {
550  for (int localR = 0; localR < bcrsBlockSize_; localR++)
551  Resid(LID * bcrsBlockSize_ + localR, m) = X(LID * bcrsBlockSize_ + localR, m);
552  for (size_t k = 0; k < NumEntries; ++k)
553  {
554  const local_ordinal_type col = Indices[k];
555  for (int localR = 0; localR < bcrsBlockSize_; localR++)
556  {
557  for(int localC = 0; localC < bcrsBlockSize_; localC++)
558  {
559  Resid(LID * bcrsBlockSize_ + localR, m) -=
560  Values[k * bcrsBlockSize_ * bcrsBlockSize_ + localR + localC * bcrsBlockSize_]
561  * Y2(col * bcrsBlockSize_ + localC, m);
562  }
563  }
564  }
565  }
566  else
567  {
568  Resid(LID, m) = X(LID, m);
569  for (size_t k = 0; k < NumEntries; ++k)
570  {
571  const local_ordinal_type col = Indices[k];
572  Resid(LID, m) -= Values[k] * Y2(col, m);
573  }
574  }
575  }
576  }
577  // solve with this block
578  //
579  // Note: I'm abusing the ordering information, knowing that X/Y
580  // and Y2 have the same ordering for on-proc unknowns.
581  //
582  // Note: Add flop counts for inverse apply
583  apply(Resid, Y2, i, stride, Teuchos::NO_TRANS, DampingFactor_, one);
584  }
585  else if(blockRows_[i] == 1)
586  {
587  // singleton, can't access Containers_[i] as it was never filled and may be null.
588  // a singleton calculation is exact, all residuals should be zero.
589  local_ordinal_type LRID = partitionIndices_[i]; // by definition, a singleton 1 row in block.
590  getMatDiag();
591 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE
592  HostView diagView = Diag_->template getLocalView<Kokkos::HostSpace>();
593 #else
594  HostView diagView = Diag_->getLocalViewHost();
595 #endif
596  impl_scalar_type d = (impl_scalar_type) one / diagView(LRID, 0);
597  for(size_t m = 0; m < numVecs; m++)
598  {
599  impl_scalar_type x = X(LRID, m);
600  impl_scalar_type newy = x * d;
601  Y2(LRID, m) = newy;
602  }
603  } // end else
604  } // end for numBlocks_
605  if(IsParallel_)
606  {
607  auto numMyRows = inputMatrix_->getNodeNumRows();
608  for (size_t m = 0; m < numVecs; ++m)
609  {
610  for (size_t i = 0; i < numMyRows * bcrsBlockSize_; ++i)
611  {
612  Y(i, m) = Y2(i, m);
613  }
614  }
615  }
616 }
617 
618 template<class MatrixType>
619 void Container<MatrixType>::DoSGS(HostView& X, HostView& Y, HostView& Y2, int stride) const
620 {
621  using Teuchos::Array;
622  using Teuchos::ArrayRCP;
623  using Teuchos::ArrayView;
624  using Teuchos::Ptr;
625  using Teuchos::ptr;
626  using Teuchos::RCP;
627  using Teuchos::rcp;
628  using Teuchos::rcpFromRef;
629  const scalar_type one = STS::one();
630  auto numVecs = X.extent(1);
631  const size_t Length = inputMatrix_->getNodeMaxNumRowEntries();
632  Array<scalar_type> Values;
633  Array<local_ordinal_type> Indices(Length);
634  Values.resize(bcrsBlockSize_ * bcrsBlockSize_ * Length);
635  // I think I decided I need two extra vectors:
636  // One to store the sum of the corrections (initialized to zero)
637  // One to store the temporary residual (doesn't matter if it is zeroed or not)
638  // My apologies for making the names clear and meaningful. (X=RHS, Y=guess?! Nice.)
639  HostView Resid("", X.extent(0), X.extent(1));
640  // Forward Sweep
641  for(local_ordinal_type i = 0; i < numBlocks_; i++)
642  {
643  if(blockRows_[i] != 1 || hasBlockCrs_)
644  {
645  if(blockRows_[i] == 0)
646  continue; // Skip empty partitions
647  // update from previous block
648  ArrayView<const local_ordinal_type> localRows = getLocalRows(i);
649  for(local_ordinal_type j = 0; j < blockRows_[i]; j++)
650  {
651  const local_ordinal_type LID = localRows[j]; // Containers_[i]->ID (j);
652  size_t NumEntries;
653  inputMatrix_->getLocalRowCopy(LID, Indices(), Values(), NumEntries);
654  //set tmpresid = initresid - A*correction
655  for(size_t m = 0; m < numVecs; m++)
656  {
657  if(hasBlockCrs_)
658  {
659  for(int localR = 0; localR < bcrsBlockSize_; localR++)
660  Resid(LID * bcrsBlockSize_ + localR, m) = X(LID * bcrsBlockSize_ + localR, m);
661  for(size_t k = 0; k < NumEntries; ++k)
662  {
663  const local_ordinal_type col = Indices[k];
664  for (int localR = 0; localR < bcrsBlockSize_; localR++)
665  {
666  for(int localC = 0; localC < bcrsBlockSize_; localC++)
667  Resid(LID * bcrsBlockSize_ + localR, m) -=
668  Values[k * (bcrsBlockSize_ * bcrsBlockSize_) + (localR + localC * bcrsBlockSize_)]
669  * Y2(col * bcrsBlockSize_ + localC, m);
670  }
671  }
672  }
673  else
674  {
675  Resid(LID, m) = X(LID, m);
676  for(size_t k = 0; k < NumEntries; k++)
677  {
678  local_ordinal_type col = Indices[k];
679  Resid(LID, m) -= Values[k] * Y2(col, m);
680  }
681  }
682  }
683  }
684  // solve with this block
685  //
686  // Note: I'm abusing the ordering information, knowing that X/Y
687  // and Y2 have the same ordering for on-proc unknowns.
688  //
689  // Note: Add flop counts for inverse apply
690  apply(Resid, Y2, i, stride, Teuchos::NO_TRANS, DampingFactor_, one);
691  // operations for all getrow's
692  }
693  else // singleton, can't access Containers_[i] as it was never filled and may be null.
694  {
695  local_ordinal_type LRID = partitions_[partitionIndices_[i]];
696  getMatDiag();
697 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE
698  HostView diagView = Diag_->template getLocalView<Kokkos::HostSpace>();
699 #else
700  HostView diagView = Diag_->getLocalViewHost();
701 #endif
702  impl_scalar_type d = (impl_scalar_type) one / diagView(LRID, 0);
703  for(size_t m = 0; m < numVecs; m++)
704  {
705  impl_scalar_type x = X(LRID, m);
706  impl_scalar_type newy = x * d;
707  Y2(LRID, m) = newy;
708  }
709  } // end else
710  } // end forward sweep over NumLocalBlocks
711  // Reverse Sweep
712  //
713  // mfh 12 July 2013: The unusual iteration bounds, and the use of
714  // i-1 rather than i in the loop body, ensure correctness even if
715  // local_ordinal_type is unsigned. "i = numBlocks_-1; i >= 0;
716  // i--" will loop forever if local_ordinal_type is unsigned, because
717  // unsigned integers are (trivially) always nonnegative.
718  for(local_ordinal_type i = numBlocks_; i > 0; --i)
719  {
720  if(hasBlockCrs_ || blockRows_[i-1] != 1)
721  {
722  if(blockRows_[i - 1] == 0)
723  continue;
724  // update from previous block
725  ArrayView<const local_ordinal_type> localRows = getLocalRows(i-1);
726  for(local_ordinal_type j = 0; j < blockRows_[i-1]; j++)
727  {
728  const local_ordinal_type LID = localRows[j]; // Containers_[i-1]->ID (j);
729  size_t NumEntries;
730  inputMatrix_->getLocalRowCopy(LID, Indices(), Values(), NumEntries);
731  //set tmpresid = initresid - A*correction
732  for (size_t m = 0; m < numVecs; m++)
733  {
734  if(hasBlockCrs_)
735  {
736  for(int localR = 0; localR < bcrsBlockSize_; localR++)
737  Resid(LID * bcrsBlockSize_ + localR, m) = X(LID * bcrsBlockSize_ + localR, m);
738  for(size_t k = 0; k < NumEntries; ++k)
739  {
740  const local_ordinal_type col = Indices[k];
741  for(int localR = 0; localR < bcrsBlockSize_; localR++)
742  {
743  for(int localC = 0; localC < bcrsBlockSize_; localC++)
744  Resid(LID*bcrsBlockSize_+localR, m) -=
745  Values[k * (bcrsBlockSize_ * bcrsBlockSize_) + (localR + localC * bcrsBlockSize_)]
746  * Y2(col * bcrsBlockSize_ + localC, m);
747  }
748  }
749  }
750  else
751  {
752  Resid(LID, m) = X(LID, m);
753  for(size_t k = 0; k < NumEntries; ++k)
754  {
755  local_ordinal_type col = Indices[k];
756  Resid(LID, m) -= Values[k] * Y2(col, m);
757  }
758  }
759  }
760  }
761  // solve with this block
762  //
763  // Note: I'm abusing the ordering information, knowing that X/Y
764  // and Y2 have the same ordering for on-proc unknowns.
765  //
766  // Note: Add flop counts for inverse apply
767  apply(Resid, Y2, i - 1, stride, Teuchos::NO_TRANS, DampingFactor_, one);
768  // operations for all getrow's
769  } // end Partitioner_->numRowsInPart (i) != 1 )
770  // else do nothing, as by definition with a singleton, the residuals are zero.
771  } //end reverse sweep
772  if(IsParallel_)
773  {
774  auto numMyRows = inputMatrix_->getNodeNumRows();
775  for (size_t m = 0; m < numVecs; ++m)
776  {
777  for (size_t i = 0; i < numMyRows * bcrsBlockSize_; ++i)
778  {
779  Y(i, m) = Y2(i, m);
780  }
781  }
782  }
783 }
784 
785 template<class MatrixType>
786 void Container<MatrixType>::clearBlocks()
787 {
788  numBlocks_ = 0;
789  partitions_.clear();
790  blockRows_.clear();
791  partitionIndices_.clear();
792  Diag_ = Teuchos::null; //Diag_ will be recreated if needed
793 }
794 
795 } // namespace Ifpack2
796 
797 namespace Teuchos {
798 
803 template<class MatrixType>
804 class TEUCHOSCORE_LIB_DLL_EXPORT TypeNameTraits< ::Ifpack2::Container<MatrixType> >
805 {
806  public:
807  static std::string name () {
808  return std::string ("Ifpack2::Container<") +
810  }
811 
812  static std::string concreteName (const ::Ifpack2::Container<MatrixType>&) {
813  return name ();
814  }
815 };
816 
817 } // namespace Teuchos
818 
819 #endif // IFPACK2_CONTAINER_HPP
Teuchos::ArrayView< const local_ordinal_type > getLocalRows(int blockIndex) const
Local indices of the rows of the input matrix that belong to this block.
Definition: Ifpack2_Container.hpp:236
int OverlapLevel_
Number of rows of overlap for adjacent blocks.
Definition: Ifpack2_Container.hpp:433
global_ordinal_type NumGlobalNonzeros_
Number of nonzeros in input matrix.
Definition: Ifpack2_Container.hpp:443
virtual void initialize()=0
Do all set-up operations that only require matrix structure.
int numBlocks_
The number of blocks (partitions) in the container.
Definition: Ifpack2_Container.hpp:421
Teuchos::Array< local_ordinal_type > blockRows_
Number of rows in each block.
Definition: Ifpack2_Container.hpp:425
static std::string getName()
Definition: Ifpack2_Container.hpp:411
global_ordinal_type NumGlobalRows_
Number of global rows in input matrix.
Definition: Ifpack2_Container.hpp:441
bool hasBlockCrs_
Whether the input matrix is a BlockCRS matrix.
Definition: Ifpack2_Container.hpp:445
scalar_type DampingFactor_
Damping factor, passed to apply() as alpha.
Definition: Ifpack2_Container.hpp:435
Ifpack2::Partitioner:
Definition: Ifpack2_Partitioner.hpp:179
int bcrsBlockSize_
If hasBlockCrs_, the number of DOFs per vertex. Otherwise 1.
Definition: Ifpack2_Container.hpp:447
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
#define TEUCHOS_TEST_FOR_EXCEPT_MSG(throw_exception_test, msg)
virtual bool isComputed() const =0
Return true if the container has been successfully computed.
local_ordinal_type NumLocalRows_
Number of local rows in input matrix.
Definition: Ifpack2_Container.hpp:439
virtual void weightedApply(HostView &X, HostView &Y, HostView &W, int blockIndex, int stride, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const =0
Compute Y := alpha * diag(D) * M^{-1} (diag(D) * X) + beta*Y.
Teuchos::RCP< const row_matrix_type > inputMatrix_
The input matrix to the constructor.
Definition: Ifpack2_Container.hpp:418
Teuchos::RCP< vector_type > Diag_
Diagonal elements.
Definition: Ifpack2_Container.hpp:429
void resize(size_type new_size, const value_type &x=value_type())
void applyMV(mv_type &X, mv_type &Y) const
Wrapper for apply with MVs, used in unit tests (never called by BlockRelaxation)
Definition: Ifpack2_Container.hpp:345
Container(const Teuchos::RCP< const row_matrix_type > &matrix, const Teuchos::Array< Teuchos::Array< local_ordinal_type > > &partitions, const Teuchos::RCP< const import_type > &importer, int OverlapLevel, scalar_type DampingFactor)
Constructor.
Definition: Ifpack2_Container.hpp:149
Teuchos::RCP< const Tpetra::Import< local_ordinal_type, global_ordinal_type, node_type > > Importer_
Importer for importing off-process elements of MultiVectors.
Definition: Ifpack2_Container.hpp:437
bool IsParallel_
Whether the problem is distributed across multiple MPI processes.
Definition: Ifpack2_Container.hpp:431
void weightedApplyMV(mv_type &X, mv_type &Y, vector_type &W)
Wrapper for weightedApply with MVs, used in unit tests (never called by BlockRelaxation) ...
Definition: Ifpack2_Container.hpp:388
virtual void applyInverseJacobi(const mv_type &, mv_type &, bool, int) const
Compute Y := (1 - a) Y + a D^{-1} (X - R*Y).
Definition: Ifpack2_Container.hpp:339
Teuchos::Array< local_ordinal_type > partitionIndices_
Starting index in partitions_ of local row indices for each block.
Definition: Ifpack2_Container.hpp:427
size_type size() const
virtual void compute()=0
Extract the local diagonal block and prepare the solver.
Interface for creating and solving a local linear problem.
Definition: Ifpack2_Container.hpp:114
virtual void apply(HostView &X, HostView &Y, int blockIndex, int stride, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const =0
Compute Y := alpha * M^{-1} X + beta*Y.
virtual void setParameters(const Teuchos::ParameterList &List)=0
Set parameters.
virtual std::ostream & print(std::ostream &os) const =0
Print basic information about the container to os.
virtual ~Container()
Destructor.
Definition: Ifpack2_Container.hpp:219
Teuchos::Array< local_ordinal_type > partitions_
Local indices of the rows of the input matrix that belong to this block.
Definition: Ifpack2_Container.hpp:423
virtual bool isInitialized() const =0
Return true if the container has been successfully initialized.
void setBlockSizes(const Teuchos::Array< Teuchos::Array< local_ordinal_type > > &partitions)
Initialize arrays with information about block sizes.
Definition: Ifpack2_Container.hpp:253
Kokkos::Details::ArithTraits< scalar_type >::val_type impl_scalar_type
Internal representation of Scalar in Kokkos::View.
Definition: Ifpack2_Container.hpp:132
Container(const Teuchos::RCP< const row_matrix_type > &matrix, const Teuchos::Array< local_ordinal_type > &localRows)
Constructor for single block.
Definition: Ifpack2_Container.hpp:190
static std::string name()
bool is_null() const