Ifpack2 Templated Preconditioning Package  Version 1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Ifpack2_Details_GaussSeidel.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4 //
5 // Copyright 2009 NTESS and the Ifpack2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef IFPACK2_GAUSS_SEIDEL_HPP
11 #define IFPACK2_GAUSS_SEIDEL_HPP
12 
13 namespace Ifpack2 {
14 namespace Details {
15 template <typename Scalar, typename LO, typename GO, typename NT>
16 struct GaussSeidel {
17  using crs_matrix_type = Tpetra::CrsMatrix<Scalar, LO, GO, NT>;
18  using bcrs_matrix_type = Tpetra::BlockCrsMatrix<Scalar, LO, GO, NT>;
19  using row_matrix_type = Tpetra::RowMatrix<Scalar, LO, GO, NT>;
20  using local_matrix_device_type = typename crs_matrix_type::local_matrix_device_type;
21  using vector_type = Tpetra::Vector<Scalar, LO, GO, NT>;
22  using multivector_type = Tpetra::MultiVector<Scalar, LO, GO, NT>;
23  using block_multivector_type = Tpetra::BlockMultiVector<Scalar, LO, GO, NT>;
24  using mem_space_t = typename local_matrix_device_type::memory_space;
25  using rowmap_t = typename local_matrix_device_type::row_map_type::host_mirror_type;
26  using entries_t = typename local_matrix_device_type::index_type::host_mirror_type;
27  using values_t = typename local_matrix_device_type::values_type::host_mirror_type;
28  using const_rowmap_t = typename rowmap_t::const_type;
29  using const_entries_t = typename entries_t::const_type;
30  using const_values_t = typename values_t::const_type;
31  using Offset = typename rowmap_t::non_const_value_type;
32  using IST = typename crs_matrix_type::impl_scalar_type;
33 #if KOKKOS_VERSION >= 40799
34  using KAT = KokkosKernels::ArithTraits<IST>;
35 #else
36  using KAT = Kokkos::ArithTraits<IST>;
37 #endif
38  // Type of view representing inverse diagonal blocks, and its host_mirror_type.
39  using InverseBlocks = Kokkos::View<IST***, typename bcrs_matrix_type::device_type>;
40  using InverseBlocksHost = typename InverseBlocks::host_mirror_type;
41 
42  typedef typename crs_matrix_type::nonconst_global_inds_host_view_type nonconst_global_inds_host_view_type;
43  typedef typename crs_matrix_type::nonconst_local_inds_host_view_type nonconst_local_inds_host_view_type;
44  typedef typename crs_matrix_type::nonconst_values_host_view_type nonconst_values_host_view_type;
45 
46  // Setup for CrsMatrix
47  GaussSeidel(Teuchos::RCP<const crs_matrix_type> A_, Teuchos::RCP<vector_type>& inverseDiagVec_, Teuchos::ArrayRCP<LO>& applyRows_, Scalar omega_) {
48  A = A_;
49  numRows = A_->getLocalNumRows();
50  haveRowMatrix = false;
51  inverseDiagVec = inverseDiagVec_;
52  applyRows = applyRows_;
53  blockSize = 1;
54  omega = omega_;
55  }
56 
57  GaussSeidel(Teuchos::RCP<const row_matrix_type> A_, Teuchos::RCP<vector_type>& inverseDiagVec_, Teuchos::ArrayRCP<LO>& applyRows_, Scalar omega_) {
58  A = A_;
59  numRows = A_->getLocalNumRows();
60  haveRowMatrix = true;
61  inverseDiagVec = inverseDiagVec_;
62  applyRows = applyRows_;
63  blockSize = 1;
64  omega = omega_;
65  // Here, need to make a deep CRS copy to avoid slow access via getLocalRowCopy
66  rowMatrixRowmap = rowmap_t(Kokkos::ViewAllocateWithoutInitializing("Arowmap"), numRows + 1);
67  rowMatrixEntries = entries_t(Kokkos::ViewAllocateWithoutInitializing("Aentries"), A_->getLocalNumEntries());
68  rowMatrixValues = values_t(Kokkos::ViewAllocateWithoutInitializing("Avalues"), A_->getLocalNumEntries());
69  size_t maxDegree = A_->getLocalMaxNumRowEntries();
70  nonconst_values_host_view_type rowValues("rowValues", maxDegree);
71  nonconst_local_inds_host_view_type rowEntries("rowEntries", maxDegree);
72  size_t accum = 0;
73  for (LO i = 0; i <= numRows; i++) {
74  rowMatrixRowmap(i) = accum;
75  if (i == numRows)
76  break;
77  size_t degree;
78  A_->getLocalRowCopy(i, rowEntries, rowValues, degree);
79  accum += degree;
80  size_t rowBegin = rowMatrixRowmap(i);
81  for (size_t j = 0; j < degree; j++) {
82  rowMatrixEntries(rowBegin + j) = rowEntries[j];
83  rowMatrixValues(rowBegin + j) = rowValues[j];
84  }
85  }
86  }
87 
88  GaussSeidel(Teuchos::RCP<const bcrs_matrix_type> A_, const InverseBlocks& inverseBlockDiag_, Teuchos::ArrayRCP<LO>& applyRows_, Scalar omega_) {
89  A = A_;
90  numRows = A_->getLocalNumRows();
91  haveRowMatrix = false;
92  // note: next 2 lines are no-ops if inverseBlockDiag_ is already host-accessible
93  inverseBlockDiag = Kokkos::create_mirror_view(inverseBlockDiag_);
94  Kokkos::deep_copy(inverseBlockDiag, inverseBlockDiag_);
95  applyRows = applyRows_;
96  omega = omega_;
97  blockSize = A_->getBlockSize();
98  }
99 
100  template <bool useApplyRows, bool multipleRHS, bool omegaNotOne>
101  void applyImpl(const const_values_t& Avalues, const const_rowmap_t& Arowmap, const const_entries_t& Aentries, multivector_type& x, const multivector_type& b, Tpetra::ESweepDirection direction) {
102  // note: direction is either Forward or Backward (Symmetric is handled in apply())
103  LO numApplyRows = useApplyRows ? (LO)applyRows.size() : numRows;
104  // note: inverseDiagMV always has only one column
105  auto inverseDiag = Kokkos::subview(inverseDiagVec->getLocalViewHost(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
106  bool forward = direction == Tpetra::Forward;
107  if (multipleRHS) {
108  LO numVecs = x.getNumVectors();
109  Teuchos::Array<IST> accum(numVecs);
110  auto xlcl = x.get2dViewNonConst();
111  auto blcl = b.get2dView();
112  for (LO i = 0; i < numApplyRows; i++) {
113  LO row;
114  if (forward)
115  row = useApplyRows ? applyRows[i] : i;
116  else
117  row = useApplyRows ? applyRows[numApplyRows - 1 - i] : numApplyRows - 1 - i;
118  for (LO k = 0; k < numVecs; k++) {
119  accum[k] = KAT::zero();
120  }
121  Offset rowBegin = Arowmap(row);
122  Offset rowEnd = Arowmap(row + 1);
123  for (Offset j = rowBegin; j < rowEnd; j++) {
124  LO col = Aentries(j);
125  IST val = Avalues(j);
126  for (LO k = 0; k < numVecs; k++) {
127  accum[k] += val * IST(xlcl[k][col]);
128  }
129  }
130  // Update x
131  IST dinv = inverseDiag(row);
132  for (LO k = 0; k < numVecs; k++) {
133  if (omegaNotOne)
134  xlcl[k][row] += Scalar(omega * dinv * (IST(blcl[k][row]) - accum[k]));
135  else
136  xlcl[k][row] += Scalar(dinv * (IST(blcl[k][row]) - accum[k]));
137  }
138  }
139  } else {
140  auto xlcl = Kokkos::subview(x.getLocalViewHost(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
141  auto blcl = Kokkos::subview(b.getLocalViewHost(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
142  auto dlcl = Kokkos::subview(inverseDiagVec->getLocalViewHost(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
143  for (LO i = 0; i < numApplyRows; i++) {
144  LO row;
145  if (forward)
146  row = useApplyRows ? applyRows[i] : i;
147  else
148  row = useApplyRows ? applyRows[numApplyRows - 1 - i] : numApplyRows - 1 - i;
149  IST accum = KAT::zero();
150  Offset rowBegin = Arowmap(row);
151  Offset rowEnd = Arowmap(row + 1);
152  for (Offset j = rowBegin; j < rowEnd; j++) {
153  accum += Avalues(j) * xlcl(Aentries(j));
154  }
155  // Update x
156  IST dinv = dlcl(row);
157  if (omegaNotOne)
158  xlcl(row) += omega * dinv * (blcl(row) - accum);
159  else
160  xlcl(row) += dinv * (blcl(row) - accum);
161  }
162  }
163  }
164 
165  void applyBlock(block_multivector_type& x, const block_multivector_type& b, Tpetra::ESweepDirection direction) {
166  if (direction == Tpetra::Symmetric) {
167  applyBlock(x, b, Tpetra::Forward);
168  applyBlock(x, b, Tpetra::Backward);
169  return;
170  }
171  auto Abcrs = Teuchos::rcp_dynamic_cast<const bcrs_matrix_type>(A);
172  if (Abcrs.is_null())
173  throw std::runtime_error("Ifpack2::Details::GaussSeidel::applyBlock: A must be a BlockCrsMatrix");
174  auto Avalues = Abcrs->getValuesHost();
175  auto AlclGraph = Abcrs->getCrsGraph().getLocalGraphHost();
176  auto Arowmap = AlclGraph.row_map;
177  auto Aentries = AlclGraph.entries;
178  // Number of scalars in Avalues per block entry.
179  Offset bs2 = blockSize * blockSize;
180  LO numVecs = x.getNumVectors();
181  Kokkos::View<IST**, Kokkos::LayoutLeft, Kokkos::HostSpace> accum(
182  Kokkos::ViewAllocateWithoutInitializing("BlockGaussSeidel Accumulator"), blockSize, numVecs);
183  Kokkos::View<IST**, Kokkos::LayoutLeft, Kokkos::HostSpace> dinv_accum(
184  Kokkos::ViewAllocateWithoutInitializing("BlockGaussSeidel A_ii^-1*Accumulator"), blockSize, numVecs);
185  bool useApplyRows = !applyRows.is_null();
186  bool forward = direction == Tpetra::Forward;
187  LO numApplyRows = useApplyRows ? applyRows.size() : numRows;
188  for (LO i = 0; i < numApplyRows; i++) {
189  LO row;
190  if (forward)
191  row = useApplyRows ? applyRows[i] : i;
192  else
193  row = useApplyRows ? applyRows[numApplyRows - 1 - i] : numApplyRows - 1 - i;
194  for (LO v = 0; v < numVecs; v++) {
195  auto bRow = b.getLocalBlockHost(row, v, Tpetra::Access::ReadOnly);
196  for (LO k = 0; k < blockSize; k++) {
197  accum(k, v) = KAT::zero();
198  }
199  }
200  Offset rowBegin = Arowmap(row);
201  Offset rowEnd = Arowmap(row + 1);
202  for (Offset j = rowBegin; j < rowEnd; j++) {
203  LO col = Aentries(j);
204  const IST* blk = &Avalues(j * bs2);
205  for (LO v = 0; v < numVecs; v++) {
206  auto xCol = x.getLocalBlockHost(col, v, Tpetra::Access::ReadOnly);
207  for (LO br = 0; br < blockSize; br++) {
208  for (LO bc = 0; bc < blockSize; bc++) {
209  IST Aval = blk[br * blockSize + bc];
210  accum(br, v) += Aval * xCol(bc);
211  }
212  }
213  }
214  }
215  // Update x: term is omega * Aii^-1 * accum, where omega is scalar, Aii^-1 is bs*bs, and accum is bs*nv
216  auto invBlock = Kokkos::subview(inverseBlockDiag, row, Kokkos::ALL(), Kokkos::ALL());
217  Kokkos::deep_copy(dinv_accum, KAT::zero());
218  for (LO v = 0; v < numVecs; v++) {
219  auto bRow = b.getLocalBlockHost(row, v, Tpetra::Access::ReadOnly);
220  for (LO br = 0; br < blockSize; br++) {
221  accum(br, v) = bRow(br) - accum(br, v);
222  }
223  }
224  for (LO v = 0; v < numVecs; v++) {
225  for (LO br = 0; br < blockSize; br++) {
226  for (LO bc = 0; bc < blockSize; bc++) {
227  dinv_accum(br, v) += invBlock(br, bc) * accum(bc, v);
228  }
229  }
230  }
231  // Update x
232  for (LO v = 0; v < numVecs; v++) {
233  auto xRow = x.getLocalBlockHost(row, v, Tpetra::Access::ReadWrite);
234  for (LO k = 0; k < blockSize; k++) {
235  xRow(k) += omega * dinv_accum(k, v);
236  }
237  }
238  }
239  }
240 
241  // Version of apply for CrsMatrix/RowMatrix (for BlockCrsMatrix, call applyBlock)
242  void apply(multivector_type& x, const multivector_type& b, Tpetra::ESweepDirection direction) {
243  if (direction == Tpetra::Symmetric) {
244  apply(x, b, Tpetra::Forward);
245  apply(x, b, Tpetra::Backward);
246  return;
247  }
248  const_values_t Avalues;
249  const_rowmap_t Arowmap;
250  const_entries_t Aentries;
251  if (haveRowMatrix) {
252  Avalues = rowMatrixValues;
253  Arowmap = rowMatrixRowmap;
254  Aentries = rowMatrixEntries;
255  } else {
256  auto Acrs = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A);
257  if (Acrs.is_null())
258  throw std::runtime_error("Ifpack2::Details::GaussSeidel::apply: either haveRowMatrix, or A is CrsMatrix");
259  auto Alcl = Acrs->getLocalMatrixHost();
260  Avalues = Alcl.values;
261  Arowmap = Alcl.graph.row_map;
262  Aentries = Alcl.graph.entries;
263  }
264  if (applyRows.is_null()) {
265  if (x.getNumVectors() > 1)
266  this->template applyImpl<false, true, true>(Avalues, Arowmap, Aentries, x, b, direction);
267  else {
268  // Optimize for the all-rows, single vector, omega = 1 case
269  if (omega == KAT::one())
270  this->template applyImpl<false, false, false>(Avalues, Arowmap, Aentries, x, b, direction);
271  else
272  this->template applyImpl<false, false, true>(Avalues, Arowmap, Aentries, x, b, direction);
273  }
274  } else {
275  this->template applyImpl<true, true, true>(Avalues, Arowmap, Aentries, x, b, direction);
276  }
277  }
278 
280  // For efficiency, if input is a RowMatrix, keep a persistent copy of the CRS formatted local matrix.
281  bool haveRowMatrix;
282  values_t rowMatrixValues;
283  rowmap_t rowMatrixRowmap;
284  entries_t rowMatrixEntries;
285  LO numRows;
286  IST omega;
287  // If set up with BlockCrsMatrix, the block size. Otherwise 1.
288  LO blockSize;
289  // If using a non-block matrix, the inverse diagonal.
290  Teuchos::RCP<vector_type> inverseDiagVec;
291  // If using a block matrix, the inverses of all diagonal blocks.
292  InverseBlocksHost inverseBlockDiag;
293  // If null, apply over all rows in natural order. Otherwise, apply for each row listed in order.
294  Teuchos::ArrayRCP<LO> applyRows;
295 };
296 } // namespace Details
297 } // namespace Ifpack2
298 
299 #endif