Ifpack2 Templated Preconditioning Package  Version 1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Ifpack2_Details_GaussSeidel.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4 //
5 // Copyright 2009 NTESS and the Ifpack2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef IFPACK2_GAUSS_SEIDEL_HPP
11 #define IFPACK2_GAUSS_SEIDEL_HPP
12 
13 namespace Ifpack2
14 {
15 namespace Details
16 {
17  template<typename Scalar, typename LO, typename GO, typename NT>
18  struct GaussSeidel
19  {
20  using crs_matrix_type = Tpetra::CrsMatrix<Scalar, LO, GO, NT>;
21  using bcrs_matrix_type = Tpetra::BlockCrsMatrix<Scalar, LO, GO, NT>;
22  using row_matrix_type = Tpetra::RowMatrix<Scalar, LO, GO, NT>;
23  using local_matrix_device_type = typename crs_matrix_type::local_matrix_device_type;
24  using vector_type = Tpetra::Vector<Scalar, LO, GO, NT>;
25  using multivector_type = Tpetra::MultiVector<Scalar, LO, GO, NT>;
26  using block_multivector_type = Tpetra::BlockMultiVector<Scalar, LO, GO, NT>;
27  using mem_space_t = typename local_matrix_device_type::memory_space;
28  using rowmap_t = typename local_matrix_device_type::row_map_type::HostMirror;
29  using entries_t = typename local_matrix_device_type::index_type::HostMirror;
30  using values_t = typename local_matrix_device_type::values_type::HostMirror;
31  using const_rowmap_t = typename rowmap_t::const_type;
32  using const_entries_t = typename entries_t::const_type;
33  using const_values_t = typename values_t::const_type;
34  using Offset = typename rowmap_t::non_const_value_type;
35  using IST = typename crs_matrix_type::impl_scalar_type;
36  using KAT = Kokkos::ArithTraits<IST>;
37  //Type of view representing inverse diagonal blocks, and its HostMirror.
38  using InverseBlocks = Kokkos::View<IST***, typename bcrs_matrix_type::device_type>;
39  using InverseBlocksHost = typename InverseBlocks::HostMirror;
40 
41  typedef typename crs_matrix_type::nonconst_global_inds_host_view_type nonconst_global_inds_host_view_type;
42  typedef typename crs_matrix_type::nonconst_local_inds_host_view_type nonconst_local_inds_host_view_type;
43  typedef typename crs_matrix_type::nonconst_values_host_view_type nonconst_values_host_view_type;
44 
45  //Setup for CrsMatrix
46  GaussSeidel(Teuchos::RCP<const crs_matrix_type> A_, Teuchos::RCP<vector_type>& inverseDiagVec_, Teuchos::ArrayRCP<LO>& applyRows_, Scalar omega_)
47  {
48  A = A_;
49  numRows = A_->getLocalNumRows();
50  haveRowMatrix = false;
51  inverseDiagVec = inverseDiagVec_;
52  applyRows = applyRows_;
53  blockSize = 1;
54  omega = omega_;
55  }
56 
57  GaussSeidel(Teuchos::RCP<const row_matrix_type> A_, Teuchos::RCP<vector_type>& inverseDiagVec_, Teuchos::ArrayRCP<LO>& applyRows_, Scalar omega_)
58  {
59  A = A_;
60  numRows = A_->getLocalNumRows();
61  haveRowMatrix = true;
62  inverseDiagVec = inverseDiagVec_;
63  applyRows = applyRows_;
64  blockSize = 1;
65  omega = omega_;
66  //Here, need to make a deep CRS copy to avoid slow access via getLocalRowCopy
67  rowMatrixRowmap = rowmap_t(Kokkos::ViewAllocateWithoutInitializing("Arowmap"), numRows + 1);
68  rowMatrixEntries = entries_t(Kokkos::ViewAllocateWithoutInitializing("Aentries"), A_->getLocalNumEntries());
69  rowMatrixValues = values_t(Kokkos::ViewAllocateWithoutInitializing("Avalues"), A_->getLocalNumEntries());
70  size_t maxDegree = A_->getLocalMaxNumRowEntries();
71  nonconst_values_host_view_type rowValues("rowValues", maxDegree);
72  nonconst_local_inds_host_view_type rowEntries("rowEntries", maxDegree);
73  size_t accum = 0;
74  for(LO i = 0; i <= numRows; i++)
75  {
76  rowMatrixRowmap(i) = accum;
77  if(i == numRows)
78  break;
79  size_t degree;
80  A_->getLocalRowCopy(i, rowEntries, rowValues, degree);
81  accum += degree;
82  size_t rowBegin = rowMatrixRowmap(i);
83  for(size_t j = 0; j < degree; j++)
84  {
85  rowMatrixEntries(rowBegin + j) = rowEntries[j];
86  rowMatrixValues(rowBegin + j) = rowValues[j];
87  }
88  }
89  }
90 
91  GaussSeidel(Teuchos::RCP<const bcrs_matrix_type> A_, const InverseBlocks& inverseBlockDiag_, Teuchos::ArrayRCP<LO>& applyRows_, Scalar omega_)
92  {
93  A = A_;
94  numRows = A_->getLocalNumRows();
95  haveRowMatrix = false;
96  //note: next 2 lines are no-ops if inverseBlockDiag_ is already host-accessible
97  inverseBlockDiag = Kokkos::create_mirror_view(inverseBlockDiag_);
98  Kokkos::deep_copy(inverseBlockDiag, inverseBlockDiag_);
99  applyRows = applyRows_;
100  omega = omega_;
101  blockSize = A_->getBlockSize();
102  }
103 
104  template<bool useApplyRows, bool multipleRHS, bool omegaNotOne>
105  void applyImpl(const const_values_t& Avalues, const const_rowmap_t& Arowmap, const const_entries_t& Aentries, multivector_type& x, const multivector_type& b, Tpetra::ESweepDirection direction)
106  {
107  //note: direction is either Forward or Backward (Symmetric is handled in apply())
108  LO numApplyRows = useApplyRows ? (LO) applyRows.size() : numRows;
109  //note: inverseDiagMV always has only one column
110  auto inverseDiag = Kokkos::subview(inverseDiagVec->getLocalViewHost(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
111  bool forward = direction == Tpetra::Forward;
112  if(multipleRHS)
113  {
114  LO numVecs = x.getNumVectors();
115  Teuchos::Array<IST> accum(numVecs);
116  auto xlcl = x.get2dViewNonConst();
117  auto blcl = b.get2dView();
118  for(LO i = 0; i < numApplyRows; i++)
119  {
120  LO row;
121  if(forward)
122  row = useApplyRows ? applyRows[i] : i;
123  else
124  row = useApplyRows ? applyRows[numApplyRows - 1 - i] : numApplyRows - 1 - i;
125  for(LO k = 0; k < numVecs; k++)
126  {
127  accum[k] = KAT::zero();
128  }
129  Offset rowBegin = Arowmap(row);
130  Offset rowEnd = Arowmap(row + 1);
131  for(Offset j = rowBegin; j < rowEnd; j++)
132  {
133  LO col = Aentries(j);
134  IST val = Avalues(j);
135  for(LO k = 0; k < numVecs; k++)
136  {
137  accum[k] += val * IST(xlcl[k][col]);
138  }
139  }
140  //Update x
141  IST dinv = inverseDiag(row);
142  for(LO k = 0; k < numVecs; k++)
143  {
144  if(omegaNotOne)
145  xlcl[k][row] += Scalar(omega * dinv * (IST(blcl[k][row]) - accum[k]));
146  else
147  xlcl[k][row] += Scalar(dinv * (IST(blcl[k][row]) - accum[k]));
148  }
149  }
150  }
151  else
152  {
153  auto xlcl = Kokkos::subview(x.getLocalViewHost(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
154  auto blcl = Kokkos::subview(b.getLocalViewHost(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
155  auto dlcl = Kokkos::subview(inverseDiagVec->getLocalViewHost(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
156  for(LO i = 0; i < numApplyRows; i++)
157  {
158  LO row;
159  if(forward)
160  row = useApplyRows ? applyRows[i] : i;
161  else
162  row = useApplyRows ? applyRows[numApplyRows - 1 - i] : numApplyRows - 1 - i;
163  IST accum = KAT::zero();
164  Offset rowBegin = Arowmap(row);
165  Offset rowEnd = Arowmap(row + 1);
166  for(Offset j = rowBegin; j < rowEnd; j++)
167  {
168  accum += Avalues(j) * xlcl(Aentries(j));
169  }
170  //Update x
171  IST dinv = dlcl(row);
172  if(omegaNotOne)
173  xlcl(row) += omega * dinv * (blcl(row) - accum);
174  else
175  xlcl(row) += dinv * (blcl(row) - accum);
176  }
177  }
178  }
179 
180  void applyBlock(block_multivector_type& x, const block_multivector_type& b, Tpetra::ESweepDirection direction)
181  {
182  if(direction == Tpetra::Symmetric)
183  {
184  applyBlock(x, b, Tpetra::Forward);
185  applyBlock(x, b, Tpetra::Backward);
186  return;
187  }
188  auto Abcrs = Teuchos::rcp_dynamic_cast<const bcrs_matrix_type>(A);
189  if(Abcrs.is_null())
190  throw std::runtime_error("Ifpack2::Details::GaussSeidel::applyBlock: A must be a BlockCrsMatrix");
191  auto Avalues = Abcrs->getValuesHost();
192  auto AlclGraph = Abcrs->getCrsGraph().getLocalGraphHost();
193  auto Arowmap = AlclGraph.row_map;
194  auto Aentries = AlclGraph.entries;
195  //Number of scalars in Avalues per block entry.
196  Offset bs2 = blockSize * blockSize;
197  LO numVecs = x.getNumVectors();
198  Kokkos::View<IST**, Kokkos::LayoutLeft, Kokkos::HostSpace> accum(
199  Kokkos::ViewAllocateWithoutInitializing("BlockGaussSeidel Accumulator"), blockSize, numVecs);
200  Kokkos::View<IST**, Kokkos::LayoutLeft, Kokkos::HostSpace> dinv_accum(
201  Kokkos::ViewAllocateWithoutInitializing("BlockGaussSeidel A_ii^-1*Accumulator"), blockSize, numVecs);
202  bool useApplyRows = !applyRows.is_null();
203  bool forward = direction == Tpetra::Forward;
204  LO numApplyRows = useApplyRows ? applyRows.size() : numRows;
205  for(LO i = 0; i < numApplyRows; i++)
206  {
207  LO row;
208  if(forward)
209  row = useApplyRows ? applyRows[i] : i;
210  else
211  row = useApplyRows ? applyRows[numApplyRows - 1 - i] : numApplyRows - 1 - i;
212  for(LO v = 0; v < numVecs; v++)
213  {
214  auto bRow = b.getLocalBlockHost (row, v, Tpetra::Access::ReadOnly);
215  for(LO k = 0; k < blockSize; k++)
216  {
217  accum(k, v) = KAT::zero();
218  }
219  }
220  Offset rowBegin = Arowmap(row);
221  Offset rowEnd = Arowmap(row + 1);
222  for(Offset j = rowBegin; j < rowEnd; j++)
223  {
224  LO col = Aentries(j);
225  const IST* blk = &Avalues(j * bs2);
226  for(LO v = 0; v < numVecs; v++)
227  {
228  auto xCol = x.getLocalBlockHost (col, v, Tpetra::Access::ReadOnly);
229  for(LO br = 0; br < blockSize; br++)
230  {
231  for(LO bc = 0; bc < blockSize; bc++)
232  {
233  IST Aval = blk[br * blockSize + bc];
234  accum(br, v) += Aval * xCol(bc);
235  }
236  }
237  }
238  }
239  //Update x: term is omega * Aii^-1 * accum, where omega is scalar, Aii^-1 is bs*bs, and accum is bs*nv
240  auto invBlock = Kokkos::subview(inverseBlockDiag, row, Kokkos::ALL(), Kokkos::ALL());
241  Kokkos::deep_copy(dinv_accum, KAT::zero());
242  for(LO v = 0; v < numVecs; v++)
243  {
244  auto bRow = b.getLocalBlockHost (row, v, Tpetra::Access::ReadOnly);
245  for(LO br = 0; br < blockSize; br++)
246  {
247  accum(br, v) = bRow(br) - accum(br, v);
248  }
249  }
250  for(LO v = 0; v < numVecs; v++)
251  {
252  for(LO br = 0; br < blockSize; br++)
253  {
254  for(LO bc = 0; bc < blockSize; bc++)
255  {
256  dinv_accum(br, v) += invBlock(br, bc) * accum(bc, v);
257  }
258  }
259  }
260  //Update x
261  for(LO v = 0; v < numVecs; v++)
262  {
263  auto xRow = x.getLocalBlockHost (row, v, Tpetra::Access::ReadWrite);
264  for(LO k = 0; k < blockSize; k++)
265  {
266  xRow(k) += omega * dinv_accum(k, v);
267  }
268  }
269  }
270  }
271 
272  //Version of apply for CrsMatrix/RowMatrix (for BlockCrsMatrix, call applyBlock)
273  void apply(multivector_type& x, const multivector_type& b, Tpetra::ESweepDirection direction)
274  {
275  if(direction == Tpetra::Symmetric)
276  {
277  apply(x, b, Tpetra::Forward);
278  apply(x, b, Tpetra::Backward);
279  return;
280  }
281  const_values_t Avalues;
282  const_rowmap_t Arowmap;
283  const_entries_t Aentries;
284  if(haveRowMatrix)
285  {
286  Avalues = rowMatrixValues;
287  Arowmap = rowMatrixRowmap;
288  Aentries = rowMatrixEntries;
289  }
290  else
291  {
292  auto Acrs = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A);
293  if(Acrs.is_null())
294  throw std::runtime_error("Ifpack2::Details::GaussSeidel::apply: either haveRowMatrix, or A is CrsMatrix");
295  auto Alcl = Acrs->getLocalMatrixHost();
296  Avalues = Alcl.values;
297  Arowmap = Alcl.graph.row_map;
298  Aentries = Alcl.graph.entries;
299  }
300  if(applyRows.is_null())
301  {
302  if(x.getNumVectors() > 1)
303  this->template applyImpl<false, true, true>(Avalues, Arowmap, Aentries, x, b, direction);
304  else
305  {
306  //Optimize for the all-rows, single vector, omega = 1 case
307  if(omega == KAT::one())
308  this->template applyImpl<false, false, false>(Avalues, Arowmap, Aentries, x, b, direction);
309  else
310  this->template applyImpl<false, false, true>(Avalues, Arowmap, Aentries, x, b, direction);
311  }
312  }
313  else
314  {
315  this->template applyImpl<true, true, true>(Avalues, Arowmap, Aentries, x, b, direction);
316  }
317  }
318 
320  //For efficiency, if input is a RowMatrix, keep a persistent copy of the CRS formatted local matrix.
321  bool haveRowMatrix;
322  values_t rowMatrixValues;
323  rowmap_t rowMatrixRowmap;
324  entries_t rowMatrixEntries;
325  LO numRows;
326  IST omega;
327  //If set up with BlockCrsMatrix, the block size. Otherwise 1.
328  LO blockSize;
329  //If using a non-block matrix, the inverse diagonal.
330  Teuchos::RCP<vector_type> inverseDiagVec;
331  //If using a block matrix, the inverses of all diagonal blocks.
332  InverseBlocksHost inverseBlockDiag;
333  //If null, apply over all rows in natural order. Otherwise, apply for each row listed in order.
334  Teuchos::ArrayRCP<LO> applyRows;
335  };
336 }
337 }
338 
339 #endif