Ifpack2 Templated Preconditioning Package  Version 1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Ifpack2_Details_GaussSeidel.hpp
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // ***********************************************************************
38 //@HEADER
39 */
40 
41 #ifndef IFPACK2_GAUSS_SEIDEL_HPP
42 #define IFPACK2_GAUSS_SEIDEL_HPP
43 
44 namespace Ifpack2
45 {
46 namespace Details
47 {
48  template<typename Scalar, typename LO, typename GO, typename NT>
49  struct GaussSeidel
50  {
51  using crs_matrix_type = Tpetra::CrsMatrix<Scalar, LO, GO, NT>;
52  using bcrs_matrix_type = Tpetra::BlockCrsMatrix<Scalar, LO, GO, NT>;
53  using row_matrix_type = Tpetra::RowMatrix<Scalar, LO, GO, NT>;
54  using local_matrix_device_type = typename crs_matrix_type::local_matrix_device_type;
55  using vector_type = Tpetra::Vector<Scalar, LO, GO, NT>;
56  using multivector_type = Tpetra::MultiVector<Scalar, LO, GO, NT>;
57  using block_multivector_type = Tpetra::BlockMultiVector<Scalar, LO, GO, NT>;
58  using mem_space_t = typename local_matrix_device_type::memory_space;
59  using rowmap_t = typename local_matrix_device_type::row_map_type::HostMirror;
60  using entries_t = typename local_matrix_device_type::index_type::HostMirror;
61  using values_t = typename local_matrix_device_type::values_type::HostMirror;
62  using const_rowmap_t = typename rowmap_t::const_type;
63  using const_entries_t = typename entries_t::const_type;
64  using const_values_t = typename values_t::const_type;
65  using Offset = typename rowmap_t::non_const_value_type;
66  using IST = typename crs_matrix_type::impl_scalar_type;
67  using KAT = Kokkos::ArithTraits<IST>;
68  //Type of view representing inverse diagonal blocks, and its HostMirror.
69  using InverseBlocks = Kokkos::View<IST***, typename bcrs_matrix_type::device_type>;
70  using InverseBlocksHost = typename InverseBlocks::HostMirror;
71 
72  typedef typename crs_matrix_type::nonconst_global_inds_host_view_type nonconst_global_inds_host_view_type;
73  typedef typename crs_matrix_type::nonconst_local_inds_host_view_type nonconst_local_inds_host_view_type;
74  typedef typename crs_matrix_type::nonconst_values_host_view_type nonconst_values_host_view_type;
75 
76  //Setup for CrsMatrix
77  GaussSeidel(Teuchos::RCP<const crs_matrix_type> A_, Teuchos::RCP<vector_type>& inverseDiagVec_, Teuchos::ArrayRCP<LO>& applyRows_, Scalar omega_)
78  {
79  A = A_;
80  numRows = A_->getLocalNumRows();
81  haveRowMatrix = false;
82  inverseDiagVec = inverseDiagVec_;
83  applyRows = applyRows_;
84  blockSize = 1;
85  omega = omega_;
86  }
87 
88  GaussSeidel(Teuchos::RCP<const row_matrix_type> A_, Teuchos::RCP<vector_type>& inverseDiagVec_, Teuchos::ArrayRCP<LO>& applyRows_, Scalar omega_)
89  {
90  A = A_;
91  numRows = A_->getLocalNumRows();
92  haveRowMatrix = true;
93  inverseDiagVec = inverseDiagVec_;
94  applyRows = applyRows_;
95  blockSize = 1;
96  omega = omega_;
97  //Here, need to make a deep CRS copy to avoid slow access via getLocalRowCopy
98  rowMatrixRowmap = rowmap_t(Kokkos::ViewAllocateWithoutInitializing("Arowmap"), numRows + 1);
99  rowMatrixEntries = entries_t(Kokkos::ViewAllocateWithoutInitializing("Aentries"), A_->getLocalNumEntries());
100  rowMatrixValues = values_t(Kokkos::ViewAllocateWithoutInitializing("Avalues"), A_->getLocalNumEntries());
101  size_t maxDegree = A_->getLocalMaxNumRowEntries();
102  nonconst_values_host_view_type rowValues("rowValues", maxDegree);
103  nonconst_local_inds_host_view_type rowEntries("rowEntries", maxDegree);
104  size_t accum = 0;
105  for(LO i = 0; i <= numRows; i++)
106  {
107  rowMatrixRowmap(i) = accum;
108  if(i == numRows)
109  break;
110  size_t degree;
111  A_->getLocalRowCopy(i, rowEntries, rowValues, degree);
112  accum += degree;
113  size_t rowBegin = rowMatrixRowmap(i);
114  for(size_t j = 0; j < degree; j++)
115  {
116  rowMatrixEntries(rowBegin + j) = rowEntries[j];
117  rowMatrixValues(rowBegin + j) = rowValues[j];
118  }
119  }
120  }
121 
122  GaussSeidel(Teuchos::RCP<const bcrs_matrix_type> A_, const InverseBlocks& inverseBlockDiag_, Teuchos::ArrayRCP<LO>& applyRows_, Scalar omega_)
123  {
124  A = A_;
125  numRows = A_->getLocalNumRows();
126  haveRowMatrix = false;
127  //note: next 2 lines are no-ops if inverseBlockDiag_ is already host-accessible
128  inverseBlockDiag = Kokkos::create_mirror_view(inverseBlockDiag_);
129  Kokkos::deep_copy(inverseBlockDiag, inverseBlockDiag_);
130  applyRows = applyRows_;
131  omega = omega_;
132  blockSize = A_->getBlockSize();
133  }
134 
135  template<bool useApplyRows, bool multipleRHS, bool omegaNotOne>
136  void applyImpl(const const_values_t& Avalues, const const_rowmap_t& Arowmap, const const_entries_t& Aentries, multivector_type& x, const multivector_type& b, Tpetra::ESweepDirection direction)
137  {
138  //note: direction is either Forward or Backward (Symmetric is handled in apply())
139  LO numApplyRows = useApplyRows ? (LO) applyRows.size() : numRows;
140  //note: inverseDiagMV always has only one column
141  auto inverseDiag = Kokkos::subview(inverseDiagVec->getLocalViewHost(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
142  bool forward = direction == Tpetra::Forward;
143  if(multipleRHS)
144  {
145  LO numVecs = x.getNumVectors();
146  Teuchos::Array<IST> accum(numVecs);
147  auto xlcl = x.get2dViewNonConst();
148  auto blcl = b.get2dView();
149  for(LO i = 0; i < numApplyRows; i++)
150  {
151  LO row;
152  if(forward)
153  row = useApplyRows ? applyRows[i] : i;
154  else
155  row = useApplyRows ? applyRows[numApplyRows - 1 - i] : numApplyRows - 1 - i;
156  for(LO k = 0; k < numVecs; k++)
157  {
158  accum[k] = KAT::zero();
159  }
160  Offset rowBegin = Arowmap(row);
161  Offset rowEnd = Arowmap(row + 1);
162  for(Offset j = rowBegin; j < rowEnd; j++)
163  {
164  LO col = Aentries(j);
165  IST val = Avalues(j);
166  for(LO k = 0; k < numVecs; k++)
167  {
168  accum[k] += val * IST(xlcl[k][col]);
169  }
170  }
171  //Update x
172  IST dinv = inverseDiag(row);
173  for(LO k = 0; k < numVecs; k++)
174  {
175  if(omegaNotOne)
176  xlcl[k][row] += Scalar(omega * dinv * (IST(blcl[k][row]) - accum[k]));
177  else
178  xlcl[k][row] += Scalar(dinv * (IST(blcl[k][row]) - accum[k]));
179  }
180  }
181  }
182  else
183  {
184  auto xlcl = Kokkos::subview(x.getLocalViewHost(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
185  auto blcl = Kokkos::subview(b.getLocalViewHost(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
186  auto dlcl = Kokkos::subview(inverseDiagVec->getLocalViewHost(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
187  for(LO i = 0; i < numApplyRows; i++)
188  {
189  LO row;
190  if(forward)
191  row = useApplyRows ? applyRows[i] : i;
192  else
193  row = useApplyRows ? applyRows[numApplyRows - 1 - i] : numApplyRows - 1 - i;
194  IST accum = KAT::zero();
195  Offset rowBegin = Arowmap(row);
196  Offset rowEnd = Arowmap(row + 1);
197  for(Offset j = rowBegin; j < rowEnd; j++)
198  {
199  accum += Avalues(j) * xlcl(Aentries(j));
200  }
201  //Update x
202  IST dinv = dlcl(row);
203  if(omegaNotOne)
204  xlcl(row) += omega * dinv * (blcl(row) - accum);
205  else
206  xlcl(row) += dinv * (blcl(row) - accum);
207  }
208  }
209  }
210 
211  void applyBlock(block_multivector_type& x, const block_multivector_type& b, Tpetra::ESweepDirection direction)
212  {
213  if(direction == Tpetra::Symmetric)
214  {
215  applyBlock(x, b, Tpetra::Forward);
216  applyBlock(x, b, Tpetra::Backward);
217  return;
218  }
219  auto Abcrs = Teuchos::rcp_dynamic_cast<const bcrs_matrix_type>(A);
220  if(Abcrs.is_null())
221  throw std::runtime_error("Ifpack2::Details::GaussSeidel::applyBlock: A must be a BlockCrsMatrix");
222  auto Avalues = Abcrs->getValuesHost();
223  auto AlclGraph = Abcrs->getCrsGraph().getLocalGraphHost();
224  auto Arowmap = AlclGraph.row_map;
225  auto Aentries = AlclGraph.entries;
226  //Number of scalars in Avalues per block entry.
227  Offset bs2 = blockSize * blockSize;
228  LO numVecs = x.getNumVectors();
229  Kokkos::View<IST**, Kokkos::LayoutLeft, Kokkos::HostSpace> accum(
230  Kokkos::ViewAllocateWithoutInitializing("BlockGaussSeidel Accumulator"), blockSize, numVecs);
231  Kokkos::View<IST**, Kokkos::LayoutLeft, Kokkos::HostSpace> dinv_accum(
232  Kokkos::ViewAllocateWithoutInitializing("BlockGaussSeidel A_ii^-1*Accumulator"), blockSize, numVecs);
233  bool useApplyRows = !applyRows.is_null();
234  bool forward = direction == Tpetra::Forward;
235  LO numApplyRows = useApplyRows ? applyRows.size() : numRows;
236  for(LO i = 0; i < numApplyRows; i++)
237  {
238  LO row;
239  if(forward)
240  row = useApplyRows ? applyRows[i] : i;
241  else
242  row = useApplyRows ? applyRows[numApplyRows - 1 - i] : numApplyRows - 1 - i;
243  for(LO v = 0; v < numVecs; v++)
244  {
245  auto bRow = b.getLocalBlockHost (row, v, Tpetra::Access::ReadOnly);
246  for(LO k = 0; k < blockSize; k++)
247  {
248  accum(k, v) = KAT::zero();
249  }
250  }
251  Offset rowBegin = Arowmap(row);
252  Offset rowEnd = Arowmap(row + 1);
253  for(Offset j = rowBegin; j < rowEnd; j++)
254  {
255  LO col = Aentries(j);
256  const IST* blk = &Avalues(j * bs2);
257  for(LO v = 0; v < numVecs; v++)
258  {
259  auto xCol = x.getLocalBlockHost (col, v, Tpetra::Access::ReadOnly);
260  for(LO br = 0; br < blockSize; br++)
261  {
262  for(LO bc = 0; bc < blockSize; bc++)
263  {
264  IST Aval = blk[br * blockSize + bc];
265  accum(br, v) += Aval * xCol(bc);
266  }
267  }
268  }
269  }
270  //Update x: term is omega * Aii^-1 * accum, where omega is scalar, Aii^-1 is bs*bs, and accum is bs*nv
271  auto invBlock = Kokkos::subview(inverseBlockDiag, row, Kokkos::ALL(), Kokkos::ALL());
272  Kokkos::deep_copy(dinv_accum, KAT::zero());
273  for(LO v = 0; v < numVecs; v++)
274  {
275  auto bRow = b.getLocalBlockHost (row, v, Tpetra::Access::ReadOnly);
276  for(LO br = 0; br < blockSize; br++)
277  {
278  accum(br, v) = bRow(br) - accum(br, v);
279  }
280  }
281  for(LO v = 0; v < numVecs; v++)
282  {
283  for(LO br = 0; br < blockSize; br++)
284  {
285  for(LO bc = 0; bc < blockSize; bc++)
286  {
287  dinv_accum(br, v) += invBlock(br, bc) * accum(bc, v);
288  }
289  }
290  }
291  //Update x
292  for(LO v = 0; v < numVecs; v++)
293  {
294  auto xRow = x.getLocalBlockHost (row, v, Tpetra::Access::ReadWrite);
295  for(LO k = 0; k < blockSize; k++)
296  {
297  xRow(k) += omega * dinv_accum(k, v);
298  }
299  }
300  }
301  }
302 
303  //Version of apply for CrsMatrix/RowMatrix (for BlockCrsMatrix, call applyBlock)
304  void apply(multivector_type& x, const multivector_type& b, Tpetra::ESweepDirection direction)
305  {
306  if(direction == Tpetra::Symmetric)
307  {
308  apply(x, b, Tpetra::Forward);
309  apply(x, b, Tpetra::Backward);
310  return;
311  }
312  const_values_t Avalues;
313  const_rowmap_t Arowmap;
314  const_entries_t Aentries;
315  if(haveRowMatrix)
316  {
317  Avalues = rowMatrixValues;
318  Arowmap = rowMatrixRowmap;
319  Aentries = rowMatrixEntries;
320  }
321  else
322  {
323  auto Acrs = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A);
324  if(Acrs.is_null())
325  throw std::runtime_error("Ifpack2::Details::GaussSeidel::apply: either haveRowMatrix, or A is CrsMatrix");
326  auto Alcl = Acrs->getLocalMatrixHost();
327  Avalues = Alcl.values;
328  Arowmap = Alcl.graph.row_map;
329  Aentries = Alcl.graph.entries;
330  }
331  if(applyRows.is_null())
332  {
333  if(x.getNumVectors() > 1)
334  this->template applyImpl<false, true, true>(Avalues, Arowmap, Aentries, x, b, direction);
335  else
336  {
337  //Optimize for the all-rows, single vector, omega = 1 case
338  if(omega == KAT::one())
339  this->template applyImpl<false, false, false>(Avalues, Arowmap, Aentries, x, b, direction);
340  else
341  this->template applyImpl<false, false, true>(Avalues, Arowmap, Aentries, x, b, direction);
342  }
343  }
344  else
345  {
346  this->template applyImpl<true, true, true>(Avalues, Arowmap, Aentries, x, b, direction);
347  }
348  }
349 
351  //For efficiency, if input is a RowMatrix, keep a persistent copy of the CRS formatted local matrix.
352  bool haveRowMatrix;
353  values_t rowMatrixValues;
354  rowmap_t rowMatrixRowmap;
355  entries_t rowMatrixEntries;
356  LO numRows;
357  IST omega;
358  //If set up with BlockCrsMatrix, the block size. Otherwise 1.
359  LO blockSize;
360  //If using a non-block matrix, the inverse diagonal.
361  Teuchos::RCP<vector_type> inverseDiagVec;
362  //If using a block matrix, the inverses of all diagonal blocks.
363  InverseBlocksHost inverseBlockDiag;
364  //If null, apply over all rows in natural order. Otherwise, apply for each row listed in order.
365  Teuchos::ArrayRCP<LO> applyRows;
366  };
367 }
368 }
369 
370 #endif