10 #ifndef IFPACK2_GAUSS_SEIDEL_HPP
11 #define IFPACK2_GAUSS_SEIDEL_HPP
17 template<
typename Scalar,
typename LO,
typename GO,
typename NT>
20 using crs_matrix_type = Tpetra::CrsMatrix<Scalar, LO, GO, NT>;
21 using bcrs_matrix_type = Tpetra::BlockCrsMatrix<Scalar, LO, GO, NT>;
22 using row_matrix_type = Tpetra::RowMatrix<Scalar, LO, GO, NT>;
23 using local_matrix_device_type =
typename crs_matrix_type::local_matrix_device_type;
24 using vector_type = Tpetra::Vector<Scalar, LO, GO, NT>;
25 using multivector_type = Tpetra::MultiVector<Scalar, LO, GO, NT>;
26 using block_multivector_type = Tpetra::BlockMultiVector<Scalar, LO, GO, NT>;
27 using mem_space_t =
typename local_matrix_device_type::memory_space;
28 using rowmap_t =
typename local_matrix_device_type::row_map_type::HostMirror;
29 using entries_t =
typename local_matrix_device_type::index_type::HostMirror;
30 using values_t =
typename local_matrix_device_type::values_type::HostMirror;
31 using const_rowmap_t =
typename rowmap_t::const_type;
32 using const_entries_t =
typename entries_t::const_type;
33 using const_values_t =
typename values_t::const_type;
34 using Offset =
typename rowmap_t::non_const_value_type;
35 using IST =
typename crs_matrix_type::impl_scalar_type;
36 using KAT = Kokkos::ArithTraits<IST>;
38 using InverseBlocks = Kokkos::View<IST***, typename bcrs_matrix_type::device_type>;
39 using InverseBlocksHost =
typename InverseBlocks::HostMirror;
41 typedef typename crs_matrix_type::nonconst_global_inds_host_view_type nonconst_global_inds_host_view_type;
42 typedef typename crs_matrix_type::nonconst_local_inds_host_view_type nonconst_local_inds_host_view_type;
43 typedef typename crs_matrix_type::nonconst_values_host_view_type nonconst_values_host_view_type;
49 numRows = A_->getLocalNumRows();
50 haveRowMatrix =
false;
51 inverseDiagVec = inverseDiagVec_;
52 applyRows = applyRows_;
60 numRows = A_->getLocalNumRows();
62 inverseDiagVec = inverseDiagVec_;
63 applyRows = applyRows_;
67 rowMatrixRowmap = rowmap_t(Kokkos::ViewAllocateWithoutInitializing(
"Arowmap"), numRows + 1);
68 rowMatrixEntries = entries_t(Kokkos::ViewAllocateWithoutInitializing(
"Aentries"), A_->getLocalNumEntries());
69 rowMatrixValues = values_t(Kokkos::ViewAllocateWithoutInitializing(
"Avalues"), A_->getLocalNumEntries());
70 size_t maxDegree = A_->getLocalMaxNumRowEntries();
71 nonconst_values_host_view_type rowValues(
"rowValues", maxDegree);
72 nonconst_local_inds_host_view_type rowEntries(
"rowEntries", maxDegree);
74 for(LO i = 0; i <= numRows; i++)
76 rowMatrixRowmap(i) = accum;
80 A_->getLocalRowCopy(i, rowEntries, rowValues, degree);
82 size_t rowBegin = rowMatrixRowmap(i);
83 for(
size_t j = 0; j < degree; j++)
85 rowMatrixEntries(rowBegin + j) = rowEntries[j];
86 rowMatrixValues(rowBegin + j) = rowValues[j];
94 numRows = A_->getLocalNumRows();
95 haveRowMatrix =
false;
97 inverseBlockDiag = Kokkos::create_mirror_view(inverseBlockDiag_);
98 Kokkos::deep_copy(inverseBlockDiag, inverseBlockDiag_);
99 applyRows = applyRows_;
101 blockSize = A_->getBlockSize();
104 template<
bool useApplyRows,
bool multipleRHS,
bool omegaNotOne>
105 void applyImpl(
const const_values_t& Avalues,
const const_rowmap_t& Arowmap,
const const_entries_t& Aentries, multivector_type& x,
const multivector_type& b, Tpetra::ESweepDirection direction)
108 LO numApplyRows = useApplyRows ? (LO) applyRows.size() : numRows;
110 auto inverseDiag = Kokkos::subview(inverseDiagVec->getLocalViewHost(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
111 bool forward = direction == Tpetra::Forward;
114 LO numVecs = x.getNumVectors();
116 auto xlcl = x.get2dViewNonConst();
117 auto blcl = b.get2dView();
118 for(LO i = 0; i < numApplyRows; i++)
122 row = useApplyRows ? applyRows[i] : i;
124 row = useApplyRows ? applyRows[numApplyRows - 1 - i] : numApplyRows - 1 - i;
125 for(LO k = 0; k < numVecs; k++)
127 accum[k] = KAT::zero();
129 Offset rowBegin = Arowmap(row);
130 Offset rowEnd = Arowmap(row + 1);
131 for(Offset j = rowBegin; j < rowEnd; j++)
133 LO col = Aentries(j);
134 IST val = Avalues(j);
135 for(LO k = 0; k < numVecs; k++)
137 accum[k] += val * IST(xlcl[k][col]);
141 IST dinv = inverseDiag(row);
142 for(LO k = 0; k < numVecs; k++)
145 xlcl[k][row] += Scalar(omega * dinv * (IST(blcl[k][row]) - accum[k]));
147 xlcl[k][row] += Scalar(dinv * (IST(blcl[k][row]) - accum[k]));
153 auto xlcl = Kokkos::subview(x.getLocalViewHost(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
154 auto blcl = Kokkos::subview(b.getLocalViewHost(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
155 auto dlcl = Kokkos::subview(inverseDiagVec->getLocalViewHost(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
156 for(LO i = 0; i < numApplyRows; i++)
160 row = useApplyRows ? applyRows[i] : i;
162 row = useApplyRows ? applyRows[numApplyRows - 1 - i] : numApplyRows - 1 - i;
163 IST accum = KAT::zero();
164 Offset rowBegin = Arowmap(row);
165 Offset rowEnd = Arowmap(row + 1);
166 for(Offset j = rowBegin; j < rowEnd; j++)
168 accum += Avalues(j) * xlcl(Aentries(j));
171 IST dinv = dlcl(row);
173 xlcl(row) += omega * dinv * (blcl(row) - accum);
175 xlcl(row) += dinv * (blcl(row) - accum);
180 void applyBlock(block_multivector_type& x,
const block_multivector_type& b, Tpetra::ESweepDirection direction)
182 if(direction == Tpetra::Symmetric)
184 applyBlock(x, b, Tpetra::Forward);
185 applyBlock(x, b, Tpetra::Backward);
188 auto Abcrs = Teuchos::rcp_dynamic_cast<
const bcrs_matrix_type>(A);
190 throw std::runtime_error(
"Ifpack2::Details::GaussSeidel::applyBlock: A must be a BlockCrsMatrix");
191 auto Avalues = Abcrs->getValuesHost();
192 auto AlclGraph = Abcrs->getCrsGraph().getLocalGraphHost();
193 auto Arowmap = AlclGraph.row_map;
194 auto Aentries = AlclGraph.entries;
196 Offset bs2 = blockSize * blockSize;
197 LO numVecs = x.getNumVectors();
198 Kokkos::View<IST**, Kokkos::LayoutLeft, Kokkos::HostSpace> accum(
199 Kokkos::ViewAllocateWithoutInitializing(
"BlockGaussSeidel Accumulator"), blockSize, numVecs);
200 Kokkos::View<IST**, Kokkos::LayoutLeft, Kokkos::HostSpace> dinv_accum(
201 Kokkos::ViewAllocateWithoutInitializing(
"BlockGaussSeidel A_ii^-1*Accumulator"), blockSize, numVecs);
202 bool useApplyRows = !applyRows.is_null();
203 bool forward = direction == Tpetra::Forward;
204 LO numApplyRows = useApplyRows ? applyRows.size() : numRows;
205 for(LO i = 0; i < numApplyRows; i++)
209 row = useApplyRows ? applyRows[i] : i;
211 row = useApplyRows ? applyRows[numApplyRows - 1 - i] : numApplyRows - 1 - i;
212 for(LO v = 0; v < numVecs; v++)
214 auto bRow = b.getLocalBlockHost (row, v, Tpetra::Access::ReadOnly);
215 for(LO k = 0; k < blockSize; k++)
217 accum(k, v) = KAT::zero();
220 Offset rowBegin = Arowmap(row);
221 Offset rowEnd = Arowmap(row + 1);
222 for(Offset j = rowBegin; j < rowEnd; j++)
224 LO col = Aentries(j);
225 const IST* blk = &Avalues(j * bs2);
226 for(LO v = 0; v < numVecs; v++)
228 auto xCol = x.getLocalBlockHost (col, v, Tpetra::Access::ReadOnly);
229 for(LO br = 0; br < blockSize; br++)
231 for(LO bc = 0; bc < blockSize; bc++)
233 IST Aval = blk[br * blockSize + bc];
234 accum(br, v) += Aval * xCol(bc);
240 auto invBlock = Kokkos::subview(inverseBlockDiag, row, Kokkos::ALL(), Kokkos::ALL());
241 Kokkos::deep_copy(dinv_accum, KAT::zero());
242 for(LO v = 0; v < numVecs; v++)
244 auto bRow = b.getLocalBlockHost (row, v, Tpetra::Access::ReadOnly);
245 for(LO br = 0; br < blockSize; br++)
247 accum(br, v) = bRow(br) - accum(br, v);
250 for(LO v = 0; v < numVecs; v++)
252 for(LO br = 0; br < blockSize; br++)
254 for(LO bc = 0; bc < blockSize; bc++)
256 dinv_accum(br, v) += invBlock(br, bc) * accum(bc, v);
261 for(LO v = 0; v < numVecs; v++)
263 auto xRow = x.getLocalBlockHost (row, v, Tpetra::Access::ReadWrite);
264 for(LO k = 0; k < blockSize; k++)
266 xRow(k) += omega * dinv_accum(k, v);
273 void apply(multivector_type& x,
const multivector_type& b, Tpetra::ESweepDirection direction)
275 if(direction == Tpetra::Symmetric)
277 apply(x, b, Tpetra::Forward);
278 apply(x, b, Tpetra::Backward);
281 const_values_t Avalues;
282 const_rowmap_t Arowmap;
283 const_entries_t Aentries;
286 Avalues = rowMatrixValues;
287 Arowmap = rowMatrixRowmap;
288 Aentries = rowMatrixEntries;
292 auto Acrs = Teuchos::rcp_dynamic_cast<
const crs_matrix_type>(A);
294 throw std::runtime_error(
"Ifpack2::Details::GaussSeidel::apply: either haveRowMatrix, or A is CrsMatrix");
295 auto Alcl = Acrs->getLocalMatrixHost();
296 Avalues = Alcl.values;
297 Arowmap = Alcl.graph.row_map;
298 Aentries = Alcl.graph.entries;
300 if(applyRows.is_null())
302 if(x.getNumVectors() > 1)
303 this->
template applyImpl<false, true, true>(Avalues, Arowmap, Aentries, x, b, direction);
307 if(omega == KAT::one())
308 this->
template applyImpl<false, false, false>(Avalues, Arowmap, Aentries, x, b, direction);
310 this->
template applyImpl<false, false, true>(Avalues, Arowmap, Aentries, x, b, direction);
315 this->
template applyImpl<true, true, true>(Avalues, Arowmap, Aentries, x, b, direction);
322 values_t rowMatrixValues;
323 rowmap_t rowMatrixRowmap;
324 entries_t rowMatrixEntries;
332 InverseBlocksHost inverseBlockDiag;