41 #ifndef IFPACK2_GAUSS_SEIDEL_HPP
42 #define IFPACK2_GAUSS_SEIDEL_HPP
48 template<
typename Scalar,
typename LO,
typename GO,
typename NT>
51 using crs_matrix_type = Tpetra::CrsMatrix<Scalar, LO, GO, NT>;
52 using bcrs_matrix_type = Tpetra::BlockCrsMatrix<Scalar, LO, GO, NT>;
53 using row_matrix_type = Tpetra::RowMatrix<Scalar, LO, GO, NT>;
54 using local_matrix_device_type =
typename crs_matrix_type::local_matrix_device_type;
55 using vector_type = Tpetra::Vector<Scalar, LO, GO, NT>;
56 using multivector_type = Tpetra::MultiVector<Scalar, LO, GO, NT>;
57 using block_multivector_type = Tpetra::BlockMultiVector<Scalar, LO, GO, NT>;
58 using mem_space_t =
typename local_matrix_device_type::memory_space;
59 using rowmap_t =
typename local_matrix_device_type::row_map_type::HostMirror;
60 using entries_t =
typename local_matrix_device_type::index_type::HostMirror;
61 using values_t =
typename local_matrix_device_type::values_type::HostMirror;
62 using const_rowmap_t =
typename rowmap_t::const_type;
63 using const_entries_t =
typename entries_t::const_type;
64 using const_values_t =
typename values_t::const_type;
65 using Offset =
typename rowmap_t::non_const_value_type;
66 using IST =
typename crs_matrix_type::impl_scalar_type;
67 using KAT = Kokkos::ArithTraits<IST>;
69 using InverseBlocks = Kokkos::View<IST***, typename bcrs_matrix_type::device_type>;
70 using InverseBlocksHost =
typename InverseBlocks::HostMirror;
72 typedef typename crs_matrix_type::nonconst_global_inds_host_view_type nonconst_global_inds_host_view_type;
73 typedef typename crs_matrix_type::nonconst_local_inds_host_view_type nonconst_local_inds_host_view_type;
74 typedef typename crs_matrix_type::nonconst_values_host_view_type nonconst_values_host_view_type;
80 numRows = A_->getLocalNumRows();
81 haveRowMatrix =
false;
82 inverseDiagVec = inverseDiagVec_;
83 applyRows = applyRows_;
91 numRows = A_->getLocalNumRows();
93 inverseDiagVec = inverseDiagVec_;
94 applyRows = applyRows_;
98 rowMatrixRowmap = rowmap_t(Kokkos::ViewAllocateWithoutInitializing(
"Arowmap"), numRows + 1);
99 rowMatrixEntries = entries_t(Kokkos::ViewAllocateWithoutInitializing(
"Aentries"), A_->getLocalNumEntries());
100 rowMatrixValues = values_t(Kokkos::ViewAllocateWithoutInitializing(
"Avalues"), A_->getLocalNumEntries());
101 size_t maxDegree = A_->getLocalMaxNumRowEntries();
102 nonconst_values_host_view_type rowValues(
"rowValues", maxDegree);
103 nonconst_local_inds_host_view_type rowEntries(
"rowEntries", maxDegree);
105 for(LO i = 0; i <= numRows; i++)
107 rowMatrixRowmap(i) = accum;
111 A_->getLocalRowCopy(i, rowEntries, rowValues, degree);
113 size_t rowBegin = rowMatrixRowmap(i);
114 for(
size_t j = 0; j < degree; j++)
116 rowMatrixEntries(rowBegin + j) = rowEntries[j];
117 rowMatrixValues(rowBegin + j) = rowValues[j];
125 numRows = A_->getLocalNumRows();
126 haveRowMatrix =
false;
128 inverseBlockDiag = Kokkos::create_mirror_view(inverseBlockDiag_);
129 Kokkos::deep_copy(inverseBlockDiag, inverseBlockDiag_);
130 applyRows = applyRows_;
132 blockSize = A_->getBlockSize();
135 template<
bool useApplyRows,
bool multipleRHS,
bool omegaNotOne>
136 void applyImpl(
const const_values_t& Avalues,
const const_rowmap_t& Arowmap,
const const_entries_t& Aentries, multivector_type& x,
const multivector_type& b, Tpetra::ESweepDirection direction)
139 LO numApplyRows = useApplyRows ? (LO) applyRows.size() : numRows;
141 auto inverseDiag = Kokkos::subview(inverseDiagVec->getLocalViewHost(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
142 bool forward = direction == Tpetra::Forward;
145 LO numVecs = x.getNumVectors();
147 auto xlcl = x.get2dViewNonConst();
148 auto blcl = b.get2dView();
149 for(LO i = 0; i < numApplyRows; i++)
153 row = useApplyRows ? applyRows[i] : i;
155 row = useApplyRows ? applyRows[numApplyRows - 1 - i] : numApplyRows - 1 - i;
156 for(LO k = 0; k < numVecs; k++)
158 accum[k] = KAT::zero();
160 Offset rowBegin = Arowmap(row);
161 Offset rowEnd = Arowmap(row + 1);
162 for(Offset j = rowBegin; j < rowEnd; j++)
164 LO col = Aentries(j);
165 IST val = Avalues(j);
166 for(LO k = 0; k < numVecs; k++)
168 accum[k] += val * IST(xlcl[k][col]);
172 IST dinv = inverseDiag(row);
173 for(LO k = 0; k < numVecs; k++)
176 xlcl[k][row] += Scalar(omega * dinv * (IST(blcl[k][row]) - accum[k]));
178 xlcl[k][row] += Scalar(dinv * (IST(blcl[k][row]) - accum[k]));
184 auto xlcl = Kokkos::subview(x.getLocalViewHost(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
185 auto blcl = Kokkos::subview(b.getLocalViewHost(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
186 auto dlcl = Kokkos::subview(inverseDiagVec->getLocalViewHost(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
187 for(LO i = 0; i < numApplyRows; i++)
191 row = useApplyRows ? applyRows[i] : i;
193 row = useApplyRows ? applyRows[numApplyRows - 1 - i] : numApplyRows - 1 - i;
194 IST accum = KAT::zero();
195 Offset rowBegin = Arowmap(row);
196 Offset rowEnd = Arowmap(row + 1);
197 for(Offset j = rowBegin; j < rowEnd; j++)
199 accum += Avalues(j) * xlcl(Aentries(j));
202 IST dinv = dlcl(row);
204 xlcl(row) += omega * dinv * (blcl(row) - accum);
206 xlcl(row) += dinv * (blcl(row) - accum);
211 void applyBlock(block_multivector_type& x,
const block_multivector_type& b, Tpetra::ESweepDirection direction)
213 if(direction == Tpetra::Symmetric)
215 applyBlock(x, b, Tpetra::Forward);
216 applyBlock(x, b, Tpetra::Backward);
219 auto Abcrs = Teuchos::rcp_dynamic_cast<
const bcrs_matrix_type>(A);
221 throw std::runtime_error(
"Ifpack2::Details::GaussSeidel::applyBlock: A must be a BlockCrsMatrix");
222 auto Avalues = Abcrs->getValuesHost();
223 auto AlclGraph = Abcrs->getCrsGraph().getLocalGraphHost();
224 auto Arowmap = AlclGraph.row_map;
225 auto Aentries = AlclGraph.entries;
227 Offset bs2 = blockSize * blockSize;
228 LO numVecs = x.getNumVectors();
229 Kokkos::View<IST**, Kokkos::LayoutLeft, Kokkos::HostSpace> accum(
230 Kokkos::ViewAllocateWithoutInitializing(
"BlockGaussSeidel Accumulator"), blockSize, numVecs);
231 Kokkos::View<IST**, Kokkos::LayoutLeft, Kokkos::HostSpace> dinv_accum(
232 Kokkos::ViewAllocateWithoutInitializing(
"BlockGaussSeidel A_ii^-1*Accumulator"), blockSize, numVecs);
233 bool useApplyRows = !applyRows.is_null();
234 bool forward = direction == Tpetra::Forward;
235 LO numApplyRows = useApplyRows ? applyRows.size() : numRows;
236 for(LO i = 0; i < numApplyRows; i++)
240 row = useApplyRows ? applyRows[i] : i;
242 row = useApplyRows ? applyRows[numApplyRows - 1 - i] : numApplyRows - 1 - i;
243 for(LO v = 0; v < numVecs; v++)
245 auto bRow = b.getLocalBlockHost (row, v, Tpetra::Access::ReadOnly);
246 for(LO k = 0; k < blockSize; k++)
248 accum(k, v) = KAT::zero();
251 Offset rowBegin = Arowmap(row);
252 Offset rowEnd = Arowmap(row + 1);
253 for(Offset j = rowBegin; j < rowEnd; j++)
255 LO col = Aentries(j);
256 const IST* blk = &Avalues(j * bs2);
257 for(LO v = 0; v < numVecs; v++)
259 auto xCol = x.getLocalBlockHost (col, v, Tpetra::Access::ReadOnly);
260 for(LO br = 0; br < blockSize; br++)
262 for(LO bc = 0; bc < blockSize; bc++)
264 IST Aval = blk[br * blockSize + bc];
265 accum(br, v) += Aval * xCol(bc);
271 auto invBlock = Kokkos::subview(inverseBlockDiag, row, Kokkos::ALL(), Kokkos::ALL());
272 Kokkos::deep_copy(dinv_accum, KAT::zero());
273 for(LO v = 0; v < numVecs; v++)
275 auto bRow = b.getLocalBlockHost (row, v, Tpetra::Access::ReadOnly);
276 for(LO br = 0; br < blockSize; br++)
278 accum(br, v) = bRow(br) - accum(br, v);
281 for(LO v = 0; v < numVecs; v++)
283 for(LO br = 0; br < blockSize; br++)
285 for(LO bc = 0; bc < blockSize; bc++)
287 dinv_accum(br, v) += invBlock(br, bc) * accum(bc, v);
292 for(LO v = 0; v < numVecs; v++)
294 auto xRow = x.getLocalBlockHost (row, v, Tpetra::Access::ReadWrite);
295 for(LO k = 0; k < blockSize; k++)
297 xRow(k) += omega * dinv_accum(k, v);
304 void apply(multivector_type& x,
const multivector_type& b, Tpetra::ESweepDirection direction)
306 if(direction == Tpetra::Symmetric)
308 apply(x, b, Tpetra::Forward);
309 apply(x, b, Tpetra::Backward);
312 const_values_t Avalues;
313 const_rowmap_t Arowmap;
314 const_entries_t Aentries;
317 Avalues = rowMatrixValues;
318 Arowmap = rowMatrixRowmap;
319 Aentries = rowMatrixEntries;
323 auto Acrs = Teuchos::rcp_dynamic_cast<
const crs_matrix_type>(A);
325 throw std::runtime_error(
"Ifpack2::Details::GaussSeidel::apply: either haveRowMatrix, or A is CrsMatrix");
326 auto Alcl = Acrs->getLocalMatrixHost();
327 Avalues = Alcl.values;
328 Arowmap = Alcl.graph.row_map;
329 Aentries = Alcl.graph.entries;
331 if(applyRows.is_null())
333 if(x.getNumVectors() > 1)
334 this->
template applyImpl<false, true, true>(Avalues, Arowmap, Aentries, x, b, direction);
338 if(omega == KAT::one())
339 this->
template applyImpl<false, false, false>(Avalues, Arowmap, Aentries, x, b, direction);
341 this->
template applyImpl<false, false, true>(Avalues, Arowmap, Aentries, x, b, direction);
346 this->
template applyImpl<true, true, true>(Avalues, Arowmap, Aentries, x, b, direction);
353 values_t rowMatrixValues;
354 rowmap_t rowMatrixRowmap;
355 entries_t rowMatrixEntries;
363 InverseBlocksHost inverseBlockDiag;