10 #ifndef IFPACK2_GAUSS_SEIDEL_HPP
11 #define IFPACK2_GAUSS_SEIDEL_HPP
15 template <
typename Scalar,
typename LO,
typename GO,
typename NT>
17 using crs_matrix_type = Tpetra::CrsMatrix<Scalar, LO, GO, NT>;
18 using bcrs_matrix_type = Tpetra::BlockCrsMatrix<Scalar, LO, GO, NT>;
19 using row_matrix_type = Tpetra::RowMatrix<Scalar, LO, GO, NT>;
20 using local_matrix_device_type =
typename crs_matrix_type::local_matrix_device_type;
21 using vector_type = Tpetra::Vector<Scalar, LO, GO, NT>;
22 using multivector_type = Tpetra::MultiVector<Scalar, LO, GO, NT>;
23 using block_multivector_type = Tpetra::BlockMultiVector<Scalar, LO, GO, NT>;
24 using mem_space_t =
typename local_matrix_device_type::memory_space;
25 using rowmap_t =
typename local_matrix_device_type::row_map_type::host_mirror_type;
26 using entries_t =
typename local_matrix_device_type::index_type::host_mirror_type;
27 using values_t =
typename local_matrix_device_type::values_type::host_mirror_type;
28 using const_rowmap_t =
typename rowmap_t::const_type;
29 using const_entries_t =
typename entries_t::const_type;
30 using const_values_t =
typename values_t::const_type;
31 using Offset =
typename rowmap_t::non_const_value_type;
32 using IST =
typename crs_matrix_type::impl_scalar_type;
33 #if KOKKOS_VERSION >= 40799
34 using KAT = KokkosKernels::ArithTraits<IST>;
36 using KAT = Kokkos::ArithTraits<IST>;
39 using InverseBlocks = Kokkos::View<IST***, typename bcrs_matrix_type::device_type>;
40 using InverseBlocksHost =
typename InverseBlocks::host_mirror_type;
42 typedef typename crs_matrix_type::nonconst_global_inds_host_view_type nonconst_global_inds_host_view_type;
43 typedef typename crs_matrix_type::nonconst_local_inds_host_view_type nonconst_local_inds_host_view_type;
44 typedef typename crs_matrix_type::nonconst_values_host_view_type nonconst_values_host_view_type;
49 numRows = A_->getLocalNumRows();
50 haveRowMatrix =
false;
51 inverseDiagVec = inverseDiagVec_;
52 applyRows = applyRows_;
59 numRows = A_->getLocalNumRows();
61 inverseDiagVec = inverseDiagVec_;
62 applyRows = applyRows_;
66 rowMatrixRowmap = rowmap_t(Kokkos::ViewAllocateWithoutInitializing(
"Arowmap"), numRows + 1);
67 rowMatrixEntries = entries_t(Kokkos::ViewAllocateWithoutInitializing(
"Aentries"), A_->getLocalNumEntries());
68 rowMatrixValues = values_t(Kokkos::ViewAllocateWithoutInitializing(
"Avalues"), A_->getLocalNumEntries());
69 size_t maxDegree = A_->getLocalMaxNumRowEntries();
70 nonconst_values_host_view_type rowValues(
"rowValues", maxDegree);
71 nonconst_local_inds_host_view_type rowEntries(
"rowEntries", maxDegree);
73 for (LO i = 0; i <= numRows; i++) {
74 rowMatrixRowmap(i) = accum;
78 A_->getLocalRowCopy(i, rowEntries, rowValues, degree);
80 size_t rowBegin = rowMatrixRowmap(i);
81 for (
size_t j = 0; j < degree; j++) {
82 rowMatrixEntries(rowBegin + j) = rowEntries[j];
83 rowMatrixValues(rowBegin + j) = rowValues[j];
90 numRows = A_->getLocalNumRows();
91 haveRowMatrix =
false;
93 inverseBlockDiag = Kokkos::create_mirror_view(inverseBlockDiag_);
94 Kokkos::deep_copy(inverseBlockDiag, inverseBlockDiag_);
95 applyRows = applyRows_;
97 blockSize = A_->getBlockSize();
100 template <
bool useApplyRows,
bool multipleRHS,
bool omegaNotOne>
101 void applyImpl(
const const_values_t& Avalues,
const const_rowmap_t& Arowmap,
const const_entries_t& Aentries, multivector_type& x,
const multivector_type& b, Tpetra::ESweepDirection direction) {
103 LO numApplyRows = useApplyRows ? (LO)applyRows.size() : numRows;
105 auto inverseDiag = Kokkos::subview(inverseDiagVec->getLocalViewHost(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
106 bool forward = direction == Tpetra::Forward;
108 LO numVecs = x.getNumVectors();
110 auto xlcl = x.get2dViewNonConst();
111 auto blcl = b.get2dView();
112 for (LO i = 0; i < numApplyRows; i++) {
115 row = useApplyRows ? applyRows[i] : i;
117 row = useApplyRows ? applyRows[numApplyRows - 1 - i] : numApplyRows - 1 - i;
118 for (LO k = 0; k < numVecs; k++) {
119 accum[k] = KAT::zero();
121 Offset rowBegin = Arowmap(row);
122 Offset rowEnd = Arowmap(row + 1);
123 for (Offset j = rowBegin; j < rowEnd; j++) {
124 LO col = Aentries(j);
125 IST val = Avalues(j);
126 for (LO k = 0; k < numVecs; k++) {
127 accum[k] += val * IST(xlcl[k][col]);
131 IST dinv = inverseDiag(row);
132 for (LO k = 0; k < numVecs; k++) {
134 xlcl[k][row] += Scalar(omega * dinv * (IST(blcl[k][row]) - accum[k]));
136 xlcl[k][row] += Scalar(dinv * (IST(blcl[k][row]) - accum[k]));
140 auto xlcl = Kokkos::subview(x.getLocalViewHost(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
141 auto blcl = Kokkos::subview(b.getLocalViewHost(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
142 auto dlcl = Kokkos::subview(inverseDiagVec->getLocalViewHost(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
143 for (LO i = 0; i < numApplyRows; i++) {
146 row = useApplyRows ? applyRows[i] : i;
148 row = useApplyRows ? applyRows[numApplyRows - 1 - i] : numApplyRows - 1 - i;
149 IST accum = KAT::zero();
150 Offset rowBegin = Arowmap(row);
151 Offset rowEnd = Arowmap(row + 1);
152 for (Offset j = rowBegin; j < rowEnd; j++) {
153 accum += Avalues(j) * xlcl(Aentries(j));
156 IST dinv = dlcl(row);
158 xlcl(row) += omega * dinv * (blcl(row) - accum);
160 xlcl(row) += dinv * (blcl(row) - accum);
165 void applyBlock(block_multivector_type& x,
const block_multivector_type& b, Tpetra::ESweepDirection direction) {
166 if (direction == Tpetra::Symmetric) {
167 applyBlock(x, b, Tpetra::Forward);
168 applyBlock(x, b, Tpetra::Backward);
171 auto Abcrs = Teuchos::rcp_dynamic_cast<
const bcrs_matrix_type>(A);
173 throw std::runtime_error(
"Ifpack2::Details::GaussSeidel::applyBlock: A must be a BlockCrsMatrix");
174 auto Avalues = Abcrs->getValuesHost();
175 auto AlclGraph = Abcrs->getCrsGraph().getLocalGraphHost();
176 auto Arowmap = AlclGraph.row_map;
177 auto Aentries = AlclGraph.entries;
179 Offset bs2 = blockSize * blockSize;
180 LO numVecs = x.getNumVectors();
181 Kokkos::View<IST**, Kokkos::LayoutLeft, Kokkos::HostSpace> accum(
182 Kokkos::ViewAllocateWithoutInitializing(
"BlockGaussSeidel Accumulator"), blockSize, numVecs);
183 Kokkos::View<IST**, Kokkos::LayoutLeft, Kokkos::HostSpace> dinv_accum(
184 Kokkos::ViewAllocateWithoutInitializing(
"BlockGaussSeidel A_ii^-1*Accumulator"), blockSize, numVecs);
185 bool useApplyRows = !applyRows.is_null();
186 bool forward = direction == Tpetra::Forward;
187 LO numApplyRows = useApplyRows ? applyRows.size() : numRows;
188 for (LO i = 0; i < numApplyRows; i++) {
191 row = useApplyRows ? applyRows[i] : i;
193 row = useApplyRows ? applyRows[numApplyRows - 1 - i] : numApplyRows - 1 - i;
194 for (LO v = 0; v < numVecs; v++) {
195 auto bRow = b.getLocalBlockHost(row, v, Tpetra::Access::ReadOnly);
196 for (LO k = 0; k < blockSize; k++) {
197 accum(k, v) = KAT::zero();
200 Offset rowBegin = Arowmap(row);
201 Offset rowEnd = Arowmap(row + 1);
202 for (Offset j = rowBegin; j < rowEnd; j++) {
203 LO col = Aentries(j);
204 const IST* blk = &Avalues(j * bs2);
205 for (LO v = 0; v < numVecs; v++) {
206 auto xCol = x.getLocalBlockHost(col, v, Tpetra::Access::ReadOnly);
207 for (LO br = 0; br < blockSize; br++) {
208 for (LO bc = 0; bc < blockSize; bc++) {
209 IST Aval = blk[br * blockSize + bc];
210 accum(br, v) += Aval * xCol(bc);
216 auto invBlock = Kokkos::subview(inverseBlockDiag, row, Kokkos::ALL(), Kokkos::ALL());
217 Kokkos::deep_copy(dinv_accum, KAT::zero());
218 for (LO v = 0; v < numVecs; v++) {
219 auto bRow = b.getLocalBlockHost(row, v, Tpetra::Access::ReadOnly);
220 for (LO br = 0; br < blockSize; br++) {
221 accum(br, v) = bRow(br) - accum(br, v);
224 for (LO v = 0; v < numVecs; v++) {
225 for (LO br = 0; br < blockSize; br++) {
226 for (LO bc = 0; bc < blockSize; bc++) {
227 dinv_accum(br, v) += invBlock(br, bc) * accum(bc, v);
232 for (LO v = 0; v < numVecs; v++) {
233 auto xRow = x.getLocalBlockHost(row, v, Tpetra::Access::ReadWrite);
234 for (LO k = 0; k < blockSize; k++) {
235 xRow(k) += omega * dinv_accum(k, v);
242 void apply(multivector_type& x,
const multivector_type& b, Tpetra::ESweepDirection direction) {
243 if (direction == Tpetra::Symmetric) {
244 apply(x, b, Tpetra::Forward);
245 apply(x, b, Tpetra::Backward);
248 const_values_t Avalues;
249 const_rowmap_t Arowmap;
250 const_entries_t Aentries;
252 Avalues = rowMatrixValues;
253 Arowmap = rowMatrixRowmap;
254 Aentries = rowMatrixEntries;
256 auto Acrs = Teuchos::rcp_dynamic_cast<
const crs_matrix_type>(A);
258 throw std::runtime_error(
"Ifpack2::Details::GaussSeidel::apply: either haveRowMatrix, or A is CrsMatrix");
259 auto Alcl = Acrs->getLocalMatrixHost();
260 Avalues = Alcl.values;
261 Arowmap = Alcl.graph.row_map;
262 Aentries = Alcl.graph.entries;
264 if (applyRows.is_null()) {
265 if (x.getNumVectors() > 1)
266 this->
template applyImpl<false, true, true>(Avalues, Arowmap, Aentries, x, b, direction);
269 if (omega == KAT::one())
270 this->
template applyImpl<false, false, false>(Avalues, Arowmap, Aentries, x, b, direction);
272 this->
template applyImpl<false, false, true>(Avalues, Arowmap, Aentries, x, b, direction);
275 this->
template applyImpl<true, true, true>(Avalues, Arowmap, Aentries, x, b, direction);
282 values_t rowMatrixValues;
283 rowmap_t rowMatrixRowmap;
284 entries_t rowMatrixEntries;
292 InverseBlocksHost inverseBlockDiag;