Ifpack2 Templated Preconditioning Package, Version 1.0
Ifpack2_Details_ScaledDampedResidual_def.hpp
// @HEADER
// *****************************************************************************
// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
//
// Copyright 2009 NTESS and the Ifpack2 contributors.
// SPDX-License-Identifier: BSD-3-Clause
// *****************************************************************************
// @HEADER

#ifndef IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_DEF_HPP
#define IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_DEF_HPP

#include "Tpetra_CrsMatrix.hpp"
#include "Tpetra_MultiVector.hpp"
#include "Tpetra_Operator.hpp"
#include "Tpetra_Vector.hpp"
#include "Tpetra_Export_decl.hpp"
#include "Tpetra_Import_decl.hpp"
#if KOKKOS_VERSION >= 40799
#include "KokkosKernels_ArithTraits.hpp"
#else
#include "Kokkos_ArithTraits.hpp"
#endif
#include "Teuchos_Assert.hpp"
#include <type_traits>
#include "KokkosSparse_spmv_impl.hpp"

namespace Ifpack2 {
namespace Details {
namespace Impl {

// Functor for computing W := alpha * D * (B - A*X) + beta * W.
template <class WVector,
          class DVector,
          class BVector,
          class AMatrix,
          class XVector,
          class Scalar,
          bool use_beta>
struct ScaledDampedResidualVectorFunctor {
  static_assert(static_cast<int>(WVector::rank) == 1,
                "WVector must be a rank 1 View.");
  static_assert(static_cast<int>(DVector::rank) == 1,
                "DVector must be a rank 1 View.");
  static_assert(static_cast<int>(BVector::rank) == 1,
                "BVector must be a rank 1 View.");
  static_assert(static_cast<int>(XVector::rank) == 1,
                "XVector must be a rank 1 View.");

  using execution_space = typename AMatrix::execution_space;
  using LO = typename AMatrix::non_const_ordinal_type;
  using value_type = typename AMatrix::non_const_value_type;
  using team_policy = typename Kokkos::TeamPolicy<execution_space>;
  using team_member = typename team_policy::member_type;
#if KOKKOS_VERSION >= 40799
  using ATV = KokkosKernels::ArithTraits<value_type>;
#else
  using ATV = Kokkos::ArithTraits<value_type>;
#endif

  const Scalar alpha;
  WVector m_w;
  DVector m_d;
  BVector m_b;
  AMatrix m_A;
  XVector m_x;
  const Scalar beta;

  const LO rows_per_team;

  ScaledDampedResidualVectorFunctor(const Scalar& alpha_,
                                    const WVector& m_w_,
                                    const DVector& m_d_,
                                    const BVector& m_b_,
                                    const AMatrix& m_A_,
                                    const XVector& m_x_,
                                    const Scalar& beta_,
                                    const int rows_per_team_)
    : alpha(alpha_)
    , m_w(m_w_)
    , m_d(m_d_)
    , m_b(m_b_)
    , m_A(m_A_)
    , m_x(m_x_)
    , beta(beta_)
    , rows_per_team(rows_per_team_) {
    const size_t numRows = m_A.numRows();
    const size_t numCols = m_A.numCols();

    TEUCHOS_ASSERT(m_w.extent(0) == m_d.extent(0));
    TEUCHOS_ASSERT(m_w.extent(0) == m_b.extent(0));
    TEUCHOS_ASSERT(numRows == size_t(m_w.extent(0)));
    TEUCHOS_ASSERT(numCols <= size_t(m_x.extent(0)));
  }

  KOKKOS_INLINE_FUNCTION
  void operator()(const team_member& dev) const {
    using residual_value_type = typename BVector::non_const_value_type;
#if KOKKOS_VERSION >= 40799
    using KAT = KokkosKernels::ArithTraits<residual_value_type>;
#else
    using KAT = Kokkos::ArithTraits<residual_value_type>;
#endif

    // Each team owns a contiguous block of rows_per_team rows; each thread
    // in the team processes one row of that block.
    Kokkos::parallel_for(Kokkos::TeamThreadRange(dev, 0, rows_per_team),
                         [&](const LO& loop) {
                           const LO lclRow =
                               static_cast<LO>(dev.league_rank()) * rows_per_team + loop;
                           if (lclRow >= m_A.numRows()) {
                             return;
                           }
                           const auto A_row = m_A.rowConst(lclRow);
                           const LO row_length = static_cast<LO>(A_row.length);
                           residual_value_type A_x = KAT::zero();

                           // The thread's vector lanes reduce this row's dot product A(lclRow,:) * x.
                           Kokkos::parallel_reduce(
                               Kokkos::ThreadVectorRange(dev, row_length),
                               [&](const LO iEntry, residual_value_type& lsum) {
                                 const auto A_val = A_row.value(iEntry);
                                 lsum += A_val * m_x(A_row.colidx(iEntry));
                               },
                               A_x);

                           // One lane per thread writes the damped, scaled update for this row.
                           Kokkos::single(Kokkos::PerThread(dev),
                                          [&]() {
                                            const auto alpha_D_res =
                                                alpha * m_d(lclRow) * (m_b(lclRow) - A_x);
                                            if (use_beta) {
                                              m_w(lclRow) = beta * m_w(lclRow) + alpha_D_res;
                                            } else {
                                              m_w(lclRow) = alpha_D_res;
                                            }
                                          });
                         });
  }
};

// W := alpha * D * (B - A*X) + beta * W.
template <class WVector,
          class DVector,
          class BVector,
          class AMatrix,
          class XVector,
          class Scalar>
static void
scaled_damped_residual_vector(const Scalar& alpha,
                              const WVector& w,
                              const DVector& d,
                              const BVector& b,
                              const AMatrix& A,
                              const XVector& x,
                              const Scalar& beta) {
  using execution_space = typename AMatrix::execution_space;

  if (A.numRows() == 0) {
    return;
  }

  int team_size = -1;
  int vector_length = -1;
  int64_t rows_per_thread = -1;

  // Reuse KokkosSparse's SPMV launch heuristics to pick team and vector sizes.
  const int64_t rows_per_team = KokkosSparse::Impl::spmv_launch_parameters<execution_space>(
      A.numRows(), A.nnz(), rows_per_thread, team_size, vector_length);
  int64_t worksets = (b.extent(0) + rows_per_team - 1) / rows_per_team;

  using Kokkos::Dynamic;
  using Kokkos::Schedule;
  using Kokkos::Static;
  using Kokkos::TeamPolicy;
  using policy_type_dynamic = TeamPolicy<execution_space, Schedule<Dynamic> >;
  using policy_type_static = TeamPolicy<execution_space, Schedule<Static> >;
  const char kernel_label[] = "scaled_damped_residual_vector";
  policy_type_dynamic policyDynamic(1, 1);
  policy_type_static policyStatic(1, 1);
  if (team_size < 0) {
    policyDynamic = policy_type_dynamic(worksets, Kokkos::AUTO, vector_length);
    policyStatic = policy_type_static(worksets, Kokkos::AUTO, vector_length);
  } else {
    policyDynamic = policy_type_dynamic(worksets, team_size, vector_length);
    policyStatic = policy_type_static(worksets, team_size, vector_length);
  }

  // Canonicalize template arguments to avoid redundant instantiations.
  using w_vec_type = typename WVector::non_const_type;
  using d_vec_type = typename DVector::const_type;
  using b_vec_type = typename BVector::const_type;
  using matrix_type = AMatrix;
  using x_vec_type = typename XVector::const_type;
#if KOKKOS_VERSION >= 40799
  using scalar_type = typename KokkosKernels::ArithTraits<Scalar>::val_type;
#else
  using scalar_type = typename Kokkos::ArithTraits<Scalar>::val_type;
#endif

#if KOKKOS_VERSION >= 40799
  if (beta == KokkosKernels::ArithTraits<Scalar>::zero()) {
#else
  if (beta == Kokkos::ArithTraits<Scalar>::zero()) {
#endif
    // beta == 0 gets its own instantiation so the functor never reads W.
    constexpr bool use_beta = false;
    using functor_type =
        ScaledDampedResidualVectorFunctor<w_vec_type, d_vec_type,
                                          b_vec_type, matrix_type,
                                          x_vec_type, scalar_type,
                                          use_beta>;
    functor_type func(alpha, w, d, b, A, x, beta, rows_per_team);
    // Heuristic: dynamic scheduling tolerates uneven row lengths better on
    // large matrices; static scheduling has lower overhead on small ones.
    if (A.nnz() > 10000000)
      Kokkos::parallel_for(kernel_label, policyDynamic, func);
    else
      Kokkos::parallel_for(kernel_label, policyStatic, func);
  } else {
    constexpr bool use_beta = true;
    using functor_type =
        ScaledDampedResidualVectorFunctor<w_vec_type, d_vec_type,
                                          b_vec_type, matrix_type,
                                          x_vec_type, scalar_type,
                                          use_beta>;
    functor_type func(alpha, w, d, b, A, x, beta, rows_per_team);
    if (A.nnz() > 10000000)
      Kokkos::parallel_for(kernel_label, policyDynamic, func);
    else
      Kokkos::parallel_for(kernel_label, policyStatic, func);
  }
}
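
// Illustrative note (comment only, not from the original source): for each
// local row i, the kernel above computes
//
//   w(i) = alpha * d(i) * (b(i) - sum_j A(i,j) * x(j))  [+ beta * w(i) if beta != 0]
//
// fusedCase() below shows how Tpetra vectors and the matrix are unwrapped
// into the rank-1 device views and local matrix this function expects.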

} // namespace Impl

template <class TpetraOperatorType>
ScaledDampedResidual<TpetraOperatorType>::
    ScaledDampedResidual(const Teuchos::RCP<const operator_type>& A) {
  setMatrix(A);
}

template <class TpetraOperatorType>
void ScaledDampedResidual<TpetraOperatorType>::
    setMatrix(const Teuchos::RCP<const operator_type>& A) {
  if (A_op_.get() != A.get()) {
    A_op_ = A;

    // We'll (re)allocate these on demand.
    X_colMap_ = std::unique_ptr<vector_type>(nullptr);
    V1_ = std::unique_ptr<multivector_type>(nullptr);

    using Teuchos::rcp_dynamic_cast;
    Teuchos::RCP<const crs_matrix_type> A_crs =
        rcp_dynamic_cast<const crs_matrix_type>(A);
    if (A_crs.is_null()) {
      A_crs_ = Teuchos::null;
      imp_ = Teuchos::null;
      exp_ = Teuchos::null;
    } else {
      TEUCHOS_ASSERT(A_crs->isFillComplete());
      A_crs_ = A_crs;
      auto G = A_crs->getCrsGraph();
      imp_ = G->getImporter();
      exp_ = G->getExporter();
    }
  }
}

template <class TpetraOperatorType>
void ScaledDampedResidual<TpetraOperatorType>::
    compute(multivector_type& W,
            const SC& alpha,
            vector_type& D_inv,
            multivector_type& B,
            multivector_type& X,
            const SC& beta) {
  using Teuchos::RCP;
  using Teuchos::rcp;

  if (canFuse(B)) {
    // "nonconst" here has no effect other than on the return type.
    W_vec_ = W.getVectorNonConst(0);
    B_vec_ = B.getVectorNonConst(0);
    X_vec_ = X.getVectorNonConst(0);
    TEUCHOS_ASSERT(!A_crs_.is_null());
    fusedCase(*W_vec_, alpha, D_inv, *B_vec_, *A_crs_, *X_vec_, beta);
  } else {
    TEUCHOS_ASSERT(!A_op_.is_null());
    unfusedCase(W, alpha, D_inv, B, *A_op_, X, beta);
  }
}

template <class TpetraOperatorType>
typename ScaledDampedResidual<TpetraOperatorType>::vector_type&
ScaledDampedResidual<TpetraOperatorType>::
    importVector(vector_type& X_domMap) {
  if (imp_.is_null()) {
    return X_domMap;
  } else {
    if (X_colMap_.get() == nullptr) {
      using V = vector_type;
      X_colMap_ = std::unique_ptr<V>(new V(imp_->getTargetMap()));
    }
    X_colMap_->doImport(X_domMap, *imp_, Tpetra::REPLACE);
    return *X_colMap_;
  }
}

template <class TpetraOperatorType>
bool ScaledDampedResidual<TpetraOperatorType>::
    canFuse(const multivector_type& B) const {
  // Fuse only for a single right-hand side, when A is a CrsMatrix, and when
  // applying A requires no Export.
  return B.getNumVectors() == size_t(1) &&
         !A_crs_.is_null() &&
         exp_.is_null();
}

template <class TpetraOperatorType>
void ScaledDampedResidual<TpetraOperatorType>::
    unfusedCase(multivector_type& W,
                const SC& alpha,
                vector_type& D_inv,
                multivector_type& B,
                const operator_type& A,
                multivector_type& X,
                const SC& beta) {
  if (V1_.get() == nullptr) {
    using MV = multivector_type;
    const size_t numVecs = B.getNumVectors();
    V1_ = std::unique_ptr<MV>(new MV(B.getMap(), numVecs));
  }
  const SC one = Teuchos::ScalarTraits<SC>::one();

  // V1 = B - A*X
  Tpetra::deep_copy(*V1_, B);
  A.apply(X, *V1_, Teuchos::NO_TRANS, -one, one);

  // W := alpha * D_inv * V1 + beta * W
  W.elementWiseMultiply(alpha, D_inv, *V1_, beta);
}

template <class TpetraOperatorType>
void ScaledDampedResidual<TpetraOperatorType>::
    fusedCase(vector_type& W,
              const SC& alpha,
              vector_type& D_inv,
              vector_type& B,
              const crs_matrix_type& A,
              vector_type& X,
              const SC& beta) {
  vector_type& X_colMap = importVector(X);

  using Impl::scaled_damped_residual_vector;
  using STS = Teuchos::ScalarTraits<SC>;

  auto A_lcl = A.getLocalMatrixDevice();
  auto Dinv_lcl = Kokkos::subview(D_inv.getLocalViewDevice(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
  auto B_lcl = Kokkos::subview(B.getLocalViewDevice(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
  auto X_lcl = Kokkos::subview(X_colMap.getLocalViewDevice(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
  if (beta == STS::zero()) {
    auto W_lcl = Kokkos::subview(W.getLocalViewDevice(Tpetra::Access::OverwriteAll), Kokkos::ALL(), 0);
    scaled_damped_residual_vector(alpha, W_lcl, Dinv_lcl,
                                  B_lcl, A_lcl, X_lcl, beta);
  } else {  // need to read _and_ write W if beta != 0
    auto W_lcl = Kokkos::subview(W.getLocalViewDevice(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
    scaled_damped_residual_vector(alpha, W_lcl, Dinv_lcl,
                                  B_lcl, A_lcl, X_lcl, beta);
  }
}

} // namespace Details
} // namespace Ifpack2

#define IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_INSTANT(SC, LO, GO, NT) \
  template class Ifpack2::Details::ScaledDampedResidual<Tpetra::Operator<SC, LO, GO, NT> >;

#endif // IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_DEF_HPP
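
A minimal usage sketch follows; it is not part of the header above. It assumes a fill-complete matrix already wrapped as a Tpetra::Operator, vectors laid out on that operator's maps, and a combined Ifpack2_Details_ScaledDampedResidual.hpp header that pulls in both the declaration and this definition. The type aliases and the function name damped_residual_step are illustrative only.

#include "Ifpack2_Details_ScaledDampedResidual.hpp"  // assumed combined decl + def header
#include "Teuchos_ScalarTraits.hpp"

// Illustrative aliases; real code would use its own scalar/ordinal/node types.
using SC = Tpetra::CrsMatrix<>::scalar_type;
using op_type = Tpetra::Operator<SC>;
using MV = Tpetra::MultiVector<SC>;
using V = Tpetra::Vector<SC>;

// One damped-Jacobi-style step: W := D_inv .* (B - A*X).
void damped_residual_step(const Teuchos::RCP<const op_type>& A,
                          MV& W, V& D_inv, MV& B, MV& X) {
  using STS = Teuchos::ScalarTraits<SC>;
  Ifpack2::Details::ScaledDampedResidual<op_type> resid(A);
  // alpha = 1, beta = 0 overwrites W with the scaled residual. When B has a
  // single column and A is a CrsMatrix needing no Export, the fused kernel runs.
  resid.compute(W, STS::one(), D_inv, B, X, STS::zero());
}

Passing a nonzero beta instead accumulates into W, as an iteration such as a Chebyshev-type smoother might do when combining the new correction with a previous update direction.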