Ifpack2 Templated Preconditioning Package, Version 1.0
Ifpack2_Details_ScaledDampedResidual_def.hpp
// @HEADER
// *****************************************************************************
// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
//
// Copyright 2009 NTESS and the Ifpack2 contributors.
// SPDX-License-Identifier: BSD-3-Clause
// *****************************************************************************
// @HEADER

#ifndef IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_DEF_HPP
#define IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_DEF_HPP

#include "Tpetra_CrsMatrix.hpp"
#include "Tpetra_MultiVector.hpp"
#include "Tpetra_Operator.hpp"
#include "Tpetra_Vector.hpp"
#include "Tpetra_Export_decl.hpp"
#include "Tpetra_Import_decl.hpp"
#include "Kokkos_ArithTraits.hpp"
#include "Teuchos_Assert.hpp"
#include <type_traits>
#include "KokkosSparse_spmv_impl.hpp"

namespace Ifpack2 {
namespace Details {
namespace Impl {

/// \brief Functor for computing W := alpha * D * (B - A*X) + beta * W.
template<class WVector,
         class DVector,
         class BVector,
         class AMatrix,
         class XVector,
         class Scalar,
         bool use_beta>
struct ScaledDampedResidualVectorFunctor {
  static_assert (static_cast<int> (WVector::rank) == 1,
                 "WVector must be a rank 1 View.");
  static_assert (static_cast<int> (DVector::rank) == 1,
                 "DVector must be a rank 1 View.");
  static_assert (static_cast<int> (BVector::rank) == 1,
                 "BVector must be a rank 1 View.");
  static_assert (static_cast<int> (XVector::rank) == 1,
                 "XVector must be a rank 1 View.");

  using execution_space = typename AMatrix::execution_space;
  using LO = typename AMatrix::non_const_ordinal_type;
  using value_type = typename AMatrix::non_const_value_type;
  using team_policy = typename Kokkos::TeamPolicy<execution_space>;
  using team_member = typename team_policy::member_type;
  using ATV = Kokkos::ArithTraits<value_type>;

  const Scalar alpha;
  WVector m_w;
  DVector m_d;
  BVector m_b;
  AMatrix m_A;
  XVector m_x;
  const Scalar beta;

  const LO rows_per_team;

  ScaledDampedResidualVectorFunctor (const Scalar& alpha_,
                                     const WVector& m_w_,
                                     const DVector& m_d_,
                                     const BVector& m_b_,
                                     const AMatrix& m_A_,
                                     const XVector& m_x_,
                                     const Scalar& beta_,
                                     const int rows_per_team_) :
    alpha (alpha_),
    m_w (m_w_),
    m_d (m_d_),
    m_b (m_b_),
    m_A (m_A_),
    m_x (m_x_),
    beta (beta_),
    rows_per_team (rows_per_team_)
  {
    const size_t numRows = m_A.numRows ();
    const size_t numCols = m_A.numCols ();

    TEUCHOS_ASSERT( m_w.extent (0) == m_d.extent (0) );
    TEUCHOS_ASSERT( m_w.extent (0) == m_b.extent (0) );
    TEUCHOS_ASSERT( numRows == size_t (m_w.extent (0)) );
    TEUCHOS_ASSERT( numCols <= size_t (m_x.extent (0)) );
  }

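  // Each team handles a contiguous block of rows_per_team rows.  For each
  // row, the vector lanes of a thread reduce the local sparse dot product
  // A(row,:) * x into A_x, and a single lane then writes the scaled,
  // damped update into w(row).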
  KOKKOS_INLINE_FUNCTION
  void operator() (const team_member& dev) const
  {
    using residual_value_type = typename BVector::non_const_value_type;
    using KAT = Kokkos::ArithTraits<residual_value_type>;

    Kokkos::parallel_for
      (Kokkos::TeamThreadRange (dev, 0, rows_per_team),
       [&] (const LO& loop) {
         const LO lclRow =
           static_cast<LO> (dev.league_rank ()) * rows_per_team + loop;
         if (lclRow >= m_A.numRows ()) {
           return;
         }
         const auto A_row = m_A.rowConst (lclRow);
         const LO row_length = static_cast<LO> (A_row.length);
         residual_value_type A_x = KAT::zero ();

         Kokkos::parallel_reduce
           (Kokkos::ThreadVectorRange (dev, row_length),
            [&] (const LO iEntry, residual_value_type& lsum) {
              const auto A_val = A_row.value (iEntry);
              lsum += A_val * m_x(A_row.colidx (iEntry));
            }, A_x);

         Kokkos::single
           (Kokkos::PerThread (dev),
            [&] () {
              const auto alpha_D_res =
                alpha * m_d(lclRow) * (m_b(lclRow) - A_x);
              if (use_beta) {
                m_w(lclRow) = beta * m_w(lclRow) + alpha_D_res;
              }
              else {
                m_w(lclRow) = alpha_D_res;
              }
            });
       });
  }

};


// W := alpha * D * (B - A*X) + beta * W.
template<class WVector,
         class DVector,
         class BVector,
         class AMatrix,
         class XVector,
         class Scalar>
static void
scaled_damped_residual_vector
(const Scalar& alpha,
 const WVector& w,
 const DVector& d,
 const BVector& b,
 const AMatrix& A,
 const XVector& x,
 const Scalar& beta)
{
  using execution_space = typename AMatrix::execution_space;

  if (A.numRows () == 0) {
    return;
  }

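  // Reuse the KokkosSparse SpMV launch heuristic to pick the team size,
  // vector length, and number of rows per team, since this kernel's access
  // pattern matches a sparse matrix-vector multiply.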
  int team_size = -1;
  int vector_length = -1;
  int64_t rows_per_thread = -1;

  const int64_t rows_per_team =
    KokkosSparse::Impl::spmv_launch_parameters<execution_space>
      (A.numRows (), A.nnz (), rows_per_thread, team_size, vector_length);
  int64_t worksets = (b.extent (0) + rows_per_team - 1) / rows_per_team;

  using Kokkos::Dynamic;
  using Kokkos::Static;
  using Kokkos::Schedule;
  using Kokkos::TeamPolicy;
  using policy_type_dynamic = TeamPolicy<execution_space, Schedule<Dynamic> >;
  using policy_type_static = TeamPolicy<execution_space, Schedule<Static> >;
  const char kernel_label[] = "scaled_damped_residual_vector";
  policy_type_dynamic policyDynamic (1, 1);
  policy_type_static policyStatic (1, 1);
  if (team_size < 0) {
    policyDynamic = policy_type_dynamic (worksets, Kokkos::AUTO, vector_length);
    policyStatic = policy_type_static (worksets, Kokkos::AUTO, vector_length);
  }
  else {
    policyDynamic = policy_type_dynamic (worksets, team_size, vector_length);
    policyStatic = policy_type_static (worksets, team_size, vector_length);
  }

  // Canonicalize template arguments to avoid redundant instantiations.
  using w_vec_type = typename WVector::non_const_type;
  using d_vec_type = typename DVector::const_type;
  using b_vec_type = typename BVector::const_type;
  using matrix_type = AMatrix;
  using x_vec_type = typename XVector::const_type;
  using scalar_type = typename Kokkos::ArithTraits<Scalar>::val_type;

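  // The compile-time use_beta flag lets the functor skip reading W when beta
  // is zero.  Matrices with more than ten million nonzeros run under the
  // dynamically scheduled policy; smaller ones use the static policy.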
  if (beta == Kokkos::ArithTraits<Scalar>::zero ()) {
    constexpr bool use_beta = false;
    using functor_type =
      ScaledDampedResidualVectorFunctor<w_vec_type, d_vec_type,
                                        b_vec_type, matrix_type,
                                        x_vec_type, scalar_type,
                                        use_beta>;
    functor_type func (alpha, w, d, b, A, x, beta, rows_per_team);
    if (A.nnz () > 10000000) {
      Kokkos::parallel_for (kernel_label, policyDynamic, func);
    }
    else {
      Kokkos::parallel_for (kernel_label, policyStatic, func);
    }
  }
  else {
    constexpr bool use_beta = true;
    using functor_type =
      ScaledDampedResidualVectorFunctor<w_vec_type, d_vec_type,
                                        b_vec_type, matrix_type,
                                        x_vec_type, scalar_type,
                                        use_beta>;
    functor_type func (alpha, w, d, b, A, x, beta, rows_per_team);
    if (A.nnz () > 10000000) {
      Kokkos::parallel_for (kernel_label, policyDynamic, func);
    }
    else {
      Kokkos::parallel_for (kernel_label, policyStatic, func);
    }
  }
}

} // namespace Impl

template<class TpetraOperatorType>
ScaledDampedResidual<TpetraOperatorType>::
ScaledDampedResidual (const Teuchos::RCP<const operator_type>& A)
{
  setMatrix (A);
}

template<class TpetraOperatorType>
void
ScaledDampedResidual<TpetraOperatorType>::
setMatrix (const Teuchos::RCP<const operator_type>& A)
{
  if (A_op_.get () != A.get ()) {
    A_op_ = A;

    // We'll (re)allocate these on demand.
    X_colMap_ = std::unique_ptr<vector_type> (nullptr);
    V1_ = std::unique_ptr<multivector_type> (nullptr);

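    // Cache the matrix as a CrsMatrix when possible, along with its graph's
    // Import and Export, so compute() can dispatch to the fused kernel.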
    using Teuchos::rcp_dynamic_cast;
    Teuchos::RCP<const crs_matrix_type> A_crs =
      rcp_dynamic_cast<const crs_matrix_type> (A);
    if (A_crs.is_null ()) {
      A_crs_ = Teuchos::null;
      imp_ = Teuchos::null;
      exp_ = Teuchos::null;
    }
    else {
      TEUCHOS_ASSERT( A_crs->isFillComplete () );
      A_crs_ = A_crs;
      auto G = A_crs->getCrsGraph ();
      imp_ = G->getImporter ();
      exp_ = G->getExporter ();
    }
  }
}

template<class TpetraOperatorType>
void
ScaledDampedResidual<TpetraOperatorType>::
compute (multivector_type& W,
         const SC& alpha,
         vector_type& D_inv,
         multivector_type& B,
         multivector_type& X,
         const SC& beta)
{
  using Teuchos::RCP;
  using Teuchos::rcp;

  if (canFuse (B)) {
    // "nonconst" here has no effect other than on the return type.
    W_vec_ = W.getVectorNonConst (0);
    B_vec_ = B.getVectorNonConst (0);
    X_vec_ = X.getVectorNonConst (0);
    TEUCHOS_ASSERT( ! A_crs_.is_null () );
    fusedCase (*W_vec_, alpha, D_inv, *B_vec_, *A_crs_, *X_vec_, beta);
  }
  else {
    TEUCHOS_ASSERT( ! A_op_.is_null () );
    unfusedCase (W, alpha, D_inv, B, *A_op_, X, beta);
  }
}

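// Bring X from the domain Map to the column Map via the cached Import,
// allocating the column-Map work vector on first use.  If the matrix has
// no Import (the Maps coincide), X is used directly.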
template<class TpetraOperatorType>
typename ScaledDampedResidual<TpetraOperatorType>::vector_type&
ScaledDampedResidual<TpetraOperatorType>::
importVector (vector_type& X_domMap)
{
  if (imp_.is_null ()) {
    return X_domMap;
  }
  else {
    if (X_colMap_.get () == nullptr) {
      using V = vector_type;
      X_colMap_ = std::unique_ptr<V> (new V (imp_->getTargetMap ()));
    }
    X_colMap_->doImport (X_domMap, *imp_, Tpetra::REPLACE);
    return *X_colMap_;
  }
}

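// The fused path applies only when B has a single column, setMatrix() was
// able to cache the operator as a CrsMatrix, and the matrix has no Export.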
template<class TpetraOperatorType>
bool
ScaledDampedResidual<TpetraOperatorType>::
canFuse (const multivector_type& B) const
{
  return B.getNumVectors () == size_t (1) &&
    ! A_crs_.is_null () &&
    exp_.is_null ();
}

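// General fallback: form the residual with Operator::apply on a deep copy
// of B, then apply the scaling and damping with elementWiseMultiply.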
template<class TpetraOperatorType>
void
ScaledDampedResidual<TpetraOperatorType>::
unfusedCase (multivector_type& W,
             const SC& alpha,
             vector_type& D_inv,
             multivector_type& B,
             const operator_type& A,
             multivector_type& X,
             const SC& beta)
{
  if (V1_.get () == nullptr) {
    using MV = multivector_type;
    const size_t numVecs = B.getNumVectors ();
    V1_ = std::unique_ptr<MV> (new MV (B.getMap (), numVecs));
  }
  const SC one = Teuchos::ScalarTraits<SC>::one ();

  // V1 = B - A*X
  Tpetra::deep_copy (*V1_, B);
  A.apply (X, *V1_, Teuchos::NO_TRANS, -one, one);

  // W := alpha * D_inv * V1 + beta * W
  W.elementWiseMultiply (alpha, D_inv, *V1_, beta);
}

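// Single-vector case: extract rank-1 device views of W, D_inv, B, and the
// column-Map copy of X, then call the fused kernel.  W is opened with
// OverwriteAll access when beta == 0 so its old values are never read.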
template<class TpetraOperatorType>
void
ScaledDampedResidual<TpetraOperatorType>::
fusedCase (vector_type& W,
           const SC& alpha,
           vector_type& D_inv,
           vector_type& B,
           const crs_matrix_type& A,
           vector_type& X,
           const SC& beta)
{
  vector_type& X_colMap = importVector (X);

  using Impl::scaled_damped_residual_vector;
  using STS = Teuchos::ScalarTraits<SC>;

  auto A_lcl = A.getLocalMatrixDevice ();
  auto Dinv_lcl = Kokkos::subview (D_inv.getLocalViewDevice (Tpetra::Access::ReadOnly), Kokkos::ALL (), 0);
  auto B_lcl = Kokkos::subview (B.getLocalViewDevice (Tpetra::Access::ReadOnly), Kokkos::ALL (), 0);
  auto X_lcl = Kokkos::subview (X_colMap.getLocalViewDevice (Tpetra::Access::ReadOnly), Kokkos::ALL (), 0);
  if (beta == STS::zero ()) {
    auto W_lcl = Kokkos::subview (W.getLocalViewDevice (Tpetra::Access::OverwriteAll), Kokkos::ALL (), 0);
    scaled_damped_residual_vector (alpha, W_lcl, Dinv_lcl,
                                   B_lcl, A_lcl, X_lcl, beta);
  }
  else { // need to read _and_ write W if beta != 0
    auto W_lcl = Kokkos::subview (W.getLocalViewDevice (Tpetra::Access::ReadWrite), Kokkos::ALL (), 0);
    scaled_damped_residual_vector (alpha, W_lcl, Dinv_lcl,
                                   B_lcl, A_lcl, X_lcl, beta);
  }
}

} // namespace Details
} // namespace Ifpack2

#define IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_INSTANT(SC,LO,GO,NT) \
  template class Ifpack2::Details::ScaledDampedResidual<Tpetra::Operator<SC, LO, GO, NT> >;

#endif // IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_DEF_HPP
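
For orientation, a minimal caller sketch (not part of the header above): it assumes the usual Tpetra scalar/ordinal/node typedefs SC, LO, GO, NT, a fill-complete matrix A held as Teuchos::RCP<const Tpetra::Operator<SC, LO, GO, NT> >, Tpetra multivectors W, B, X and a diagonal-scaling Tpetra vector D_inv on compatible Maps, and scalars alpha, beta.

  using op_type = Tpetra::Operator<SC, LO, GO, NT>;
  Ifpack2::Details::ScaledDampedResidual<op_type> residual (A);
  // One damped, scaled residual update: W := alpha * D_inv * (B - A*X) + beta * W.
  residual.compute (W, alpha, D_inv, B, X, beta);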