Ifpack2 Templated Preconditioning Package Version 1.0
Ifpack2_Details_ScaledDampedResidual_def.hpp
/*
//@HEADER
// ***********************************************************************
//
// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
// Copyright (2009) Sandia Corporation
//
// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
// license for use of this work by or on behalf of the U.S. Government.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// ***********************************************************************
//@HEADER
*/

#ifndef IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_DEF_HPP
#define IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_DEF_HPP

#include "Tpetra_CrsMatrix.hpp"
#include "Tpetra_MultiVector.hpp"
#include "Tpetra_Operator.hpp"
#include "Tpetra_Vector.hpp"
#include "Tpetra_Export_decl.hpp"
#include "Tpetra_Import_decl.hpp"
#include "Kokkos_ArithTraits.hpp"
#include "Teuchos_Assert.hpp"
#include <type_traits>
#include "KokkosSparse_spmv_impl.hpp"

namespace Ifpack2 {
namespace Details {
namespace Impl {

/// \brief Functor for computing W := alpha * D * (B - A*X) + beta * W.
template<class WVector,
         class DVector,
         class BVector,
         class AMatrix,
         class XVector,
         class Scalar,
         bool use_beta>
struct ScaledDampedResidualVectorFunctor {
  static_assert (static_cast<int> (WVector::rank) == 1,
                 "WVector must be a rank 1 View.");
  static_assert (static_cast<int> (DVector::rank) == 1,
                 "DVector must be a rank 1 View.");
  static_assert (static_cast<int> (BVector::rank) == 1,
                 "BVector must be a rank 1 View.");
  static_assert (static_cast<int> (XVector::rank) == 1,
                 "XVector must be a rank 1 View.");

  using execution_space = typename AMatrix::execution_space;
  using LO = typename AMatrix::non_const_ordinal_type;
  using value_type = typename AMatrix::non_const_value_type;
  using team_policy = typename Kokkos::TeamPolicy<execution_space>;
  using team_member = typename team_policy::member_type;
  using ATV = Kokkos::ArithTraits<value_type>;

  const Scalar alpha;
  WVector m_w;
  DVector m_d;
  BVector m_b;
  AMatrix m_A;
  XVector m_x;
  const Scalar beta;

  const LO rows_per_team;

  ScaledDampedResidualVectorFunctor (const Scalar& alpha_,
                                     const WVector& m_w_,
                                     const DVector& m_d_,
                                     const BVector& m_b_,
                                     const AMatrix& m_A_,
                                     const XVector& m_x_,
                                     const Scalar& beta_,
                                     const int rows_per_team_) :
    alpha (alpha_),
    m_w (m_w_),
    m_d (m_d_),
    m_b (m_b_),
    m_A (m_A_),
    m_x (m_x_),
    beta (beta_),
    rows_per_team (rows_per_team_)
  {
    const size_t numRows = m_A.numRows ();
    const size_t numCols = m_A.numCols ();

    TEUCHOS_ASSERT( m_w.extent (0) == m_d.extent (0) );
    TEUCHOS_ASSERT( m_w.extent (0) == m_b.extent (0) );
    TEUCHOS_ASSERT( numRows == size_t (m_w.extent (0)) );
    TEUCHOS_ASSERT( numCols <= size_t (m_x.extent (0)) );
  }

  KOKKOS_INLINE_FUNCTION
  void operator() (const team_member& dev) const
  {
    using residual_value_type = typename BVector::non_const_value_type;
    using KAT = Kokkos::ArithTraits<residual_value_type>;

    Kokkos::parallel_for
      (Kokkos::TeamThreadRange (dev, 0, rows_per_team),
       [&] (const LO& loop) {
         const LO lclRow =
           static_cast<LO> (dev.league_rank ()) * rows_per_team + loop;
         if (lclRow >= m_A.numRows ()) {
           return;
         }
         const auto A_row = m_A.rowConst(lclRow);
         const LO row_length = static_cast<LO> (A_row.length);
         residual_value_type A_x = KAT::zero ();

         Kokkos::parallel_reduce
           (Kokkos::ThreadVectorRange (dev, row_length),
            [&] (const LO iEntry, residual_value_type& lsum) {
              const auto A_val = A_row.value(iEntry);
              lsum += A_val * m_x(A_row.colidx(iEntry));
            }, A_x);

         Kokkos::single
           (Kokkos::PerThread(dev),
            [&] () {
              const auto alpha_D_res =
                alpha * m_d(lclRow) * (m_b(lclRow) - A_x);
              if (use_beta) {
                m_w(lclRow) = beta * m_w(lclRow) + alpha_D_res;
              }
              else {
                m_w(lclRow) = alpha_D_res;
              }
            });
       });
  }

};

// W := alpha * D * (B - A*X) + beta * W.
template<class WVector,
         class DVector,
         class BVector,
         class AMatrix,
         class XVector,
         class Scalar>
static void
scaled_damped_residual_vector
(const Scalar& alpha,
 const WVector& w,
 const DVector& d,
 const BVector& b,
 const AMatrix& A,
 const XVector& x,
 const Scalar& beta)
{
  using execution_space = typename AMatrix::execution_space;

  if (A.numRows () == 0) {
    return;
  }

  int team_size = -1;
  int vector_length = -1;
  int64_t rows_per_thread = -1;

  const int64_t rows_per_team =
    KokkosSparse::Impl::spmv_launch_parameters<execution_space> (A.numRows (), A.nnz (),
                                                                 rows_per_thread, team_size,
                                                                 vector_length);
  int64_t worksets = (b.extent (0) + rows_per_team - 1) / rows_per_team;

  using Kokkos::Dynamic;
  using Kokkos::Static;
  using Kokkos::Schedule;
  using Kokkos::TeamPolicy;
  using policy_type_dynamic = TeamPolicy<execution_space, Schedule<Dynamic> >;
  using policy_type_static = TeamPolicy<execution_space, Schedule<Static> >;
  const char kernel_label[] = "scaled_damped_residual_vector";
  policy_type_dynamic policyDynamic (1, 1);
  policy_type_static policyStatic (1, 1);
  if (team_size < 0) {
    policyDynamic = policy_type_dynamic (worksets, Kokkos::AUTO, vector_length);
    policyStatic = policy_type_static (worksets, Kokkos::AUTO, vector_length);
  }
  else {
    policyDynamic = policy_type_dynamic (worksets, team_size, vector_length);
    policyStatic = policy_type_static (worksets, team_size, vector_length);
  }

  // Canonicalize template arguments to avoid redundant instantiations.
  using w_vec_type = typename WVector::non_const_type;
  using d_vec_type = typename DVector::const_type;
  using b_vec_type = typename BVector::const_type;
  using matrix_type = AMatrix;
  using x_vec_type = typename XVector::const_type;
  using scalar_type = typename Kokkos::ArithTraits<Scalar>::val_type;

  if (beta == Kokkos::ArithTraits<Scalar>::zero ()) {
    constexpr bool use_beta = false;
    using functor_type =
      ScaledDampedResidualVectorFunctor<w_vec_type, d_vec_type,
                                        b_vec_type, matrix_type,
                                        x_vec_type, scalar_type,
                                        use_beta>;
    functor_type func (alpha, w, d, b, A, x, beta, rows_per_team);
    if (A.nnz () > 10000000)
      Kokkos::parallel_for (kernel_label, policyDynamic, func);
    else
      Kokkos::parallel_for (kernel_label, policyStatic, func);
  }
  else {
    constexpr bool use_beta = true;
    using functor_type =
      ScaledDampedResidualVectorFunctor<w_vec_type, d_vec_type,
                                        b_vec_type, matrix_type,
                                        x_vec_type, scalar_type,
                                        use_beta>;
    functor_type func (alpha, w, d, b, A, x, beta, rows_per_team);
    if (A.nnz () > 10000000)
      Kokkos::parallel_for (kernel_label, policyDynamic, func);
    else
      Kokkos::parallel_for (kernel_label, policyStatic, func);
  }
}

} // namespace Impl

template<class TpetraOperatorType>
ScaledDampedResidual<TpetraOperatorType>::
ScaledDampedResidual (const Teuchos::RCP<const operator_type>& A)
{
  setMatrix (A);
}

template<class TpetraOperatorType>
void
ScaledDampedResidual<TpetraOperatorType>::
setMatrix (const Teuchos::RCP<const operator_type>& A)
{
  if (A_op_.get () != A.get ()) {
    A_op_ = A;

    // We'll (re)allocate these on demand.
    X_colMap_ = std::unique_ptr<vector_type> (nullptr);
    V1_ = std::unique_ptr<multivector_type> (nullptr);

    using Teuchos::rcp_dynamic_cast;
    Teuchos::RCP<const crs_matrix_type> A_crs =
      rcp_dynamic_cast<const crs_matrix_type> (A);
    if (A_crs.is_null ()) {
      A_crs_ = Teuchos::null;
      imp_ = Teuchos::null;
      exp_ = Teuchos::null;
    }
    else {
      TEUCHOS_ASSERT( A_crs->isFillComplete () );
      A_crs_ = A_crs;
      auto G = A_crs->getCrsGraph ();
      imp_ = G->getImporter ();
      exp_ = G->getExporter ();
    }
  }
}
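// Dispatch for compute(): if B has a single column, the matrix is a
// fill-complete Tpetra::CrsMatrix, and no Export is needed, use the fused
// Kokkos kernel (fusedCase); otherwise fall back to an SpMV followed by
// elementWiseMultiply (unfusedCase).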
template<class TpetraOperatorType>
void
ScaledDampedResidual<TpetraOperatorType>::
compute (multivector_type& W,
         const SC& alpha,
         vector_type& D_inv,
         multivector_type& B,
         multivector_type& X,
         const SC& beta)
{
  using Teuchos::RCP;
  using Teuchos::rcp;

  if (canFuse (B)) {
    // "nonconst" here has no effect other than on the return type.
    W_vec_ = W.getVectorNonConst (0);
    B_vec_ = B.getVectorNonConst (0);
    X_vec_ = X.getVectorNonConst (0);
    TEUCHOS_ASSERT( ! A_crs_.is_null () );
    fusedCase (*W_vec_, alpha, D_inv, *B_vec_, *A_crs_, *X_vec_, beta);
  }
  else {
    TEUCHOS_ASSERT( ! A_op_.is_null () );
    unfusedCase (W, alpha, D_inv, B, *A_op_, X, beta);
  }
}

template<class TpetraOperatorType>
typename ScaledDampedResidual<TpetraOperatorType>::vector_type&
ScaledDampedResidual<TpetraOperatorType>::
importVector (vector_type& X_domMap)
{
  if (imp_.is_null ()) {
    return X_domMap;
  }
  else {
    if (X_colMap_.get () == nullptr) {
      using V = vector_type;
      X_colMap_ = std::unique_ptr<V> (new V (imp_->getTargetMap ()));
    }
    X_colMap_->doImport (X_domMap, *imp_, Tpetra::REPLACE);
    return *X_colMap_;
  }
}

template<class TpetraOperatorType>
bool
ScaledDampedResidual<TpetraOperatorType>::
canFuse (const multivector_type& B) const
{
  return B.getNumVectors () == size_t (1) &&
    ! A_crs_.is_null () &&
    exp_.is_null ();
}

template<class TpetraOperatorType>
void
ScaledDampedResidual<TpetraOperatorType>::
unfusedCase (multivector_type& W,
             const SC& alpha,
             vector_type& D_inv,
             multivector_type& B,
             const operator_type& A,
             multivector_type& X,
             const SC& beta)
{
  if (V1_.get () == nullptr) {
    using MV = multivector_type;
    const size_t numVecs = B.getNumVectors ();
    V1_ = std::unique_ptr<MV> (new MV (B.getMap (), numVecs));
  }
  const SC one = Teuchos::ScalarTraits<SC>::one ();

  // V1 = B - A*X
  Tpetra::deep_copy (*V1_, B);
  A.apply (X, *V1_, Teuchos::NO_TRANS, -one, one);

  // W := alpha * D_inv * V1 + beta * W
  W.elementWiseMultiply (alpha, D_inv, *V1_, beta);
}

template<class TpetraOperatorType>
void
ScaledDampedResidual<TpetraOperatorType>::
fusedCase (vector_type& W,
           const SC& alpha,
           vector_type& D_inv,
           vector_type& B,
           const crs_matrix_type& A,
           vector_type& X,
           const SC& beta)
{
  vector_type& X_colMap = importVector (X);

  using Impl::scaled_damped_residual_vector;
  using STS = Teuchos::ScalarTraits<SC>;

  auto A_lcl = A.getLocalMatrixDevice ();
  auto Dinv_lcl = Kokkos::subview (D_inv.getLocalViewDevice (Tpetra::Access::ReadOnly), Kokkos::ALL (), 0);
  auto B_lcl = Kokkos::subview (B.getLocalViewDevice (Tpetra::Access::ReadOnly), Kokkos::ALL (), 0);
  auto X_lcl = Kokkos::subview (X_colMap.getLocalViewDevice (Tpetra::Access::ReadOnly), Kokkos::ALL (), 0);
  if (beta == STS::zero ()) {
    auto W_lcl = Kokkos::subview (W.getLocalViewDevice (Tpetra::Access::OverwriteAll), Kokkos::ALL (), 0);
    scaled_damped_residual_vector (alpha, W_lcl, Dinv_lcl,
                                   B_lcl, A_lcl, X_lcl, beta);
  }
  else { // need to read _and_ write W if beta != 0
    auto W_lcl = Kokkos::subview (W.getLocalViewDevice (Tpetra::Access::ReadWrite), Kokkos::ALL (), 0);
    scaled_damped_residual_vector (alpha, W_lcl, Dinv_lcl,
                                   B_lcl, A_lcl, X_lcl, beta);
  }
}

} // namespace Details
} // namespace Ifpack2

#define IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_INSTANT(SC,LO,GO,NT) \
  template class Ifpack2::Details::ScaledDampedResidual<Tpetra::Operator<SC, LO, GO, NT> >;

#endif // IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_DEF_HPP
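
Below is a minimal usage sketch; it is not part of the header above. It assumes Tpetra objects already built on compatible maps, uses the default Tpetra template parameters, and introduces a hypothetical helper name (applyScaledDampedResidual) and an assumed include name for the class declaration. It only illustrates how the class is driven to compute W := alpha * D_inv * (B - A*X) + beta * W.

// Hypothetical usage sketch; the include name and helper function are assumptions.
#include "Ifpack2_Details_ScaledDampedResidual_decl.hpp"
#include "Tpetra_Operator.hpp"
#include "Tpetra_MultiVector.hpp"
#include "Tpetra_Vector.hpp"
#include "Teuchos_RCP.hpp"

using operator_type = Tpetra::Operator<>;   // default Scalar/LO/GO/Node
using mv_type       = Tpetra::MultiVector<>;
using vec_type      = Tpetra::Vector<>;
using SC            = vec_type::scalar_type;

// Compute W := alpha * D_inv * (B - A*X) + beta * W for already-built
// Tpetra objects whose maps are mutually compatible.
void applyScaledDampedResidual (const Teuchos::RCP<const operator_type>& A,
                                vec_type& D_inv,
                                mv_type& B,
                                mv_type& X,
                                mv_type& W,
                                const SC& alpha,
                                const SC& beta)
{
  Ifpack2::Details::ScaledDampedResidual<operator_type> sdr (A);
  sdr.compute (W, alpha, D_inv, B, X, beta);
}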