Ifpack2 Templated Preconditioning Package Version 1.0
Ifpack2_Details_ScaledDampedResidual_def.hpp
/*
//@HEADER
// ***********************************************************************
//
// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
// Copyright (2009) Sandia Corporation
//
// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
// license for use of this work by or on behalf of the U.S. Government.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// ***********************************************************************
//@HEADER
*/

#ifndef IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_DEF_HPP
#define IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_DEF_HPP

#include "Tpetra_CrsMatrix.hpp"
#include "Tpetra_MultiVector.hpp"
#include "Tpetra_Operator.hpp"
#include "Tpetra_Vector.hpp"
#include "Tpetra_withLocalAccess_MultiVector.hpp"
#include "Tpetra_Export_decl.hpp"
#include "Tpetra_Import_decl.hpp"
#include "Kokkos_ArithTraits.hpp"
#include "Teuchos_Assert.hpp"
#include <type_traits>
#include "KokkosSparse_spmv_impl.hpp"

namespace Ifpack2 {
namespace Details {
namespace Impl {

/// \brief Functor for computing W := alpha * D * (B - A*X) + beta * W.
template<class WVector,
         class DVector,
         class BVector,
         class AMatrix,
         class XVector,
         class Scalar,
         bool use_beta>
struct ScaledDampedResidualVectorFunctor {
  static_assert (static_cast<int> (WVector::Rank) == 1,
                 "WVector must be a rank 1 View.");
  static_assert (static_cast<int> (DVector::Rank) == 1,
                 "DVector must be a rank 1 View.");
  static_assert (static_cast<int> (BVector::Rank) == 1,
                 "BVector must be a rank 1 View.");
  static_assert (static_cast<int> (XVector::Rank) == 1,
                 "XVector must be a rank 1 View.");

  using execution_space = typename AMatrix::execution_space;
  using LO = typename AMatrix::non_const_ordinal_type;
  using value_type = typename AMatrix::non_const_value_type;
  using team_policy = typename Kokkos::TeamPolicy<execution_space>;
  using team_member = typename team_policy::member_type;
  using ATV = Kokkos::ArithTraits<value_type>;

  const Scalar alpha;
  WVector m_w;
  DVector m_d;
  BVector m_b;
  AMatrix m_A;
  XVector m_x;
  const Scalar beta;

  const LO rows_per_team;

  ScaledDampedResidualVectorFunctor (const Scalar& alpha_,
                                     const WVector& m_w_,
                                     const DVector& m_d_,
                                     const BVector& m_b_,
                                     const AMatrix& m_A_,
                                     const XVector& m_x_,
                                     const Scalar& beta_,
                                     const int rows_per_team_) :
    alpha (alpha_),
    m_w (m_w_),
    m_d (m_d_),
    m_b (m_b_),
    m_A (m_A_),
    m_x (m_x_),
    beta (beta_),
    rows_per_team (rows_per_team_)
  {
    const size_t numRows = m_A.numRows ();
    const size_t numCols = m_A.numCols ();

    TEUCHOS_ASSERT( m_w.extent (0) == m_d.extent (0) );
    TEUCHOS_ASSERT( m_w.extent (0) == m_b.extent (0) );
    TEUCHOS_ASSERT( numRows == size_t (m_w.extent (0)) );
    TEUCHOS_ASSERT( numCols <= size_t (m_x.extent (0)) );
  }

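  // Each team ("league rank") covers a contiguous block of rows_per_team
  // rows: the TeamThreadRange in operator() hands one row to each thread
  // iteration, a ThreadVectorRange reduces that row's dot product with x
  // across vector lanes, and a single lane then applies the damping and
  // scaling to produce w(row).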
  KOKKOS_INLINE_FUNCTION
  void operator() (const team_member& dev) const
  {
    using residual_value_type = typename BVector::non_const_value_type;
    using KAT = Kokkos::ArithTraits<residual_value_type>;

    Kokkos::parallel_for
      (Kokkos::TeamThreadRange (dev, 0, rows_per_team),
       [&] (const LO& loop) {
        const LO lclRow =
          static_cast<LO> (dev.league_rank ()) * rows_per_team + loop;
        if (lclRow >= m_A.numRows ()) {
          return;
        }
        const auto A_row = m_A.rowConst(lclRow);
        const LO row_length = static_cast<LO> (A_row.length);
        residual_value_type A_x = KAT::zero ();

        Kokkos::parallel_reduce
          (Kokkos::ThreadVectorRange (dev, row_length),
           [&] (const LO iEntry, residual_value_type& lsum) {
            const auto A_val = A_row.value(iEntry);
            lsum += A_val * m_x(A_row.colidx(iEntry));
          }, A_x);

        Kokkos::single
          (Kokkos::PerThread(dev),
           [&] () {
            const auto alpha_D_res =
              alpha * m_d(lclRow) * (m_b(lclRow) - A_x);
            if (use_beta) {
              m_w(lclRow) = beta * m_w(lclRow) + alpha_D_res;
            }
            else {
              m_w(lclRow) = alpha_D_res;
            }
          });
      });
  }

};


// W := alpha * D * (B - A*X) + beta * W.
template<class WVector,
         class DVector,
         class BVector,
         class AMatrix,
         class XVector,
         class Scalar>
static void
scaled_damped_residual_vector
(const Scalar& alpha,
 const WVector& w,
 const DVector& d,
 const BVector& b,
 const AMatrix& A,
 const XVector& x,
 const Scalar& beta)
{
  using execution_space = typename AMatrix::execution_space;

  if (A.numRows () == 0) {
    return;
  }

  int team_size = -1;
  int vector_length = -1;
  int64_t rows_per_thread = -1;

  const int64_t rows_per_team =
    KokkosSparse::Impl::spmv_launch_parameters<execution_space> (A.numRows (), A.nnz (), rows_per_thread, team_size, vector_length);
  int64_t worksets = (b.extent (0) + rows_per_team - 1) / rows_per_team;
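  // The expression above is integer ceiling division. For example (an
  // illustrative case): with b.extent(0) = 10000 rows and rows_per_team =
  // 64, worksets = (10000 + 63) / 64 = 157 teams; the last team is only
  // partially full, which the lclRow bound check in the functor handles.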

  using Kokkos::Dynamic;
  using Kokkos::Static;
  using Kokkos::Schedule;
  using Kokkos::TeamPolicy;
  using policy_type_dynamic = TeamPolicy<execution_space, Schedule<Dynamic> >;
  using policy_type_static = TeamPolicy<execution_space, Schedule<Static> >;
  const char kernel_label[] = "scaled_damped_residual_vector";
  policy_type_dynamic policyDynamic (1, 1);
  policy_type_static policyStatic (1, 1);
  if (team_size < 0) {
    policyDynamic = policy_type_dynamic (worksets, Kokkos::AUTO, vector_length);
    policyStatic = policy_type_static (worksets, Kokkos::AUTO, vector_length);
  }
  else {
    policyDynamic = policy_type_dynamic (worksets, team_size, vector_length);
    policyStatic = policy_type_static (worksets, team_size, vector_length);
  }

  // Canonicalize template arguments to avoid redundant instantiations.
  using w_vec_type = typename WVector::non_const_type;
  using d_vec_type = typename DVector::const_type;
  using b_vec_type = typename BVector::const_type;
  using matrix_type = AMatrix;
  using x_vec_type = typename XVector::const_type;
  using scalar_type = typename Kokkos::ArithTraits<Scalar>::val_type;

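  // Two compile-time cases: when beta == 0, the use_beta = false functor
  // never reads W, so W may safely hold uninitialized data (even NaNs).
  // Either way, the launches below pick the Dynamic schedule for matrices
  // with more than 1e7 entries (to smooth out row-length imbalance) and
  // the lower-overhead Static schedule otherwise.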
  if (beta == Kokkos::ArithTraits<Scalar>::zero ()) {
    constexpr bool use_beta = false;
    using functor_type =
      ScaledDampedResidualVectorFunctor<w_vec_type, d_vec_type,
                                        b_vec_type, matrix_type,
                                        x_vec_type, scalar_type,
                                        use_beta>;
    functor_type func (alpha, w, d, b, A, x, beta, rows_per_team);
    if (A.nnz () > 10000000)
      Kokkos::parallel_for (kernel_label, policyDynamic, func);
    else
      Kokkos::parallel_for (kernel_label, policyStatic, func);
  }
  else {
    constexpr bool use_beta = true;
    using functor_type =
      ScaledDampedResidualVectorFunctor<w_vec_type, d_vec_type,
                                        b_vec_type, matrix_type,
                                        x_vec_type, scalar_type,
                                        use_beta>;
    functor_type func (alpha, w, d, b, A, x, beta, rows_per_team);
    if (A.nnz () > 10000000)
      Kokkos::parallel_for (kernel_label, policyDynamic, func);
    else
      Kokkos::parallel_for (kernel_label, policyStatic, func);
  }
}

} // namespace Impl

template<class TpetraOperatorType>
ScaledDampedResidual<TpetraOperatorType>::
ScaledDampedResidual (const Teuchos::RCP<const operator_type>& A)
{
  setMatrix (A);
}

template<class TpetraOperatorType>
void
ScaledDampedResidual<TpetraOperatorType>::
setMatrix (const Teuchos::RCP<const operator_type>& A)
{
  if (A_op_.get () != A.get ()) {
    A_op_ = A;

    // We'll (re)allocate these on demand.
    X_colMap_ = std::unique_ptr<vector_type> (nullptr);
    V1_ = std::unique_ptr<multivector_type> (nullptr);

    using Teuchos::rcp_dynamic_cast;
    Teuchos::RCP<const crs_matrix_type> A_crs =
      rcp_dynamic_cast<const crs_matrix_type> (A);
    if (A_crs.is_null ()) {
      A_crs_ = Teuchos::null;
      imp_ = Teuchos::null;
      exp_ = Teuchos::null;
    }
    else {
      TEUCHOS_ASSERT( A_crs->isFillComplete () );
      A_crs_ = A_crs;
      auto G = A_crs->getCrsGraph ();
      imp_ = G->getImporter ();
      exp_ = G->getExporter ();
    }
  }
}
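
// Note on the caching above: setMatrix performs the dynamic cast and the
// Import/Export graph queries once per matrix change, so that each
// compute() call can choose between the fused and unfused paths cheaply.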

template<class TpetraOperatorType>
void
ScaledDampedResidual<TpetraOperatorType>::
compute (multivector_type& W,
         const SC& alpha,
         vector_type& D_inv,
         multivector_type& B,
         multivector_type& X,
         const SC& beta)
{
  using Teuchos::RCP;
  using Teuchos::rcp;

  if (canFuse (B)) {
    // "nonconst" here has no effect other than on the return type.
    if (W_vec_.is_null() || W.getLocalViewHost().data() != viewW_.data()) {
      viewW_ = W.getLocalViewHost();
      W_vec_ = W.getVectorNonConst (0);
    }
    if (B_vec_.is_null() || B.getLocalViewHost().data() != viewB_.data()) {
      viewB_ = B.getLocalViewHost();
      B_vec_ = B.getVectorNonConst (0);
    }
    if (X_vec_.is_null() || X.getLocalViewHost().data() != viewX_.data()) {
      viewX_ = X.getLocalViewHost();
      X_vec_ = X.getVectorNonConst (0);
    }
    TEUCHOS_ASSERT( ! A_crs_.is_null () );
    fusedCase (*W_vec_, alpha, D_inv, *B_vec_, *A_crs_, *X_vec_, beta);
  }
  else {
    TEUCHOS_ASSERT( ! A_op_.is_null () );
    unfusedCase (W, alpha, D_inv, B, *A_op_, X, beta);
  }
}
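
// Typical call pattern (an illustrative sketch, not code from this file;
// "A", "W", "D_inv", "B", and "X" are hypothetical caller-owned Tpetra
// objects of the matching operator, vector, and multivector types):
//
//   Ifpack2::Details::ScaledDampedResidual<op_type> sdr (A);
//   sdr.compute (W, alpha, D_inv, B, X, beta); // W := alpha*D_inv*(B - A*X) + beta*W
//
// If A is a fill-complete CrsMatrix, B has a single column, and the graph
// has no Export, compute() takes the fused one-kernel path; otherwise it
// falls back to unfusedCase (apply() followed by elementWiseMultiply()).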

template<class TpetraOperatorType>
typename ScaledDampedResidual<TpetraOperatorType>::vector_type&
ScaledDampedResidual<TpetraOperatorType>::
importVector (vector_type& X_domMap)
{
  if (imp_.is_null ()) {
    return X_domMap;
  }
  else {
    if (X_colMap_.get () == nullptr) {
      using V = vector_type;
      X_colMap_ = std::unique_ptr<V> (new V (imp_->getTargetMap ()));
    }
    X_colMap_->doImport (X_domMap, *imp_, Tpetra::REPLACE);
    return *X_colMap_;
  }
}

template<class TpetraOperatorType>
bool
ScaledDampedResidual<TpetraOperatorType>::
canFuse (const multivector_type& B) const
{
  return B.getNumVectors () == size_t (1) &&
    ! A_crs_.is_null () &&
    exp_.is_null ();
}
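
// The fused path requires exactly one column (the kernel operates on
// rank-1 local Views), an actual CrsMatrix (it needs the local sparse
// matrix), and a null Export (it writes W directly in the row Map, with
// no communication step after the kernel).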

template<class TpetraOperatorType>
void
ScaledDampedResidual<TpetraOperatorType>::
unfusedCase (multivector_type& W,
             const SC& alpha,
             vector_type& D_inv,
             multivector_type& B,
             const operator_type& A,
             multivector_type& X,
             const SC& beta)
{
  if (V1_.get () == nullptr) {
    using MV = multivector_type;
    const size_t numVecs = B.getNumVectors ();
    V1_ = std::unique_ptr<MV> (new MV (B.getMap (), numVecs));
  }
  const SC one = Teuchos::ScalarTraits<SC>::one ();

  // V1 = B - A*X
  Tpetra::deep_copy (*V1_, B);
  A.apply (X, *V1_, Teuchos::NO_TRANS, -one, one);

  // W := alpha * D_inv * V1 + beta * W
  W.elementWiseMultiply (alpha, D_inv, *V1_, beta);
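  // Entrywise, the call above computes
  //   W(i,j) = beta * W(i,j) + alpha * D_inv(i) * V1(i,j),
  // the same update as the fused kernel, but in two passes over the data
  // (apply, then elementWiseMultiply) instead of one.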
}

template<class TpetraOperatorType>
void
ScaledDampedResidual<TpetraOperatorType>::
fusedCase (vector_type& W,
           const SC& alpha,
           vector_type& D_inv,
           vector_type& B,
           const crs_matrix_type& A,
           vector_type& X,
           const SC& beta)
{
  vector_type& X_colMap = importVector (X);

  // Only need these aliases because we lack C++14 generic lambdas.
  using Tpetra::with_local_access_function_argument_type;
  using ro_lcl_vec_type =
    with_local_access_function_argument_type<
      decltype (readOnly (B))>;
  using wo_lcl_vec_type =
    with_local_access_function_argument_type<
      decltype (writeOnly (B))>;
  using rw_lcl_vec_type =
    with_local_access_function_argument_type<
      decltype (readWrite (B))>;

  using Tpetra::withLocalAccess;
  using Tpetra::readOnly;
  using Tpetra::readWrite;
  using Tpetra::writeOnly;
  using Impl::scaled_damped_residual_vector;
  using STS = Teuchos::ScalarTraits<SC>;

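  // withLocalAccess passes the lambda local Kokkos views of each vector,
  // and the declared access intents (readOnly / writeOnly / readWrite)
  // tell Tpetra which host-device data movement it may skip; in
  // particular, writeOnly(W) lets it avoid copying W's old contents
  // before the kernel overwrites them.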
  auto A_lcl = A.getLocalMatrix ();
  if (beta == STS::zero ()) {
    withLocalAccess
      ([&] (const wo_lcl_vec_type& W_lcl,
            const ro_lcl_vec_type& D_lcl,
            const ro_lcl_vec_type& B_lcl,
            const ro_lcl_vec_type& X_lcl) {
        scaled_damped_residual_vector (alpha, W_lcl, D_lcl,
                                       B_lcl, A_lcl, X_lcl, beta);
      },
      writeOnly (W),
      readOnly (D_inv),
      readOnly (B),
      readOnly (X_colMap));
  }
  else { // need to read _and_ write W if beta != 0
    withLocalAccess
      ([&] (const rw_lcl_vec_type& W_lcl,
            const ro_lcl_vec_type& D_lcl,
            const ro_lcl_vec_type& B_lcl,
            const ro_lcl_vec_type& X_lcl) {
        scaled_damped_residual_vector (alpha, W_lcl, D_lcl,
                                       B_lcl, A_lcl, X_lcl, beta);
      },
      readWrite (W),
      readOnly (D_inv),
      readOnly (B),
      readOnly (X_colMap));
  }
}

} // namespace Details
} // namespace Ifpack2

#define IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_INSTANT(SC,LO,GO,NT) \
  template class Ifpack2::Details::ScaledDampedResidual<Tpetra::Operator<SC, LO, GO, NT> >;
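
// Sketch of how the macro above is used (illustrative only; "node_type"
// is a placeholder, and the supported (SC, LO, GO, NT) combinations come
// from the Trilinos ETI build configuration):
//
//   // In an ETI .cpp file, after including this _def.hpp header:
//   IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_INSTANT(double, int, long long, node_type)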

#endif // IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_DEF_HPP