Ifpack2 Templated Preconditioning Package  Version 1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Ifpack2_Details_ChebyshevKernel_def.hpp
1 /*
2 //@HEADER
3 // ***********************************************************************
4 //
5 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
6 // Copyright (2009) Sandia Corporation
7 //
8 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
9 // license for use of this work by or on behalf of the U.S. Government.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // ***********************************************************************
39 //@HEADER
40 */
41 
42 #ifndef IFPACK2_DETAILS_CHEBYSHEVKERNEL_DEF_HPP
43 #define IFPACK2_DETAILS_CHEBYSHEVKERNEL_DEF_HPP
44 
45 #include "Tpetra_CrsMatrix.hpp"
46 #include "Tpetra_MultiVector.hpp"
47 #include "Tpetra_Operator.hpp"
48 #include "Tpetra_Vector.hpp"
49 #include "Tpetra_withLocalAccess_MultiVector.hpp"
50 #include "Tpetra_Export_decl.hpp"
51 #include "Tpetra_Import_decl.hpp"
52 #include "Kokkos_ArithTraits.hpp"
53 #include "Teuchos_Assert.hpp"
54 #include <type_traits>
55 #include "KokkosSparse_spmv_impl.hpp"
56 
57 namespace Ifpack2 {
58 namespace Details {
59 namespace Impl {
60 
65 template<class WVector,
66  class DVector,
67  class BVector,
68  class AMatrix,
69  class XVector_colMap,
70  class XVector_domMap,
71  class Scalar,
72  bool use_beta,
73  bool do_X_update>
75  static_assert (static_cast<int> (WVector::Rank) == 1,
76  "WVector must be a rank 1 View.");
77  static_assert (static_cast<int> (DVector::Rank) == 1,
78  "DVector must be a rank 1 View.");
79  static_assert (static_cast<int> (BVector::Rank) == 1,
80  "BVector must be a rank 1 View.");
81  static_assert (static_cast<int> (XVector_colMap::Rank) == 1,
82  "XVector_colMap must be a rank 1 View.");
83  static_assert (static_cast<int> (XVector_domMap::Rank) == 1,
84  "XVector_domMap must be a rank 1 View.");
85 
86  using execution_space = typename AMatrix::execution_space;
87  using LO = typename AMatrix::non_const_ordinal_type;
88  using value_type = typename AMatrix::non_const_value_type;
89  using team_policy = typename Kokkos::TeamPolicy<execution_space>;
90  using team_member = typename team_policy::member_type;
91  using ATV = Kokkos::ArithTraits<value_type>;
92 
93  const Scalar alpha;
94  WVector m_w;
95  DVector m_d;
96  BVector m_b;
97  AMatrix m_A;
98  XVector_colMap m_x_colMap;
99  XVector_domMap m_x_domMap;
100  const Scalar beta;
101 
102  const LO rows_per_team;
103 
104  ChebyshevKernelVectorFunctor (const Scalar& alpha_,
105  const WVector& m_w_,
106  const DVector& m_d_,
107  const BVector& m_b_,
108  const AMatrix& m_A_,
109  const XVector_colMap& m_x_colMap_,
110  const XVector_domMap& m_x_domMap_,
111  const Scalar& beta_,
112  const int rows_per_team_) :
113  alpha (alpha_),
114  m_w (m_w_),
115  m_d (m_d_),
116  m_b (m_b_),
117  m_A (m_A_),
118  m_x_colMap (m_x_colMap_),
119  m_x_domMap (m_x_domMap_),
120  beta (beta_),
121  rows_per_team (rows_per_team_)
122  {
123  const size_t numRows = m_A.numRows ();
124  const size_t numCols = m_A.numCols ();
125 
126  TEUCHOS_ASSERT( m_w.extent (0) == m_d.extent (0) );
127  TEUCHOS_ASSERT( m_w.extent (0) == m_b.extent (0) );
128  TEUCHOS_ASSERT( numRows == size_t (m_w.extent (0)) );
129  TEUCHOS_ASSERT( numCols <= size_t (m_x_colMap.extent (0)) );
130  TEUCHOS_ASSERT( numRows <= size_t (m_x_domMap.extent (0)) );
131  }
132 
133  KOKKOS_INLINE_FUNCTION
134  void operator() (const team_member& dev) const
135  {
136  using residual_value_type = typename BVector::non_const_value_type;
137  using KAT = Kokkos::ArithTraits<residual_value_type>;
138 
139  Kokkos::parallel_for
140  (Kokkos::TeamThreadRange (dev, 0, rows_per_team),
141  [&] (const LO& loop) {
142  const LO lclRow =
143  static_cast<LO> (dev.league_rank ()) * rows_per_team + loop;
144  if (lclRow >= m_A.numRows ()) {
145  return;
146  }
147  const KokkosSparse::SparseRowViewConst<AMatrix> A_row = m_A.rowConst(lclRow);
148  const LO row_length = static_cast<LO> (A_row.length);
149  residual_value_type A_x = KAT::zero ();
150 
151  Kokkos::parallel_reduce
152  (Kokkos::ThreadVectorRange (dev, row_length),
153  [&] (const LO iEntry, residual_value_type& lsum) {
154  const auto A_val = A_row.value(iEntry);
155  lsum += A_val * m_x_colMap(A_row.colidx(iEntry));
156  }, A_x);
157 
158  Kokkos::single
159  (Kokkos::PerThread(dev),
160  [&] () {
161  const auto alpha_D_res =
162  alpha * m_d(lclRow) * (m_b(lclRow) - A_x);
163  if (use_beta) {
164  m_w(lclRow) = beta * m_w(lclRow) + alpha_D_res;
165  }
166  else {
167  m_w(lclRow) = alpha_D_res;
168  }
169  if (do_X_update)
170  m_x_domMap(lclRow) += m_w(lclRow);
171  });
172  });
173  }
174 
175 };
176 
177 
178 // W := alpha * D * (B - A*X) + beta * W.
179 template<class WVector,
180  class DVector,
181  class BVector,
182  class AMatrix,
183  class XVector_colMap,
184  class XVector_domMap,
185  class Scalar>
186 static void
187 chebyshev_kernel_vector
188 (const Scalar& alpha,
189  const WVector& w,
190  const DVector& d,
191  const BVector& b,
192  const AMatrix& A,
193  const XVector_colMap& x_colMap,
194  const XVector_domMap& x_domMap,
195  const Scalar& beta,
196  const bool do_X_update)
197 {
198  using execution_space = typename AMatrix::execution_space;
199 
200  if (A.numRows () == 0) {
201  return;
202  }
203 
204  int team_size = -1;
205  int vector_length = -1;
206  int64_t rows_per_thread = -1;
207 
208  const int64_t rows_per_team = KokkosSparse::Impl::spmv_launch_parameters<execution_space>(A.numRows(), A.nnz(), rows_per_thread, team_size, vector_length);
209  int64_t worksets = (b.extent (0) + rows_per_team - 1) / rows_per_team;
210 
211  using Kokkos::Dynamic;
212  using Kokkos::Static;
213  using Kokkos::Schedule;
214  using Kokkos::TeamPolicy;
215  using policy_type_dynamic = TeamPolicy<execution_space, Schedule<Dynamic> >;
216  using policy_type_static = TeamPolicy<execution_space, Schedule<Static> >;
217  const char kernel_label[] = "chebyshev_kernel_vector";
218  policy_type_dynamic policyDynamic (1, 1);
219  policy_type_static policyStatic (1, 1);
220  if (team_size < 0) {
221  policyDynamic = policy_type_dynamic (worksets, Kokkos::AUTO, vector_length);
222  policyStatic = policy_type_static (worksets, Kokkos::AUTO, vector_length);
223  }
224  else {
225  policyDynamic = policy_type_dynamic (worksets, team_size, vector_length);
226  policyStatic = policy_type_static (worksets, team_size, vector_length);
227  }
228 
229  // Canonicalize template arguments to avoid redundant instantiations.
230  using w_vec_type = typename WVector::non_const_type;
231  using d_vec_type = typename DVector::const_type;
232  using b_vec_type = typename BVector::const_type;
233  using matrix_type = AMatrix;
234  using x_colMap_vec_type = typename XVector_colMap::const_type;
235  using x_domMap_vec_type = typename XVector_domMap::non_const_type;
236  using scalar_type = typename Kokkos::ArithTraits<Scalar>::val_type;
237 
238  if (beta == Kokkos::ArithTraits<Scalar>::zero ()) {
239  constexpr bool use_beta = false;
240  if (do_X_update) {
241  using functor_type =
242  ChebyshevKernelVectorFunctor<w_vec_type, d_vec_type,
243  b_vec_type, matrix_type,
244  x_colMap_vec_type, x_domMap_vec_type,
245  scalar_type,
246  use_beta,
247  true>;
248  functor_type func (alpha, w, d, b, A, x_colMap, x_domMap, beta, rows_per_team);
249  if(A.nnz()>10000000)
250  Kokkos::parallel_for (kernel_label, policyDynamic, func);
251  else
252  Kokkos::parallel_for (kernel_label, policyStatic, func);
253  } else {
254  using functor_type =
255  ChebyshevKernelVectorFunctor<w_vec_type, d_vec_type,
256  b_vec_type, matrix_type,
257  x_colMap_vec_type, x_domMap_vec_type,
258  scalar_type,
259  use_beta,
260  false>;
261  functor_type func (alpha, w, d, b, A, x_colMap, x_domMap, beta, rows_per_team);
262  if(A.nnz()>10000000)
263  Kokkos::parallel_for (kernel_label, policyDynamic, func);
264  else
265  Kokkos::parallel_for (kernel_label, policyStatic, func);
266  }
267  }
268  else {
269  constexpr bool use_beta = true;
270  if (do_X_update) {
271  using functor_type =
272  ChebyshevKernelVectorFunctor<w_vec_type, d_vec_type,
273  b_vec_type, matrix_type,
274  x_colMap_vec_type, x_domMap_vec_type,
275  scalar_type,
276  use_beta,
277  true>;
278  functor_type func (alpha, w, d, b, A, x_colMap, x_domMap, beta, rows_per_team);
279  if(A.nnz()>10000000)
280  Kokkos::parallel_for (kernel_label, policyDynamic, func);
281  else
282  Kokkos::parallel_for (kernel_label, policyStatic, func);
283  } else {
284  using functor_type =
285  ChebyshevKernelVectorFunctor<w_vec_type, d_vec_type,
286  b_vec_type, matrix_type,
287  x_colMap_vec_type, x_domMap_vec_type,
288  scalar_type,
289  use_beta,
290  false>;
291  functor_type func (alpha, w, d, b, A, x_colMap, x_domMap, beta, rows_per_team);
292  if(A.nnz()>10000000)
293  Kokkos::parallel_for (kernel_label, policyDynamic, func);
294  else
295  Kokkos::parallel_for (kernel_label, policyStatic, func);
296  }
297  }
298 }
299 
300 } // namespace Impl
301 
302 template<class TpetraOperatorType>
303 ChebyshevKernel<TpetraOperatorType>::
304 ChebyshevKernel (const Teuchos::RCP<const operator_type>& A)
305 {
306  setMatrix (A);
307 }
308 
309 template<class TpetraOperatorType>
310 void
311 ChebyshevKernel<TpetraOperatorType>::
312 setMatrix (const Teuchos::RCP<const operator_type>& A)
313 {
314  if (A_op_.get () != A.get ()) {
315  A_op_ = A;
316 
317  // We'll (re)allocate these on demand.
318  X_colMap_ = std::unique_ptr<vector_type> (nullptr);
319  V1_ = std::unique_ptr<multivector_type> (nullptr);
320 
321  using Teuchos::rcp_dynamic_cast;
323  rcp_dynamic_cast<const crs_matrix_type> (A);
324  if (A_crs.is_null ()) {
325  A_crs_ = Teuchos::null;
326  imp_ = Teuchos::null;
327  exp_ = Teuchos::null;
328  }
329  else {
330  TEUCHOS_ASSERT( A_crs->isFillComplete () );
331  A_crs_ = A_crs;
332  auto G = A_crs->getCrsGraph ();
333  imp_ = G->getImporter ();
334  exp_ = G->getExporter ();
335  }
336  }
337 }
338 
339 template<class TpetraOperatorType>
340 void
341 ChebyshevKernel<TpetraOperatorType>::
342 compute (multivector_type& W,
343  const SC& alpha,
344  vector_type& D_inv,
345  multivector_type& B,
346  multivector_type& X,
347  const SC& beta)
348 {
349  using Teuchos::RCP;
350  using Teuchos::rcp;
351 
352  if (canFuse (B)) {
353  // "nonconst" here has no effect other than on the return type.
354  if (W_vec_.is_null() || W.getLocalViewHost().data() != viewW_.data()) {
355  viewW_ = W.getLocalViewHost();
356  W_vec_ = W.getVectorNonConst (0);
357  }
358  if (B_vec_.is_null() || B.getLocalViewHost().data() != viewB_.data()) {
359  viewB_ = B.getLocalViewHost();
360  B_vec_ = B.getVectorNonConst (0);
361  }
362  if (X_vec_.is_null() || X.getLocalViewHost().data() != viewX_.data()) {
363  viewX_ = X.getLocalViewHost();
364  X_vec_ = X.getVectorNonConst (0);
365  }
366  TEUCHOS_ASSERT( ! A_crs_.is_null () );
367  fusedCase (*W_vec_, alpha, D_inv, *B_vec_, *A_crs_, *X_vec_, beta);
368  }
369  else {
370  TEUCHOS_ASSERT( ! A_op_.is_null () );
371  unfusedCase (W, alpha, D_inv, B, *A_op_, X, beta);
372  }
373 }
374 
375 template<class TpetraOperatorType>
376 typename ChebyshevKernel<TpetraOperatorType>::vector_type&
377 ChebyshevKernel<TpetraOperatorType>::
378 importVector (vector_type& X_domMap)
379 {
380  if (imp_.is_null ()) {
381  return X_domMap;
382  }
383  else {
384  if (X_colMap_.get () == nullptr) {
385  using V = vector_type;
386  X_colMap_ = std::unique_ptr<V> (new V (imp_->getTargetMap ()));
387  }
388  X_colMap_->doImport (X_domMap, *imp_, Tpetra::REPLACE);
389  return *X_colMap_;
390  }
391 }
392 
393 template<class TpetraOperatorType>
394 bool
395 ChebyshevKernel<TpetraOperatorType>::
396 canFuse (const multivector_type& B) const
397 {
398  return B.getNumVectors () == size_t (1) &&
399  ! A_crs_.is_null () &&
400  exp_.is_null ();
401 }
402 
403 template<class TpetraOperatorType>
404 void
405 ChebyshevKernel<TpetraOperatorType>::
406 unfusedCase (multivector_type& W,
407  const SC& alpha,
408  vector_type& D_inv,
409  multivector_type& B,
410  const operator_type& A,
411  multivector_type& X,
412  const SC& beta)
413 {
414  using STS = Teuchos::ScalarTraits<SC>;
415  if (V1_.get () == nullptr) {
416  using MV = multivector_type;
417  const size_t numVecs = B.getNumVectors ();
418  V1_ = std::unique_ptr<MV> (new MV (B.getMap (), numVecs));
419  }
420  const SC one = Teuchos::ScalarTraits<SC>::one ();
421 
422  // V1 = B - A*X
423  Tpetra::deep_copy (*V1_, B);
424  A.apply (X, *V1_, Teuchos::NO_TRANS, -one, one);
425 
426  // W := alpha * D_inv * V1 + beta * W
427  W.elementWiseMultiply (alpha, D_inv, *V1_, beta);
428 
429  // X := X + W
430  X.update (STS::one(), W, STS::one());
431 }
432 
433 template<class TpetraOperatorType>
434 void
435 ChebyshevKernel<TpetraOperatorType>::
436 fusedCase (vector_type& W,
437  const SC& alpha,
438  vector_type& D_inv,
439  vector_type& B,
440  const crs_matrix_type& A,
441  vector_type& X,
442  const SC& beta)
443 {
444  vector_type& X_colMap = importVector (X);
445 
446  // Only need these aliases because we lack C++14 generic lambdas.
447  using Tpetra::with_local_access_function_argument_type;
448  using ro_lcl_vec_type =
449  with_local_access_function_argument_type<
450  decltype (readOnly (B))>;
451  using wo_lcl_vec_type =
452  with_local_access_function_argument_type<
453  decltype (writeOnly (B))>;
454  using rw_lcl_vec_type =
455  with_local_access_function_argument_type<
456  decltype (readWrite (B))>;
457 
458  using Tpetra::withLocalAccess;
459  using Tpetra::readOnly;
460  using Tpetra::readWrite;
461  using Tpetra::writeOnly;
462  using Impl::chebyshev_kernel_vector;
463  using STS = Teuchos::ScalarTraits<SC>;
464 
465  auto A_lcl = A.getLocalMatrix ();
466  const bool do_X_update = !imp_.is_null ();
467  if (beta == STS::zero ()) {
468  withLocalAccess
469  ([&] (const wo_lcl_vec_type& W_lcl,
470  const ro_lcl_vec_type& D_lcl,
471  const ro_lcl_vec_type& B_lcl,
472  const ro_lcl_vec_type& X_colMap_lcl,
473  const wo_lcl_vec_type& X_domMap_lcl) {
474  chebyshev_kernel_vector (alpha, W_lcl, D_lcl,
475  B_lcl, A_lcl,
476  X_colMap_lcl, X_domMap_lcl,
477  beta,
478  do_X_update);
479  },
480  writeOnly (W),
481  readOnly (D_inv),
482  readOnly (B),
483  readOnly (X_colMap),
484  writeOnly (X));
485  }
486  else { // need to read _and_ write W if beta != 0
487  withLocalAccess
488  ([&] (const rw_lcl_vec_type& W_lcl,
489  const ro_lcl_vec_type& D_lcl,
490  const ro_lcl_vec_type& B_lcl,
491  const ro_lcl_vec_type& X_colMap_lcl,
492  const wo_lcl_vec_type& X_domMap_lcl) {
493  chebyshev_kernel_vector (alpha, W_lcl, D_lcl,
494  B_lcl, A_lcl,
495  X_colMap_lcl, X_domMap_lcl,
496  beta,
497  do_X_update);
498  },
499  readWrite (W),
500  readOnly (D_inv),
501  readOnly (B),
502  readOnly (X_colMap),
503  writeOnly (X));
504  }
505  if (!do_X_update)
506  X.update(STS::one (), W, STS::one ());
507 }
508 
509 } // namespace Details
510 } // namespace Ifpack2
511 
512 #define IFPACK2_DETAILS_CHEBYSHEVKERNEL_INSTANT(SC,LO,GO,NT) \
513  template class Ifpack2::Details::ChebyshevKernel<Tpetra::Operator<SC, LO, GO, NT> >;
514 
515 #endif // IFPACK2_DETAILS_CHEBYSHEVKERNEL_DEF_HPP
T * get() const
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Functor for computing W := alpha * D * (B - A*X) + beta * W and X := X+W.
Definition: Ifpack2_Details_ChebyshevKernel_def.hpp:74
#define TEUCHOS_ASSERT(assertion_test)
bool is_null() const