Ifpack2 Templated Preconditioning Package, Version 1.0
Ifpack2_Details_ChebyshevKernel_def.hpp
/*
//@HEADER
// ***********************************************************************
//
// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
// Copyright (2009) Sandia Corporation
//
// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
// license for use of this work by or on behalf of the U.S. Government.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// ***********************************************************************
//@HEADER
*/

#ifndef IFPACK2_DETAILS_CHEBYSHEVKERNEL_DEF_HPP
#define IFPACK2_DETAILS_CHEBYSHEVKERNEL_DEF_HPP

#include "Tpetra_CrsMatrix.hpp"
#include "Tpetra_MultiVector.hpp"
#include "Tpetra_Operator.hpp"
#include "Tpetra_Vector.hpp"
#include "Tpetra_Export_decl.hpp"
#include "Tpetra_Import_decl.hpp"
#include "Kokkos_ArithTraits.hpp"
#include "Teuchos_Assert.hpp"
#include <type_traits>
#include "KokkosSparse_spmv_impl.hpp"

namespace Ifpack2 {
namespace Details {
namespace Impl {

/// \brief Functor for computing W := alpha * D * (B - A*X) + beta * W and X := X+W.
template<class WVector,
         class DVector,
         class BVector,
         class AMatrix,
         class XVector_colMap,
         class XVector_domMap,
         class Scalar,
         bool use_beta,
         bool do_X_update>
struct ChebyshevKernelVectorFunctor {
  static_assert (static_cast<int> (WVector::rank) == 1,
                 "WVector must be a rank 1 View.");
  static_assert (static_cast<int> (DVector::rank) == 1,
                 "DVector must be a rank 1 View.");
  static_assert (static_cast<int> (BVector::rank) == 1,
                 "BVector must be a rank 1 View.");
  static_assert (static_cast<int> (XVector_colMap::rank) == 1,
                 "XVector_colMap must be a rank 1 View.");
  static_assert (static_cast<int> (XVector_domMap::rank) == 1,
                 "XVector_domMap must be a rank 1 View.");

  using execution_space = typename AMatrix::execution_space;
  using LO = typename AMatrix::non_const_ordinal_type;
  using value_type = typename AMatrix::non_const_value_type;
  using team_policy = typename Kokkos::TeamPolicy<execution_space>;
  using team_member = typename team_policy::member_type;
  using ATV = Kokkos::ArithTraits<value_type>;

  const Scalar alpha;
  WVector m_w;
  DVector m_d;
  BVector m_b;
  AMatrix m_A;
  XVector_colMap m_x_colMap;
  XVector_domMap m_x_domMap;
  const Scalar beta;

  const LO rows_per_team;

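  // The constructor captures the coefficients, vectors, matrix, and team
  // granularity, and checks that the view extents are consistent with the
  // local matrix dimensions.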
  ChebyshevKernelVectorFunctor (const Scalar& alpha_,
                                const WVector& m_w_,
                                const DVector& m_d_,
                                const BVector& m_b_,
                                const AMatrix& m_A_,
                                const XVector_colMap& m_x_colMap_,
                                const XVector_domMap& m_x_domMap_,
                                const Scalar& beta_,
                                const int rows_per_team_) :
    alpha (alpha_),
    m_w (m_w_),
    m_d (m_d_),
    m_b (m_b_),
    m_A (m_A_),
    m_x_colMap (m_x_colMap_),
    m_x_domMap (m_x_domMap_),
    beta (beta_),
    rows_per_team (rows_per_team_)
  {
    const size_t numRows = m_A.numRows ();
    const size_t numCols = m_A.numCols ();

    TEUCHOS_ASSERT( m_w.extent (0) == m_d.extent (0) );
    TEUCHOS_ASSERT( m_w.extent (0) == m_b.extent (0) );
    TEUCHOS_ASSERT( numRows == size_t (m_w.extent (0)) );
    TEUCHOS_ASSERT( numCols <= size_t (m_x_colMap.extent (0)) );
    TEUCHOS_ASSERT( numRows <= size_t (m_x_domMap.extent (0)) );
  }

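  // Each team covers rows_per_team consecutive local rows.  Threads within
  // the team each take one row; vector lanes reduce that row's contribution
  // to (A*x), after which a single lane applies
  // W := alpha * D * (B - A*x) [+ beta * W] and, optionally, X += W.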
  KOKKOS_INLINE_FUNCTION
  void operator() (const team_member& dev) const
  {
    using residual_value_type = typename BVector::non_const_value_type;
    using KAT = Kokkos::ArithTraits<residual_value_type>;

    Kokkos::parallel_for
      (Kokkos::TeamThreadRange (dev, 0, rows_per_team),
       [&] (const LO& loop) {
         const LO lclRow =
           static_cast<LO> (dev.league_rank ()) * rows_per_team + loop;
         if (lclRow >= m_A.numRows ()) {
           return;
         }
         const KokkosSparse::SparseRowViewConst<AMatrix> A_row = m_A.rowConst(lclRow);
         const LO row_length = static_cast<LO> (A_row.length);
         residual_value_type A_x = KAT::zero ();

         Kokkos::parallel_reduce
           (Kokkos::ThreadVectorRange (dev, row_length),
            [&] (const LO iEntry, residual_value_type& lsum) {
              const auto A_val = A_row.value(iEntry);
              lsum += A_val * m_x_colMap(A_row.colidx(iEntry));
            }, A_x);

         Kokkos::single
           (Kokkos::PerThread(dev),
            [&] () {
              const auto alpha_D_res =
                alpha * m_d(lclRow) * (m_b(lclRow) - A_x);
              if (use_beta) {
                m_w(lclRow) = beta * m_w(lclRow) + alpha_D_res;
              }
              else {
                m_w(lclRow) = alpha_D_res;
              }
              if (do_X_update)
                m_x_domMap(lclRow) += m_w(lclRow);
            });
       });
  }

};


// W := alpha * D * (B - A*X) + beta * W, and (optionally) X := X + W.
template<class WVector,
         class DVector,
         class BVector,
         class AMatrix,
         class XVector_colMap,
         class XVector_domMap,
         class Scalar>
static void
chebyshev_kernel_vector
(const Scalar& alpha,
 const WVector& w,
 const DVector& d,
 const BVector& b,
 const AMatrix& A,
 const XVector_colMap& x_colMap,
 const XVector_domMap& x_domMap,
 const Scalar& beta,
 const bool do_X_update)
{
  using execution_space = typename AMatrix::execution_space;

  if (A.numRows () == 0) {
    return;
  }

  int team_size = -1;
  int vector_length = -1;
  int64_t rows_per_thread = -1;

  const int64_t rows_per_team = KokkosSparse::Impl::spmv_launch_parameters<execution_space>(A.numRows(), A.nnz(), rows_per_thread, team_size, vector_length);
  int64_t worksets = (b.extent (0) + rows_per_team - 1) / rows_per_team;

  using Kokkos::Dynamic;
  using Kokkos::Static;
  using Kokkos::Schedule;
  using Kokkos::TeamPolicy;
  using policy_type_dynamic = TeamPolicy<execution_space, Schedule<Dynamic> >;
  using policy_type_static = TeamPolicy<execution_space, Schedule<Static> >;
  const char kernel_label[] = "chebyshev_kernel_vector";
  policy_type_dynamic policyDynamic (1, 1);
  policy_type_static policyStatic (1, 1);
  if (team_size < 0) {
    policyDynamic = policy_type_dynamic (worksets, Kokkos::AUTO, vector_length);
    policyStatic = policy_type_static (worksets, Kokkos::AUTO, vector_length);
  }
  else {
    policyDynamic = policy_type_dynamic (worksets, team_size, vector_length);
    policyStatic = policy_type_static (worksets, team_size, vector_length);
  }

  // Canonicalize template arguments to avoid redundant instantiations.
  using w_vec_type = typename WVector::non_const_type;
  using d_vec_type = typename DVector::const_type;
  using b_vec_type = typename BVector::const_type;
  using matrix_type = AMatrix;
  using x_colMap_vec_type = typename XVector_colMap::const_type;
  using x_domMap_vec_type = typename XVector_domMap::non_const_type;
  using scalar_type = typename Kokkos::ArithTraits<Scalar>::val_type;

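  // Pick a compile-time specialization of the functor: use_beta selects
  // whether the update accumulates beta * W, and do_X_update selects whether
  // X is updated inside the same kernel.  Matrices with more than 1e7
  // entries get the dynamically scheduled policy; smaller ones get the
  // statically scheduled policy.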
  if (beta == Kokkos::ArithTraits<Scalar>::zero ()) {
    constexpr bool use_beta = false;
    if (do_X_update) {
      using functor_type =
        ChebyshevKernelVectorFunctor<w_vec_type, d_vec_type,
                                     b_vec_type, matrix_type,
                                     x_colMap_vec_type, x_domMap_vec_type,
                                     scalar_type,
                                     use_beta,
                                     true>;
      functor_type func (alpha, w, d, b, A, x_colMap, x_domMap, beta, rows_per_team);
      if (A.nnz() > 10000000)
        Kokkos::parallel_for (kernel_label, policyDynamic, func);
      else
        Kokkos::parallel_for (kernel_label, policyStatic, func);
    } else {
      using functor_type =
        ChebyshevKernelVectorFunctor<w_vec_type, d_vec_type,
                                     b_vec_type, matrix_type,
                                     x_colMap_vec_type, x_domMap_vec_type,
                                     scalar_type,
                                     use_beta,
                                     false>;
      functor_type func (alpha, w, d, b, A, x_colMap, x_domMap, beta, rows_per_team);
      if (A.nnz() > 10000000)
        Kokkos::parallel_for (kernel_label, policyDynamic, func);
      else
        Kokkos::parallel_for (kernel_label, policyStatic, func);
    }
  }
  else {
    constexpr bool use_beta = true;
    if (do_X_update) {
      using functor_type =
        ChebyshevKernelVectorFunctor<w_vec_type, d_vec_type,
                                     b_vec_type, matrix_type,
                                     x_colMap_vec_type, x_domMap_vec_type,
                                     scalar_type,
                                     use_beta,
                                     true>;
      functor_type func (alpha, w, d, b, A, x_colMap, x_domMap, beta, rows_per_team);
      if (A.nnz() > 10000000)
        Kokkos::parallel_for (kernel_label, policyDynamic, func);
      else
        Kokkos::parallel_for (kernel_label, policyStatic, func);
    } else {
      using functor_type =
        ChebyshevKernelVectorFunctor<w_vec_type, d_vec_type,
                                     b_vec_type, matrix_type,
                                     x_colMap_vec_type, x_domMap_vec_type,
                                     scalar_type,
                                     use_beta,
                                     false>;
      functor_type func (alpha, w, d, b, A, x_colMap, x_domMap, beta, rows_per_team);
      if (A.nnz() > 10000000)
        Kokkos::parallel_for (kernel_label, policyDynamic, func);
      else
        Kokkos::parallel_for (kernel_label, policyStatic, func);
    }
  }
}

} // namespace Impl

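// Definitions of the ChebyshevKernel class template, which is declared in the
// corresponding _decl header and explicitly instantiated via the macro at the
// bottom of this file.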
template<class TpetraOperatorType>
ChebyshevKernel<TpetraOperatorType>::
ChebyshevKernel (const Teuchos::RCP<const operator_type>& A,
                 const bool useNativeSpMV) :
  useNativeSpMV_ (useNativeSpMV)
{
  setMatrix (A);
}

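// setMatrix resets the cached CrsMatrix, Import/Export objects, and the
// column-Map work vector whenever the operator changes; the temporary
// multivector V1_ is reallocated lazily in unfusedCase.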
template<class TpetraOperatorType>
void
ChebyshevKernel<TpetraOperatorType>::
setMatrix (const Teuchos::RCP<const operator_type>& A)
{
  if (A_op_.get () != A.get ()) {
    A_op_ = A;

    // We'll (re)allocate these on demand.
    V1_ = std::unique_ptr<multivector_type> (nullptr);

    using Teuchos::rcp_dynamic_cast;
    Teuchos::RCP<const crs_matrix_type> A_crs =
      rcp_dynamic_cast<const crs_matrix_type> (A);
    if (A_crs.is_null ()) {
      A_crs_ = Teuchos::null;
      imp_ = Teuchos::null;
      exp_ = Teuchos::null;
      X_colMap_ = nullptr;
    }
    else {
      TEUCHOS_ASSERT( A_crs->isFillComplete () );
      A_crs_ = A_crs;
      auto G = A_crs->getCrsGraph ();
      imp_ = G->getImporter ();
      exp_ = G->getExporter ();
      if (!imp_.is_null ()) {
        // Reuse X_colMap_ if it already has the right column Map.
        if (X_colMap_.get () == nullptr ||
            !X_colMap_->getMap()->isSameAs (*(imp_->getTargetMap ()))) {
          X_colMap_ = std::unique_ptr<vector_type> (new vector_type (imp_->getTargetMap ()));
        }
      } else
        X_colMap_ = nullptr;
    }
  }
}

template<class TpetraOperatorType>
void
ChebyshevKernel<TpetraOperatorType>::
compute (multivector_type& W,
         const SC& alpha,
         vector_type& D_inv,
         multivector_type& B,
         multivector_type& X,
         const SC& beta)
{
  using Teuchos::RCP;
  using Teuchos::rcp;

  if (canFuse (B)) {
    // "nonconst" here has no effect other than on the return type.
    W_vec_ = W.getVectorNonConst (0);
    B_vec_ = B.getVectorNonConst (0);
    X_vec_ = X.getVectorNonConst (0);
    TEUCHOS_ASSERT( ! A_crs_.is_null () );
    fusedCase (*W_vec_, alpha, D_inv, *B_vec_, *A_crs_, *X_vec_, beta);
  }
  else {
    TEUCHOS_ASSERT( ! A_op_.is_null () );
    unfusedCase (W, alpha, D_inv, B, *A_op_, X, beta);
  }
}

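// Usage sketch (not part of this file; the names and setup below are assumed,
// and the operator must be handed in as a Teuchos::RCP<const operator_type>):
//
//   using op_type = Tpetra::Operator<SC, LO, GO, NT>;
//   Ifpack2::Details::ChebyshevKernel<op_type> kernel (A, false);
//   // Computes W := alpha * D_inv * (B - A*X) + beta * W, then X := X + W.
//   kernel.compute (W, alpha, D_inv, B, X, beta);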
template<class TpetraOperatorType>
typename ChebyshevKernel<TpetraOperatorType>::vector_type&
ChebyshevKernel<TpetraOperatorType>::
importVector (vector_type& X_domMap)
{
  if (imp_.is_null ()) {
    return X_domMap;
  }
  else {
    X_colMap_->doImport (X_domMap, *imp_, Tpetra::REPLACE);
    return *X_colMap_;
  }
}

template<class TpetraOperatorType>
bool
ChebyshevKernel<TpetraOperatorType>::
canFuse (const multivector_type& B) const
{
  // If the user asked for the native (unfused) SpMV path, never fuse.
  if (useNativeSpMV_)
    return false;

  // Otherwise, fuse only for a single-column B, a CrsMatrix A, and no Export.
  return B.getNumVectors () == size_t (1) &&
         ! A_crs_.is_null () &&
         exp_.is_null ();
}

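// Fallback path: form V1 = B - A*X with a temporary multivector, then apply
// the diagonal scaling and the X update as separate Tpetra operations.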
template<class TpetraOperatorType>
void
ChebyshevKernel<TpetraOperatorType>::
unfusedCase (multivector_type& W,
             const SC& alpha,
             vector_type& D_inv,
             multivector_type& B,
             const operator_type& A,
             multivector_type& X,
             const SC& beta)
{
  using STS = Teuchos::ScalarTraits<SC>;
  if (V1_.get () == nullptr) {
    using MV = multivector_type;
    const size_t numVecs = B.getNumVectors ();
    V1_ = std::unique_ptr<MV> (new MV (B.getMap (), numVecs));
  }
  const SC one = Teuchos::ScalarTraits<SC>::one ();

  // V1 = B - A*X
  Tpetra::deep_copy (*V1_, B);
  A.apply (X, *V1_, Teuchos::NO_TRANS, -one, one);

  // W := alpha * D_inv * V1 + beta * W
  W.elementWiseMultiply (alpha, D_inv, *V1_, beta);

  // X := X + W
  X.update (STS::one(), W, STS::one());
}

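// Fused path: extract rank-1 device views of the local data and launch the
// single fused kernel.  When the matrix has an Import, X is updated inside
// the kernel (through the domain-Map view); otherwise X is updated afterward.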
template<class TpetraOperatorType>
void
ChebyshevKernel<TpetraOperatorType>::
fusedCase (vector_type& W,
           const SC& alpha,
           vector_type& D_inv,
           vector_type& B,
           const crs_matrix_type& A,
           vector_type& X,
           const SC& beta)
{
  vector_type& X_colMap = importVector (X);

  using Impl::chebyshev_kernel_vector;
  using STS = Teuchos::ScalarTraits<SC>;

  auto A_lcl = A.getLocalMatrixDevice ();
  // D_inv, B, X, and W are all Vectors, so it's safe to take the first column only.
  auto Dinv_lcl = Kokkos::subview(D_inv.getLocalViewDevice(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
  auto B_lcl = Kokkos::subview(B.getLocalViewDevice(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
  auto X_domMap_lcl = Kokkos::subview(X.getLocalViewDevice(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
  auto X_colMap_lcl = Kokkos::subview(X_colMap.getLocalViewDevice(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);

  const bool do_X_update = !imp_.is_null ();
  if (beta == STS::zero ()) {
    auto W_lcl = Kokkos::subview(W.getLocalViewDevice(Tpetra::Access::OverwriteAll), Kokkos::ALL(), 0);
    chebyshev_kernel_vector (alpha, W_lcl, Dinv_lcl,
                             B_lcl, A_lcl,
                             X_colMap_lcl, X_domMap_lcl,
                             beta,
                             do_X_update);
  }
  else { // need to read _and_ write W if beta != 0
    auto W_lcl = Kokkos::subview(W.getLocalViewDevice(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
    chebyshev_kernel_vector (alpha, W_lcl, Dinv_lcl,
                             B_lcl, A_lcl,
                             X_colMap_lcl, X_domMap_lcl,
                             beta,
                             do_X_update);
  }
  if (!do_X_update)
    X.update(STS::one (), W, STS::one ());
}

} // namespace Details
} // namespace Ifpack2

#define IFPACK2_DETAILS_CHEBYSHEVKERNEL_INSTANT(SC,LO,GO,NT) \
  template class Ifpack2::Details::ChebyshevKernel<Tpetra::Operator<SC, LO, GO, NT> >;

#endif // IFPACK2_DETAILS_CHEBYSHEVKERNEL_DEF_HPP