#ifndef IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_DEF_HPP
#define IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_DEF_HPP
#include "Tpetra_CrsMatrix.hpp"
#include "Tpetra_MultiVector.hpp"
#include "Tpetra_Operator.hpp"
#include "Tpetra_Vector.hpp"
#include "Tpetra_Export_decl.hpp"
#include "Tpetra_Import_decl.hpp"
#if KOKKOS_VERSION >= 40799
#include "KokkosKernels_ArithTraits.hpp"
#else
#include "Kokkos_ArithTraits.hpp"
#endif
#include "Teuchos_Assert.hpp"
#include <type_traits>
#include "KokkosSparse_spmv_impl.hpp"
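// KokkosSparse_spmv_impl.hpp provides KokkosSparse::Impl::spmv_launch_parameters,
// which is used below to pick team/vector launch parameters for the fused
// residual kernel.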
namespace Ifpack2 {
namespace Details {
namespace Impl {

/// \brief Functor for computing W := alpha * D * (B - A*X) + beta * W.
template <class WVector,
          class DVector,
          class BVector,
          class AMatrix,
          class XVector,
          class Scalar,
          bool use_beta>
struct ScaledDampedResidualVectorFunctor {
  static_assert(static_cast<int>(WVector::rank) == 1,
                "WVector must be a rank 1 View.");
  static_assert(static_cast<int>(DVector::rank) == 1,
                "DVector must be a rank 1 View.");
  static_assert(static_cast<int>(BVector::rank) == 1,
                "BVector must be a rank 1 View.");
  static_assert(static_cast<int>(XVector::rank) == 1,
                "XVector must be a rank 1 View.");
  using execution_space = typename AMatrix::execution_space;
  using LO              = typename AMatrix::non_const_ordinal_type;
  using value_type      = typename AMatrix::non_const_value_type;
  using team_policy     = typename Kokkos::TeamPolicy<execution_space>;
  using team_member     = typename team_policy::member_type;
#if KOKKOS_VERSION >= 40799
  using ATV = KokkosKernels::ArithTraits<value_type>;
#else
  using ATV = Kokkos::ArithTraits<value_type>;
#endif
  const Scalar alpha;
  WVector m_w;
  DVector m_d;
  BVector m_b;
  AMatrix m_A;
  XVector m_x;
  const Scalar beta;
  const LO rows_per_team;

  ScaledDampedResidualVectorFunctor(const Scalar& alpha_,
                                    const WVector& m_w_,
                                    const DVector& m_d_,
                                    const BVector& m_b_,
                                    const AMatrix& m_A_,
                                    const XVector& m_x_,
                                    const Scalar& beta_,
                                    const int rows_per_team_)
    : alpha(alpha_)
    , m_w(m_w_)
    , m_d(m_d_)
    , m_b(m_b_)
    , m_A(m_A_)
    , m_x(m_x_)
    , beta(beta_)
    , rows_per_team(rows_per_team_) {
    const size_t numRows = m_A.numRows();
    const size_t numCols = m_A.numCols();

    TEUCHOS_ASSERT(m_w.extent(0) == m_d.extent(0));
    TEUCHOS_ASSERT(m_w.extent(0) == m_b.extent(0));
    TEUCHOS_ASSERT(numRows == size_t(m_w.extent(0)));
    TEUCHOS_ASSERT(numCols <= size_t(m_x.extent(0)));
  }
  KOKKOS_INLINE_FUNCTION
  void operator()(const team_member& dev) const {
    using residual_value_type = typename BVector::non_const_value_type;
#if KOKKOS_VERSION >= 40799
    using KAT = KokkosKernels::ArithTraits<residual_value_type>;
#else
    using KAT = Kokkos::ArithTraits<residual_value_type>;
#endif
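    // Parallelization sketch: each team (league rank) owns rows_per_team
    // consecutive rows; TeamThreadRange spreads those rows over the team's
    // threads, and ThreadVectorRange reduces each row's dot product with X
    // across vector lanes.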
    Kokkos::parallel_for(Kokkos::TeamThreadRange(dev, 0, rows_per_team),
                         [&](const LO& loop) {
                           const LO lclRow =
                               static_cast<LO>(dev.league_rank()) * rows_per_team + loop;
                           if (lclRow >= m_A.numRows()) {
                             return;
                           }
                           const auto A_row = m_A.rowConst(lclRow);
                           const LO row_length = static_cast<LO>(A_row.length);
                           residual_value_type A_x = KAT::zero();
                           Kokkos::parallel_reduce(
                               Kokkos::ThreadVectorRange(dev, row_length),
                               [&](const LO iEntry, residual_value_type& lsum) {
                                 const auto A_val = A_row.value(iEntry);
                                 lsum += A_val * m_x(A_row.colidx(iEntry));
                               },
                               A_x);
                           Kokkos::single(Kokkos::PerThread(dev),
                                          [&]() {
                                            const auto alpha_D_res =
                                                alpha * m_d(lclRow) * (m_b(lclRow) - A_x);
                                            if (use_beta) {
                                              m_w(lclRow) = beta * m_w(lclRow) + alpha_D_res;
                                            } else {
                                              m_w(lclRow) = alpha_D_res;
                                            }
                                          });
                         });
  }
};
// W := alpha * D * (B - A*X) + beta * W.
template <class WVector,
          class DVector,
          class BVector,
          class AMatrix,
          class XVector,
          class Scalar>
static void
scaled_damped_residual_vector(const Scalar& alpha,
                              const WVector& w,
                              const DVector& d,
                              const BVector& b,
                              const AMatrix& A,
                              const XVector& x,
                              const Scalar& beta) {
  using execution_space = typename AMatrix::execution_space;

  if (A.numRows() == 0) {
    return;
  }

  int team_size = -1;
  int vector_length = -1;
  int64_t rows_per_thread = -1;
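  // The -1 sentinels ask spmv_launch_parameters to choose values suited to
  // the execution space and matrix size; the intent is that a nonnegative
  // input would be respected as a user override.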
  const int64_t rows_per_team = KokkosSparse::Impl::spmv_launch_parameters<execution_space>(A.numRows(), A.nnz(), rows_per_thread, team_size, vector_length);
  int64_t worksets = (b.extent(0) + rows_per_team - 1) / rows_per_team;
  using Kokkos::Dynamic;
  using Kokkos::Schedule;
  using Kokkos::Static;
  using Kokkos::TeamPolicy;
  using policy_type_dynamic = TeamPolicy<execution_space, Schedule<Dynamic> >;
  using policy_type_static  = TeamPolicy<execution_space, Schedule<Static> >;
  const char kernel_label[] = "scaled_damped_residual_vector";
  policy_type_dynamic policyDynamic(1, 1);
  policy_type_static policyStatic(1, 1);
  if (team_size < 0) {
    policyDynamic = policy_type_dynamic(worksets, Kokkos::AUTO, vector_length);
    policyStatic  = policy_type_static(worksets, Kokkos::AUTO, vector_length);
  } else {
    policyDynamic = policy_type_dynamic(worksets, team_size, vector_length);
    policyStatic  = policy_type_static(worksets, team_size, vector_length);
  }
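  // Both a dynamic- and a static-schedule policy are built up front. The
  // dispatch below runs the dynamic one for large matrices (> 10M entries),
  // presumably because row-length imbalance makes dynamic scheduling pay off
  // there, and the lower-overhead static one otherwise.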
  using w_vec_type  = typename WVector::non_const_type;
  using d_vec_type  = typename DVector::const_type;
  using b_vec_type  = typename BVector::const_type;
  using matrix_type = AMatrix;
  using x_vec_type  = typename XVector::const_type;
#if KOKKOS_VERSION >= 40799
  using scalar_type = typename KokkosKernels::ArithTraits<Scalar>::val_type;
#else
  using scalar_type = typename Kokkos::ArithTraits<Scalar>::val_type;
#endif

#if KOKKOS_VERSION >= 40799
  if (beta == KokkosKernels::ArithTraits<Scalar>::zero()) {
#else
  if (beta == Kokkos::ArithTraits<Scalar>::zero()) {
#endif
    constexpr bool use_beta = false;
    using functor_type =
        ScaledDampedResidualVectorFunctor<w_vec_type, d_vec_type,
                                          b_vec_type, matrix_type,
                                          x_vec_type, scalar_type,
                                          use_beta>;
    functor_type func(alpha, w, d, b, A, x, beta, rows_per_team);
    if (A.nnz() > 10000000)
      Kokkos::parallel_for(kernel_label, policyDynamic, func);
    else
      Kokkos::parallel_for(kernel_label, policyStatic, func);
  } else {
    constexpr bool use_beta = true;
    using functor_type =
        ScaledDampedResidualVectorFunctor<w_vec_type, d_vec_type,
                                          b_vec_type, matrix_type,
                                          x_vec_type, scalar_type,
                                          use_beta>;
    functor_type func(alpha, w, d, b, A, x, beta, rows_per_team);
    if (A.nnz() > 10000000)
      Kokkos::parallel_for(kernel_label, policyDynamic, func);
    else
      Kokkos::parallel_for(kernel_label, policyStatic, func);
  }
}

}  // namespace Impl
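// The use_beta template parameter bakes the beta == 0 distinction into the
// kernel at compile time, so the beta == 0 instantiation never reads W.
// What follows are the member definitions of ScaledDampedResidual, declared
// in the corresponding _decl.hpp header.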
template <class TpetraOperatorType>
ScaledDampedResidual<TpetraOperatorType>::
    ScaledDampedResidual(const Teuchos::RCP<const operator_type>& A) {
  setMatrix(A);
}
template <class TpetraOperatorType>
void ScaledDampedResidual<TpetraOperatorType>::
    setMatrix(const Teuchos::RCP<const operator_type>& A) {
  if (A_op_.get() != A.get()) {
    A_op_ = A;

    // We'll (re)allocate these on demand.
    X_colMap_ = std::unique_ptr<vector_type>(nullptr);
    V1_       = std::unique_ptr<multivector_type>(nullptr);
    using Teuchos::rcp_dynamic_cast;
    Teuchos::RCP<const crs_matrix_type> A_crs =
        rcp_dynamic_cast<const crs_matrix_type>(A);
    if (A_crs.is_null()) {
      A_crs_ = Teuchos::null;
      imp_   = Teuchos::null;
      exp_   = Teuchos::null;
    } else {
      TEUCHOS_ASSERT(A_crs->isFillComplete());
      A_crs_ = A_crs;
      auto G = A_crs->getCrsGraph();
      imp_   = G->getImporter();
      exp_   = G->getExporter();
    }
  }
}
template <class TpetraOperatorType>
void ScaledDampedResidual<TpetraOperatorType>::
    compute(multivector_type& W,
            const SC& alpha,
            vector_type& D_inv,
            multivector_type& B,
            multivector_type& X,
            const SC& beta) {
  using Teuchos::RCP;
  using Teuchos::rcp;

  if (canFuse(B)) {
    W_vec_ = W.getVectorNonConst(0);
    B_vec_ = B.getVectorNonConst(0);
    X_vec_ = X.getVectorNonConst(0);
    fusedCase(*W_vec_, alpha, D_inv, *B_vec_, *A_crs_, *X_vec_, beta);
  } else {
    unfusedCase(W, alpha, D_inv, B, *A_op_, X, beta);
  }
}
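// compute() dispatches per call. The fused path needs single-vector views,
// which are kept in W_vec_/B_vec_/X_vec_ so the RCPs stay alive and repeated
// calls (e.g. across iterations of Ifpack2's Chebyshev smoother, this class's
// main client) do not re-extract them from scratch.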
template <class TpetraOperatorType>
typename ScaledDampedResidual<TpetraOperatorType>::vector_type&
ScaledDampedResidual<TpetraOperatorType>::
    importVector(vector_type& X_domMap) {
  if (imp_.is_null()) {
    return X_domMap;
  } else {
    if (X_colMap_.get() == nullptr) {
      using V = vector_type;
      X_colMap_ = std::unique_ptr<V>(new V(imp_->getTargetMap()));
    }
    X_colMap_->doImport(X_domMap, *imp_, Tpetra::REPLACE);
    return *X_colMap_;
  }
}
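// The column-Map vector is allocated lazily and reused across calls;
// Tpetra::REPLACE overwrites any previously imported values.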
template <class TpetraOperatorType>
bool ScaledDampedResidual<TpetraOperatorType>::
    canFuse(const multivector_type& B) const {
  return B.getNumVectors() == size_t(1) &&
         !A_crs_.is_null() &&
         exp_.is_null();
}
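// Fusion applies only when B has a single column, A is an actual
// Tpetra::CrsMatrix (so a local kernel is available), and the matrix has no
// Exporter, presumably because a nontrivial Exporter would require a
// communication step after the local kernel that the fused path does not do.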
template <class TpetraOperatorType>
void ScaledDampedResidual<TpetraOperatorType>::
    unfusedCase(multivector_type& W,
                const SC& alpha,
                vector_type& D_inv,
                multivector_type& B,
                const operator_type& A,
                multivector_type& X,
                const SC& beta) {
  if (V1_.get() == nullptr) {
    using MV = multivector_type;
    const size_t numVecs = B.getNumVectors();
    V1_ = std::unique_ptr<MV>(new MV(B.getMap(), numVecs));
  }
  const SC one = Teuchos::ScalarTraits<SC>::one();
  // V1 := B - A*X.
  Tpetra::deep_copy(*V1_, B);
  A.apply(X, *V1_, Teuchos::NO_TRANS, -one, one);

  // W := alpha * D_inv * V1 + beta * W.
  W.elementWiseMultiply(alpha, D_inv, *V1_, beta);
}
template <class TpetraOperatorType>
void ScaledDampedResidual<TpetraOperatorType>::
    fusedCase(vector_type& W,
              const SC& alpha,
              vector_type& D_inv,
              vector_type& B,
              const crs_matrix_type& A,
              vector_type& X,
              const SC& beta) {
  vector_type& X_colMap = importVector(X);

  using Impl::scaled_damped_residual_vector;
  using STS = Teuchos::ScalarTraits<SC>;

  auto A_lcl = A.getLocalMatrixDevice();
  auto Dinv_lcl = Kokkos::subview(D_inv.getLocalViewDevice(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
  auto B_lcl = Kokkos::subview(B.getLocalViewDevice(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
  auto X_lcl = Kokkos::subview(X_colMap.getLocalViewDevice(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
  if (beta == STS::zero()) {
    auto W_lcl = Kokkos::subview(W.getLocalViewDevice(Tpetra::Access::OverwriteAll), Kokkos::ALL(), 0);
    scaled_damped_residual_vector(alpha, W_lcl, Dinv_lcl,
                                  B_lcl, A_lcl, X_lcl, beta);
  } else {
    auto W_lcl = Kokkos::subview(W.getLocalViewDevice(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
    scaled_damped_residual_vector(alpha, W_lcl, Dinv_lcl,
                                  B_lcl, A_lcl, X_lcl, beta);
  }
}

}  // namespace Details
}  // namespace Ifpack2
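// When beta == 0 the kernel never reads W, so W is opened with
// Access::OverwriteAll, letting Tpetra skip syncing W's old contents to
// device; otherwise ReadWrite is required because the kernel computes
// beta * W(i).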
#define IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_INSTANT(SC, LO, GO, NT) \
  template class Ifpack2::Details::ScaledDampedResidual<Tpetra::Operator<SC, LO, GO, NT> >;

#endif  // IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_DEF_HPP