10 #ifndef IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_DEF_HPP 
   11 #define IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_DEF_HPP 
   13 #include "Tpetra_CrsMatrix.hpp" 
   14 #include "Tpetra_MultiVector.hpp" 
   15 #include "Tpetra_Operator.hpp" 
   16 #include "Tpetra_Vector.hpp" 
   17 #include "Tpetra_Export_decl.hpp" 
   18 #include "Tpetra_Import_decl.hpp" 
   19 #if KOKKOS_VERSION >= 40799 
   20 #include "KokkosKernels_ArithTraits.hpp" 
   22 #include "Kokkos_ArithTraits.hpp" 
   24 #include "Teuchos_Assert.hpp" 
   25 #include <type_traits> 
   26 #include "KokkosSparse_spmv_impl.hpp" 
   36 template <
// NOTE(review): this chunk is a garbled extraction of an Ifpack2 header
// (include guard: IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_DEF_HPP). The
// original file's line numbers are embedded in the text and many interior
// lines are missing, so every definition below is an incomplete fragment.
// Comments annotate intent only; this text is not compilable as-is.
//
// Fragment: functor computing W := alpha * D * (B - A*X) + beta * W
// (contract stated by the doxygen residue at the bottom of this chunk).
class WVector,
 
// All four vector template parameters must be rank-1 Kokkos views
// (single columns), enforced at compile time:
   44   static_assert(static_cast<int>(WVector::rank) == 1,
 
   45                 "WVector must be a rank 1 View.");
 
   46   static_assert(static_cast<int>(DVector::rank) == 1,
 
   47                 "DVector must be a rank 1 View.");
 
   48   static_assert(static_cast<int>(BVector::rank) == 1,
 
   49                 "BVector must be a rank 1 View.");
 
   50   static_assert(static_cast<int>(XVector::rank) == 1,
 
   51                 "XVector must be a rank 1 View.");
 
// Type aliases derived from the sparse-matrix template parameter.
   53   using execution_space = 
typename AMatrix::execution_space;
 
   54   using LO              = 
typename AMatrix::non_const_ordinal_type;
 
   55   using value_type      = 
typename AMatrix::non_const_value_type;
 
   56   using team_policy     = 
typename Kokkos::TeamPolicy<execution_space>;
 
   57   using team_member     = 
typename team_policy::member_type;
 
// Selects the ArithTraits spelling by Kokkos version (the matching
// #else/#endif lines are missing from this extraction).
   58 #if KOKKOS_VERSION >= 40799 
   59   using ATV = KokkosKernels::ArithTraits<value_type>;
 
   61   using ATV = Kokkos::ArithTraits<value_type>;
 
// Number of matrix rows each Kokkos team processes.
   72   const LO rows_per_team;
 
// Constructor fragment: the parameter list and most of the
// member-initializer list are missing from this extraction.
   81                                     const int rows_per_team_)
 
   89     , rows_per_team(rows_per_team_) {
 
// numRows/numCols are read here, presumably for sanity checks whose
// lines are missing from this extraction -- TODO confirm against the
// original file.
   90     const size_t numRows = m_A.numRows();
 
   91     const size_t numCols = m_A.numCols();
 
   99   KOKKOS_INLINE_FUNCTION
// operator() fragment: one invocation per team. Each team walks
// rows_per_team consecutive local rows; within a row, the team's vector
// lanes reduce the sparse dot product A(row,:) * x, then one lane per
// thread commits the scaled damped residual into w.
 
  100   void operator()(
const team_member& dev)
 const {
 
  101     using residual_value_type = 
typename BVector::non_const_value_type;
 
// Version-dependent ArithTraits spelling (the #else/#endif lines of this
// conditional are missing from the extraction).
  102 #if KOKKOS_VERSION >= 40799 
  103     using KAT = KokkosKernels::ArithTraits<residual_value_type>;
 
  105     using KAT = Kokkos::ArithTraits<residual_value_type>;
 
// Distribute this team's rows_per_team rows over its threads.
  108     Kokkos::parallel_for(Kokkos::TeamThreadRange(dev, 0, rows_per_team),
 
  109                          [&](
const LO& loop) {
 
// Local row index for this iteration; the last team may run past the
// final row, hence the bounds check below.
  111                                static_cast<LO
>(dev.league_rank()) * rows_per_team + loop;
 
  112                            if (lclRow >= m_A.numRows()) {
 
  115                            const auto A_row        = m_A.rowConst(lclRow);
 
  116                            const LO row_length     = 
static_cast<LO
>(A_row.length);
 
// A_x accumulates this row's contribution of the sparse mat-vec A*x.
  117                            residual_value_type A_x = KAT::zero();
 
  119                            Kokkos::parallel_reduce(
 
  120                                Kokkos::ThreadVectorRange(dev, row_length),
 
  121                                [&](
const LO iEntry, residual_value_type& lsum) {
 
  122                                  const auto A_val = A_row.value(iEntry);
 
  123                                  lsum += A_val * m_x(A_row.colidx(iEntry));
 
// One lane per thread commits the result. Two write variants appear
// below -- with and without the beta * w term; they are presumably
// selected by a compile-time use_beta branch whose lines are missing
// from this extraction -- TODO confirm.
  127                            Kokkos::single(Kokkos::PerThread(dev),
 
  129                                             const auto alpha_D_res =
 
  130                                                 alpha * m_d(lclRow) * (m_b(lclRow) - A_x);
 
  132                                               m_w(lclRow) = beta * m_w(lclRow) + alpha_D_res;
 
  134                                               m_w(lclRow) = alpha_D_res;
 
  142 template <
// Free-function fragment: scaled_damped_residual_vector launches the
// functor above over all local rows, choosing SpMV-style launch
// parameters and a team-policy schedule, and dispatching on whether
// beta is exactly zero.
class WVector,
 
  149 scaled_damped_residual_vector(
const Scalar& alpha,
 
  155                               const Scalar& beta) {
 
  156   using execution_space = 
typename AMatrix::execution_space;
 
// Nothing to do for an empty local matrix (early return; the return
// statement itself is missing from this extraction).
  158   if (A.numRows() == 0) {
 
// -1 asks spmv_launch_parameters to pick suitable values.
  163   int vector_length       = -1;
 
  164   int64_t rows_per_thread = -1;
 
  166   const int64_t rows_per_team = KokkosSparse::Impl::spmv_launch_parameters<execution_space>(A.numRows(), A.nnz(), rows_per_thread, team_size, vector_length);
 
// Ceiling division: number of teams needed to cover all rows of b.
  167   int64_t worksets            = (b.extent(0) + rows_per_team - 1) / rows_per_team;
 
  169   using Kokkos::Dynamic;
 
  170   using Kokkos::Schedule;
 
  171   using Kokkos::Static;
 
  172   using Kokkos::TeamPolicy;
 
  173   using policy_type_dynamic = TeamPolicy<execution_space, Schedule<Dynamic> >;
 
  174   using policy_type_static  = TeamPolicy<execution_space, Schedule<Static> >;
 
  175   const char kernel_label[] = 
"scaled_damped_residual_vector";
 
// Both policies are constructed; which one runs is decided later by nnz.
// The condition choosing Kokkos::AUTO vs an explicit team_size is
// missing from this extraction (presumably team_size < 0 -- TODO confirm).
  176   policy_type_dynamic policyDynamic(1, 1);
 
  177   policy_type_static policyStatic(1, 1);
 
  179     policyDynamic = policy_type_dynamic(worksets, Kokkos::AUTO, vector_length);
 
  180     policyStatic  = policy_type_static(worksets, Kokkos::AUTO, vector_length);
 
  182     policyDynamic = policy_type_dynamic(worksets, team_size, vector_length);
 
  183     policyStatic  = policy_type_static(worksets, team_size, vector_length);
 
// Const/non-const view flavors expected by the functor template.
  187   using w_vec_type  = 
typename WVector::non_const_type;
 
  188   using d_vec_type  = 
typename DVector::const_type;
 
  189   using b_vec_type  = 
typename BVector::const_type;
 
  190   using matrix_type = AMatrix;
 
  191   using x_vec_type  = 
typename XVector::const_type;
 
  192 #if KOKKOS_VERSION >= 40799 
  193   using scalar_type = 
typename KokkosKernels::ArithTraits<Scalar>::val_type;
 
  195   using scalar_type = 
typename Kokkos::ArithTraits<Scalar>::val_type;
 
// beta == 0 dispatches a use_beta=false functor, so existing entries of
// w are never read; otherwise w is read and scaled by beta.
  198 #if KOKKOS_VERSION >= 40799 
  199   if (beta == KokkosKernels::ArithTraits<Scalar>::zero()) {
 
  201   if (beta == Kokkos::ArithTraits<Scalar>::zero()) {
 
  203     constexpr 
bool use_beta = 
false;
 
  205         ScaledDampedResidualVectorFunctor<w_vec_type, d_vec_type,
 
  206                                           b_vec_type, matrix_type,
 
  207                                           x_vec_type, scalar_type,
 
  209     functor_type func(alpha, w, d, b, A, x, beta, rows_per_team);
 
// Large matrices (> 10M nonzeros) get a dynamic schedule for load
// balance; smaller ones a static schedule for lower overhead.
  210     if (A.nnz() > 10000000)
 
  211       Kokkos::parallel_for(kernel_label, policyDynamic, func);
 
  213       Kokkos::parallel_for(kernel_label, policyStatic, func);
 
  215     constexpr 
bool use_beta = 
true;
 
  217         ScaledDampedResidualVectorFunctor<w_vec_type, d_vec_type,
 
  218                                           b_vec_type, matrix_type,
 
  219                                           x_vec_type, scalar_type,
 
  221     functor_type func(alpha, w, d, b, A, x, beta, rows_per_team);
 
// Same nnz-based schedule choice as the use_beta=false branch above.
  222     if (A.nnz() > 10000000)
 
  223       Kokkos::parallel_for(kernel_label, policyDynamic, func);
 
  225       Kokkos::parallel_for(kernel_label, policyStatic, func);
 
  231 template <
// Fragment: a ScaledDampedResidual member whose name and body are
// mostly missing from this extraction (only the qualified-name line
// survives) -- likely the constructor. TODO confirm against the
// original file.
class TpetraOperatorType>
 
  232 ScaledDampedResidual<TpetraOperatorType>::
 
  237 template <
class TpetraOperatorType>
 
// Fragment: matrix-setting method (the name line is missing --
// presumably setMatrix). When the operator actually changes, the
// cached import vector and scratch multivector are dropped and the
// CRS-specific state is rebuilt.
  238 void ScaledDampedResidual<TpetraOperatorType>::
 
  240   if (A_op_.get() != A.
get()) {
 
// Invalidate lazily-built caches tied to the previous matrix.
  244     X_colMap_ = std::unique_ptr<vector_type>(
nullptr);
 
  245     V1_       = std::unique_ptr<multivector_type>(
nullptr);
 
  247     using Teuchos::rcp_dynamic_cast;
 
// Try to downcast the operator to a concrete CRS matrix; the fused
// kernel path requires one.
  249         rcp_dynamic_cast<
const crs_matrix_type>(A);
 
// Not a CRS matrix: clear the CRS-only state (only the unfused path
// will be usable).
  251       A_crs_ = Teuchos::null;
 
  252       imp_   = Teuchos::null;
 
  253       exp_   = Teuchos::null;
 
// CRS matrix: cache the graph's importer/exporter for halo exchange.
  257       auto G = A_crs->getCrsGraph();
 
  258       imp_   = G->getImporter();
 
  259       exp_   = G->getExporter();
 
  264 template <
// compute() fragment: forms W := alpha * D_inv * (B - A*X) + beta * W
// at the multivector level. When the fused path applies (the guarding
// condition is missing from this extraction -- presumably canFuse(B)),
// the first columns of W, B, X are extracted and the fused CRS kernel
// is called; otherwise the generic operator path is used.
class TpetraOperatorType>
 
  265 void ScaledDampedResidual<TpetraOperatorType>::
 
  266     compute(multivector_type& W,
 
// Cache single-column views of W, B, X for the fused kernel.
  277     W_vec_ = W.getVectorNonConst(0);
 
  278     B_vec_ = B.getVectorNonConst(0);
 
  279     X_vec_ = X.getVectorNonConst(0);
 
  281     fusedCase(*W_vec_, alpha, D_inv, *B_vec_, *A_crs_, *X_vec_, beta);
 
// Fallback: arbitrary Tpetra::Operator / multiple right-hand sides.
  284     unfusedCase(W, alpha, D_inv, B, *A_op_, X, beta);
 
  288 template <
// importVector() fragment: returns the column-map copy of X needed by
// the local kernel. With no importer the domain-map vector can be used
// directly; otherwise a column-map vector is allocated once, lazily,
// and refreshed via doImport with REPLACE semantics.
class TpetraOperatorType>
 
  289 typename ScaledDampedResidual<TpetraOperatorType>::vector_type&
 
  290 ScaledDampedResidual<TpetraOperatorType>::
 
  291     importVector(vector_type& X_domMap) {
 
// No importer: X_domMap is usable as-is (the return statement is
// missing from this extraction).
  292   if (imp_.is_null()) {
 
// Lazy one-time allocation of the column-map (import target) vector.
  295     if (X_colMap_.get() == 
nullptr) {
 
  296       using V   = vector_type;
 
  297       X_colMap_ = std::unique_ptr<V>(
new V(imp_->getTargetMap()));
 
// Overwrite (REPLACE) the column-map copy with current domain-map data.
  299     X_colMap_->doImport(X_domMap, *imp_, Tpetra::REPLACE);
 
  304 template <
// canFuse() fragment: the fused kernel only handles a single
// right-hand side. The remainder of the condition after the && is
// missing from this extraction -- presumably it also requires a cached
// CRS matrix and no exporter. TODO confirm against the original file.
class TpetraOperatorType>
 
  305 bool ScaledDampedResidual<TpetraOperatorType>::
 
  306     canFuse(
const multivector_type& B)
 const {
 
  307   return B.getNumVectors() == size_t(1) &&
 
  312 template <
// unfusedCase() fragment: generic path for arbitrary operators and
// multiple columns. A scratch multivector V1_ is allocated once, B is
// copied into it, and -- in lines missing from this extraction,
// presumably an A.apply() forming B - A*X in V1_ -- the residual is
// built before the final element-wise scaling:
//   W := alpha * D_inv .* V1_ + beta * W.
class TpetraOperatorType>
 
  313 void ScaledDampedResidual<TpetraOperatorType>::
 
  314     unfusedCase(multivector_type& W,
 
  318                 const operator_type& A,
 
// Lazy one-time allocation of the scratch multivector, matching B's
// map and number of columns.
  321   if (V1_.get() == 
nullptr) {
 
  322     using MV             = multivector_type;
 
  323     const size_t numVecs = B.getNumVectors();
 
  324     V1_                  = std::unique_ptr<MV>(
new MV(B.getMap(), numVecs));
 
  329   Tpetra::deep_copy(*V1_, B);
 
  333   W.elementWiseMultiply(alpha, D_inv, *V1_, beta);
 
  336 template <
// fusedCase() fragment: single-column fast path. Extracts rank-1 device
// views of all vectors and calls the fused residual kernel. When
// beta == 0 the output is opened with OverwriteAll (never read);
// otherwise with ReadWrite.
class TpetraOperatorType>
 
  337 void ScaledDampedResidual<TpetraOperatorType>::
 
  338     fusedCase(vector_type& W,
 
  342               const crs_matrix_type& A,
 
// Halo exchange: bring X onto the matrix column map before the local
// kernel runs.
  345   vector_type& X_colMap = importVector(X);
 
  347   using Impl::scaled_damped_residual_vector;
 
// Rank-1 device views of the first (only) column of each vector.
  350   auto A_lcl    = A.getLocalMatrixDevice();
 
  351   auto Dinv_lcl = Kokkos::subview(D_inv.getLocalViewDevice(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
 
  352   auto B_lcl    = Kokkos::subview(B.getLocalViewDevice(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
 
  353   auto X_lcl    = Kokkos::subview(X_colMap.getLocalViewDevice(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
 
// beta == 0: W's old contents are irrelevant, so avoid reading them
// (OverwriteAll also avoids touching possibly-uninitialized memory).
  354   if (beta == STS::zero()) {
 
  355     auto W_lcl = Kokkos::subview(W.getLocalViewDevice(Tpetra::Access::OverwriteAll), Kokkos::ALL(), 0);
 
  356     scaled_damped_residual_vector(alpha, W_lcl, Dinv_lcl,
 
  357                                   B_lcl, A_lcl, X_lcl, beta);
 
// beta != 0: w is both read and written by the kernel.
  359     auto W_lcl = Kokkos::subview(W.getLocalViewDevice(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
 
  360     scaled_damped_residual_vector(alpha, W_lcl, Dinv_lcl,
 
  361                                   B_lcl, A_lcl, X_lcl, beta);
 
  // Explicit-instantiation macro: stamps out ScaledDampedResidual for one
  // (Scalar, LocalOrdinal, GlobalOrdinal, Node) Tpetra type combination.
  // (Comment placed before the #define so the line continuation is intact.)
  368 #define IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_INSTANT(SC, LO, GO, NT) \ 
  369   template class Ifpack2::Details::ScaledDampedResidual<Tpetra::Operator<SC, LO, GO, NT> >; 
  371 #endif  // IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_DEF_HPP 
Functor for computing W := alpha * D * (B - A*X) + beta * W. 
Definition: Ifpack2_Details_ScaledDampedResidual_def.hpp:43
 
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
 
#define TEUCHOS_ASSERT(assertion_test)