10 #ifndef KOKKOSBLAS2_GEMV_MP_VECTOR_HPP
11 #define KOKKOSBLAS2_GEMV_MP_VECTOR_HPP
13 #include <type_traits>
14 #include "Sacado_ConfigDefs.h"
20 #include "KokkosBlas.hpp"
23 #include "Kokkos_Core.hpp"
25 #include "Stokhos_config.h"
// Tile size used by the GEMV kernels: STOKHOS_GEMV_CACHE_SIZE divided by
// `size`.
// NOTE(review): STOKHOS_GEMV_CACHE_SIZE is presumably provided by
// Stokhos_config.h, and `size` is presumably a per-entry size in the same
// units as the cache size -- confirm against the callers.
// The argument is parenthesized so that compound expressions such as
// `a + b` divide as a whole instead of being split by operator precedence.
#define Sacado_MP_Vector_GEMV_Tile_Size(size) (STOKHOS_GEMV_CACHE_SIZE / (size))
30 template <
class AViewType,
33 class IndexType =
typename AViewType::size_type>
50 KOKKOS_INLINE_FUNCTION
void
53 const IndexType m =
y_.extent(0);
54 const IndexType
n =
x_.extent(0);
56 IndexType i_min =
m_c_ * i_tile;
57 bool last_tile = (i_min +
m_c_ >= m);
58 IndexType i_max = (last_tile) ? m : (i_min + m_c_);
60 #ifdef STOKHOS_HAVE_PRAGMA_UNROLL
64 for (IndexType i = i_min; i < i_max; ++i)
67 for (IndexType i = i_min; i < i_max; ++i)
70 for (IndexType
j = 0;
j < n; ++
j)
74 for (IndexType i = i_min; i < i_max; ++i)
75 y_(i) += alphab *
A_(i,
j);
81 typename AViewType::const_type
A_;
82 typename XViewType::const_type
x_;
89 template <
class AViewType,
92 class IndexType =
typename AViewType::size_type>
111 KOKKOS_INLINE_FUNCTION
void
114 const IndexType m =
y_.extent(0);
115 const IndexType
n =
x_.extent(0);
117 const int j = team.league_rank();
118 const IndexType j_min =
n_c_ *
j;
119 const IndexType nj = (j_min +
n_c_ > n) ? (n - j_min) :
n_c_;
120 const IndexType i_min = j % m;
122 for (IndexType i = i_min; i < m; ++i)
125 Kokkos::parallel_reduce(
126 Kokkos::TeamThreadRange(team, nj), [=](
int jj,
Scalar &tmp_sum) {
127 tmp_sum +=
A_(jj + j_min, i) *
x_(jj + j_min);
130 if (team.team_rank() == 0)
136 for (IndexType i = 0; i < i_min; ++i)
139 Kokkos::parallel_reduce(
140 Kokkos::TeamThreadRange(team, nj), [=](
int jj,
Scalar &tmp_sum) {
141 tmp_sum +=
A_(jj + j_min, i) *
x_(jj + j_min);
144 if (team.team_rank() == 0)
154 typename AViewType::const_type
A_;
155 typename XViewType::const_type
x_;
165 typename VA::const_value_type &alpha,
168 typename VY::const_value_type &beta,
172 using IndexType =
typename VA::size_type;
173 using policy_type = Kokkos::RangePolicy<execution_space, IndexType>;
176 const size_t m = y.extent(0);
180 const size_t n_tiles_per_thread =
ceil(((
double)m) / (N * m_c_star));
181 const size_t m_c =
ceil(((
double)m) / (N * n_tiles_per_thread));
182 const size_t n_tiles = N * n_tiles_per_thread;
184 policy_type range(0, n_tiles);
187 functor_type functor(alpha, A, x, beta, y, m_c);
189 Kokkos::parallel_for(
"KokkosBlas::gemv[Update]", range, functor);
197 typename VA::const_value_type &alpha,
200 typename VY::const_value_type &beta,
204 using IndexType =
typename VA::size_type;
205 using team_policy_type = Kokkos::TeamPolicy<execution_space>;
208 const size_t m = y.extent(0);
209 const size_t n = x.extent(0);
211 const size_t team_size = STOKHOS_GEMV_TEAM_SIZE;
215 const size_t n_tiles_per_thread =
ceil(((
double)n) / (N * m_c_star));
216 const size_t m_c =
ceil(((
double)n) / (N * n_tiles_per_thread));
217 const size_t n_per_tile2 = m_c * team_size;
219 const size_t n_i2 =
ceil(((
double)n) / n_per_tile2);
221 team_policy_type team(n_i2, team_size);
224 Kokkos::parallel_for(
225 m, KOKKOS_LAMBDA(
const int i) {
229 Kokkos::parallel_for(
230 m, KOKKOS_LAMBDA(
const int i) {
235 functor_type functor(alpha, A, x, y, n_per_tile2);
237 Kokkos::parallel_for(
"KokkosBlas::gemv[InnerProducts]", team, functor);
242 template <
typename DA,
typename... PA,
243 typename DX,
typename... PX,
244 typename DY,
typename... PY>
249 typename Kokkos::View<DA, PA...>::const_value_type &alpha,
250 const Kokkos::View<DA, PA...> &
A,
251 const Kokkos::View<DX, PX...> &x,
252 typename Kokkos::View<DY, PY...>::const_value_type &beta,
253 const Kokkos::View<DY, PY...> &y)
256 typedef Kokkos::View<DA, PA...> VA;
257 typedef Kokkos::View<DX, PX...> VX;
258 typedef Kokkos::View<DY, PY...> VY;
260 static_assert(VA::rank == 2,
"GEMM: A must have rank 2 (be a matrix).");
261 static_assert(VX::rank == 1,
"GEMM: x must have rank 1 (be a vector).");
262 static_assert(VY::rank == 1,
"GEMM: y must have rank 1 (be a vector).");
264 if (trans[0] ==
'n' || trans[0] ==
'N')
265 update_MP<Scalar, VA, VX, VY>(alpha, A, x, beta, y);
267 inner_products_MP<Scalar, VA, VX, VY>(alpha, A, x, beta, y);
Kokkos::DefaultExecutionSpace execution_space
std::enable_if< Kokkos::is_view_mp_vector< Kokkos::View< DA, PA...> >::value &&Kokkos::is_view_mp_vector< Kokkos::View< DX, PX...> >::value &&Kokkos::is_view_mp_vector< Kokkos::View< DY, PY...> >::value >::type gemv(const char trans[], typename Kokkos::View< DA, PA...>::const_value_type &alpha, const Kokkos::View< DA, PA...> &A, const Kokkos::View< DX, PX...> &x, typename Kokkos::View< DY, PY...>::const_value_type &beta, const Kokkos::View< DY, PY...> &y)
#define Sacado_MP_Vector_GEMV_Tile_Size(size)
Kokkos::DefaultExecutionSpace execution_space
Kokkos::TeamPolicy< execution_space > policy_type
void update_MP(typename VA::const_value_type &alpha, const VA &A, const VX &x, typename VY::const_value_type &beta, const VY &y)
KOKKOS_INLINE_FUNCTION void atomic_add(volatile Sacado::UQ::PCE< Storage > *const dest, const Sacado::UQ::PCE< Storage > &src)
typename AViewType::execution_space execution_space
typename AViewType::non_const_value_type AlphaCoeffType
KOKKOS_INLINE_FUNCTION PCE< Storage > ceil(const PCE< Storage > &a)
KOKKOS_INLINE_FUNCTION void operator()(const IndexType &i_tile) const
KOKKOS_INLINE_FUNCTION void operator()(const member_type &team) const
typename YViewType::non_const_value_type BetaCoeffType
typename policy_type::member_type member_type
updateF(const AlphaCoeffType &alpha, const AViewType &A, const XViewType &x, const BetaCoeffType &beta, const YViewType &y, const IndexType m_c)
innerF(const AlphaCoeffType &alpha, const AViewType &A, const XViewType &x, const YViewType &y, const IndexType n_c)
typename AViewType::non_const_value_type AlphaCoeffType
void inner_products_MP(typename VA::const_value_type &alpha, const VA &A, const VX &x, typename VY::const_value_type &beta, const VY &y)