42 #ifndef KOKKOSBLAS2_GEMV_MP_VECTOR_HPP
43 #define KOKKOSBLAS2_GEMV_MP_VECTOR_HPP
45 #include <type_traits>
46 #include "Sacado_ConfigDefs.h"
52 #include "KokkosBlas.hpp"
55 #include "Kokkos_Core.hpp"
57 #include "Stokhos_config.h"
// Tile size derived from the configured cache budget: the quotient of
// STOKHOS_GEMV_CACHE_SIZE by `size`. The argument is parenthesised so that a
// compound expression (e.g. `a + b`) is divided as a whole rather than being
// split by operator precedence.
#define Sacado_MP_Vector_GEMV_Tile_Size(size) (STOKHOS_GEMV_CACHE_SIZE / (size))
// Row-tiled update functor (fragment).
// NOTE(review): the struct header, type aliases, constructor, some loop
// bodies and the braces are not present in this listing, so only the
// statements visible below are documented.
62 template <
class AViewType,
// The index type defaults to the size_type of the matrix view.
65 class IndexType =
typename AViewType::size_type>
// Call operator body: handles one tile of entries of y_.
82 KOKKOS_INLINE_FUNCTION
void
// m is the length of y_, n is the length of x_.
85 const IndexType m =
y_.extent(0);
86 const IndexType
n =
x_.extent(0);
// First entry of this tile: tile index times the tile height m_c_.
88 IndexType i_min =
m_c_ * i_tile;
// A tile that would reach or pass m is treated as the last one and is
// clamped to m, so the loops below never index beyond the end of y_.
89 bool last_tile = (i_min +
m_c_ >= m);
90 IndexType i_max = (last_tile) ? m : (i_min + m_c_);
// Two loops over the tile follow the STOKHOS_HAVE_PRAGMA_UNROLL check.
// NOTE(review): their bodies are not visible here - presumably they apply
// beta to y_(i) with and without an unroll pragma; confirm against the
// complete file.
92 #ifdef STOKHOS_HAVE_PRAGMA_UNROLL
96 for (IndexType i = i_min; i < i_max; ++i)
99 for (IndexType i = i_min; i < i_max; ++i)
// Sweep all n entries of x_; for each j, every i of the tile accumulates
// alphab * A_(i, j) into y_(i).
// NOTE(review): alphab is defined on a line not shown - presumably alpha
// times x_(j); confirm.
102 for (IndexType
j = 0;
j < n; ++
j)
106 for (IndexType i = i_min; i < i_max; ++i)
107 y_(i) += alphab *
A_(i,
j);
// Const-typed view members holding the matrix and the input vector.
113 typename AViewType::const_type
A_;
114 typename XViewType::const_type
x_;
// Team-based inner-product functor (fragment).
// NOTE(review): the struct header, type aliases, constructor, the declaration
// of `tmp` and the braces are not present in this listing, so only the
// statements visible below are documented.
121 template <
class AViewType,
// The index type defaults to the size_type of the matrix view.
124 class IndexType =
typename AViewType::size_type>
// Call operator body: one team processes one chunk of x_.
143 KOKKOS_INLINE_FUNCTION
void
// m is the length of y_, n is the length of x_.
146 const IndexType m =
y_.extent(0);
147 const IndexType
n =
x_.extent(0);
// The league rank selects the chunk: it starts at j_min and spans n_c_
// entries, shortened to n - j_min when the chunk would run past n.
149 const int j = team.league_rank();
150 const IndexType j_min =
n_c_ *
j;
151 const IndexType nj = (j_min +
n_c_ > n) ? (n - j_min) :
n_c_;
// The sweep over the entries of y_ starts at j % m and wraps around:
// [i_min, m) first, then [0, i_min).
// NOTE(review): presumably this staggers the atomic updates of different
// teams; confirm. j % m is undefined when y_ is empty (m == 0) - verify
// that the caller never launches this functor in that case.
152 const IndexType i_min = j % m;
// First part of the sweep. For each i the team threads reduce the products
// A_(jj + j_min, i) * x_(jj + j_min) over jj in [0, nj); A_ is indexed with
// the x_ index first.
154 for (IndexType i = i_min; i < m; ++i)
157 Kokkos::parallel_reduce(
158 Kokkos::TeamThreadRange(team, nj), [=](
int jj,
Scalar &tmp_sum) {
159 tmp_sum +=
A_(jj + j_min, i) *
x_(jj + j_min);
// Only the thread with team rank 0 publishes the contribution, adding it
// atomically to y_(i).
// NOTE(review): `tmp` is declared on a line not shown - presumably the
// reduction result, possibly scaled; confirm.
162 if (team.team_rank() == 0)
165 Kokkos::atomic_add<Scalar>(&
y_(i), tmp);
// Wrapped-around part of the sweep, with the same reduction and atomic
// update as above.
168 for (IndexType i = 0; i < i_min; ++i)
171 Kokkos::parallel_reduce(
172 Kokkos::TeamThreadRange(team, nj), [=](
int jj,
Scalar &tmp_sum) {
173 tmp_sum +=
A_(jj + j_min, i) *
x_(jj + j_min);
176 if (team.team_rank() == 0)
179 Kokkos::atomic_add<Scalar>(&
y_(i), tmp);
// Const-typed view members holding the matrix and the input vector.
186 typename AViewType::const_type
A_;
187 typename XViewType::const_type
x_;
// Host-side driver for the tiled update kernel (fragment).
// NOTE(review): the template header, the function name line, the remaining
// parameters, the definitions of execution_space, functor_type and m_c_star,
// and the braces are not present in this listing.
197 typename VA::const_value_type &alpha,
200 typename VY::const_value_type &beta,
// The range policy iterates over tile indices of type IndexType.
204 using IndexType =
typename VA::size_type;
205 using policy_type = Kokkos::RangePolicy<execution_space, IndexType>;
// m is the length of y.
208 const size_t m = y.extent(0);
// N is the thread pool size of the execution space; the accessor name
// depends on KOKKOS_ENABLE_DEPRECATED_CODE (the #else / #endif lines are
// not visible here).
210 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE
211 const size_t N = execution_space::thread_pool_size();
213 const size_t N = execution_space::impl_thread_pool_size();
// Tiles per thread: m divided by N * m_c_star, rounded up.
// NOTE(review): m_c_star is defined on a line not shown - presumably the
// target tile height from Sacado_MP_Vector_GEMV_Tile_Size; confirm.
216 const size_t n_tiles_per_thread =
ceil(((
double)m) / (N * m_c_star));
// Actual tile height: m divided by the total tile count, rounded up.
// NOTE(review): when m == 0, n_tiles_per_thread is 0 and this divides 0.0
// by 0.0; converting the resulting NaN to size_t is undefined - verify
// that empty y is never passed or is handled elsewhere.
217 const size_t m_c =
ceil(((
double)m) / (N * n_tiles_per_thread));
218 const size_t n_tiles = N * n_tiles_per_thread;
// One parallel_for iteration per tile index in [0, n_tiles).
220 policy_type range(0, n_tiles);
223 functor_type functor(alpha, A, x, beta, y, m_c);
225 Kokkos::parallel_for(
"KokkosBlas::gemv[Update]", range, functor);
// Host-side driver for the team-based inner-product kernel (fragment).
// NOTE(review): the template header, the function name line, the remaining
// parameters, the definitions of execution_space, functor_type and m_c_star,
// the lambda bodies and the braces are not present in this listing.
233 typename VA::const_value_type &alpha,
236 typename VY::const_value_type &beta,
240 using IndexType =
typename VA::size_type;
241 using team_policy_type = Kokkos::TeamPolicy<execution_space>;
// m is the length of y, n is the length of x.
244 const size_t m = y.extent(0);
245 const size_t n = x.extent(0);
// Threads per team come from the build-time constant STOKHOS_GEMV_TEAM_SIZE.
247 const size_t team_size = STOKHOS_GEMV_TEAM_SIZE;
// N is the thread pool size of the execution space; the accessor name
// depends on KOKKOS_ENABLE_DEPRECATED_CODE (the #else / #endif lines are
// not visible here).
249 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE
250 const size_t N = execution_space::thread_pool_size();
252 const size_t N = execution_space::impl_thread_pool_size();
// Same rounding-up scheme as the update driver, but partitioning the n
// entries of x instead of the entries of y.
// NOTE(review): m_c_star is defined on a line not shown; confirm its value.
255 const size_t n_tiles_per_thread =
ceil(((
double)n) / (N * m_c_star));
256 const size_t m_c =
ceil(((
double)n) / (N * n_tiles_per_thread));
// Each team covers m_c entries per team thread.
257 const size_t n_per_tile2 = m_c * team_size;
// League size: number of chunks of n_per_tile2 entries needed to cover n.
259 const size_t n_i2 =
ceil(((
double)n) / n_per_tile2);
261 team_policy_type team(n_i2, team_size);
// Two parallel loops over the m entries of y precede the main kernel.
// NOTE(review): the lambda bodies and the condition selecting between them
// are not visible - presumably they zero or scale y by beta, since the
// functor below is constructed without beta; confirm.
264 Kokkos::parallel_for(
265 m, KOKKOS_LAMBDA(
const int i) {
269 Kokkos::parallel_for(
270 m, KOKKOS_LAMBDA(
const int i) {
// Launch the team kernel; the functor receives the per-team chunk length.
275 functor_type functor(alpha, A, x, y, n_per_tile2);
277 Kokkos::parallel_for(
"KokkosBlas::gemv[InnerProducts]", team, functor);
// GEMV entry point (fragment): checks the ranks of the three views at
// compile time and dispatches on the first character of `trans`.
// NOTE(review): the return type, the function name line, the `trans`
// parameter, the definition of Scalar, the `else` keyword and the braces are
// not present in this listing.
282 template <
typename DA,
typename... PA,
283 typename DX,
typename... PX,
284 typename DY,
typename... PY>
289 typename Kokkos::View<DA, PA...>::const_value_type &alpha,
290 const Kokkos::View<DA, PA...> &
A,
291 const Kokkos::View<DX, PX...> &x,
292 typename Kokkos::View<DY, PY...>::const_value_type &beta,
293 const Kokkos::View<DY, PY...> &y)
// Short aliases for the matrix, input-vector and output-vector view types.
296 typedef Kokkos::View<DA, PA...> VA;
297 typedef Kokkos::View<DX, PX...> VX;
298 typedef Kokkos::View<DY, PY...> VY;
// Compile-time rank checks. The messages name GEMV (this routine); they
// previously said GEMM, which pointed at the wrong routine on failure.
300 static_assert(VA::rank == 2,
"GEMV: A must have rank 2 (be a matrix).");
301 static_assert(VX::rank == 1,
"GEMV: x must have rank 1 (be a vector).");
302 static_assert(VY::rank == 1,
"GEMV: y must have rank 1 (be a vector).");
// 'n' / 'N' selects the tiled update kernel; the remaining call is the
// inner-product kernel (presumably the else branch for every other value
// of trans[0]; confirm).
304 if (trans[0] ==
'n' || trans[0] ==
'N')
305 update_MP<Scalar, VA, VX, VY>(alpha, A, x, beta, y);
307 inner_products_MP<Scalar, VA, VX, VY>(alpha, A, x, beta, y);
Kokkos::DefaultExecutionSpace execution_space
std::enable_if< Kokkos::is_view_mp_vector< Kokkos::View< DA, PA...> >::value &&Kokkos::is_view_mp_vector< Kokkos::View< DX, PX...> >::value &&Kokkos::is_view_mp_vector< Kokkos::View< DY, PY...> >::value >::type gemv(const char trans[], typename Kokkos::View< DA, PA...>::const_value_type &alpha, const Kokkos::View< DA, PA...> &A, const Kokkos::View< DX, PX...> &x, typename Kokkos::View< DY, PY...>::const_value_type &beta, const Kokkos::View< DY, PY...> &y)
#define Sacado_MP_Vector_GEMV_Tile_Size(size)
Kokkos::DefaultExecutionSpace execution_space
Kokkos::TeamPolicy< execution_space > policy_type
void update_MP(typename VA::const_value_type &alpha, const VA &A, const VX &x, typename VY::const_value_type &beta, const VY &y)
typename AViewType::execution_space execution_space
typename AViewType::non_const_value_type AlphaCoeffType
KOKKOS_INLINE_FUNCTION PCE< Storage > ceil(const PCE< Storage > &a)
KOKKOS_INLINE_FUNCTION void operator()(const IndexType &i_tile) const
KOKKOS_INLINE_FUNCTION void operator()(const member_type &team) const
typename YViewType::non_const_value_type BetaCoeffType
typename policy_type::member_type member_type
updateF(const AlphaCoeffType &alpha, const AViewType &A, const XViewType &x, const BetaCoeffType &beta, const YViewType &y, const IndexType m_c)
innerF(const AlphaCoeffType &alpha, const AViewType &A, const XViewType &x, const YViewType &y, const IndexType n_c)
typename AViewType::non_const_value_type AlphaCoeffType
void inner_products_MP(typename VA::const_value_type &alpha, const VA &A, const VX &x, typename VY::const_value_type &beta, const VY &y)