Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
KokkosBlas2_gemv_MP_Vector.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Stokhos Package
4 //
5 // Copyright 2009 NTESS and the Stokhos contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef KOKKOSBLAS2_GEMV_MP_VECTOR_HPP
11 #define KOKKOSBLAS2_GEMV_MP_VECTOR_HPP
12 
13 #include <type_traits>
14 #include "Sacado_ConfigDefs.h"
15 
16 #include "Stokhos_ViewStorage.hpp"
17 #include "Sacado_MP_Vector.hpp"
20 #include "KokkosBlas.hpp"
21 
23 #include "Kokkos_Core.hpp"
24 
25 #include "Stokhos_config.h"
26 
27 #define Sacado_MP_Vector_GEMV_Tile_Size(size) (STOKHOS_GEMV_CACHE_SIZE / size)
28 
29 // Functor for the update
30 template <class AViewType,
31  class XViewType,
32  class YViewType,
33  class IndexType = typename AViewType::size_type>
34 struct updateF
35 {
36  using AlphaCoeffType = typename AViewType::non_const_value_type;
37  using BetaCoeffType = typename YViewType::non_const_value_type;
38 
39  updateF(const AlphaCoeffType &alpha,
40  const AViewType &A,
41  const XViewType &x,
42  const BetaCoeffType &beta,
43  const YViewType &y,
44  const IndexType m_c) : alpha_(alpha), A_(A), x_(x), beta_(beta), y_(y), m_c_(m_c)
45  {
46  }
47 
48 public:
49  // i_tile is the current tile of the input matrix A
50  KOKKOS_INLINE_FUNCTION void
51  operator()(const IndexType &i_tile) const
52  {
53  const IndexType m = y_.extent(0);
54  const IndexType n = x_.extent(0);
55 
56  IndexType i_min = m_c_ * i_tile;
57  bool last_tile = (i_min + m_c_ >= m);
58  IndexType i_max = (last_tile) ? m : (i_min + m_c_);
59 
60 #ifdef STOKHOS_HAVE_PRAGMA_UNROLL
61 #pragma unroll
62 #endif
63  if (beta_ == BetaCoeffType(0.))
64  for (IndexType i = i_min; i < i_max; ++i)
65  y_(i) = beta_;
66  else
67  for (IndexType i = i_min; i < i_max; ++i)
68  y_(i) *= beta_;
69 
70  for (IndexType j = 0; j < n; ++j)
71  {
72  AlphaCoeffType alphab = alpha_ * x_(j);
73 
74  for (IndexType i = i_min; i < i_max; ++i)
75  y_(i) += alphab * A_(i, j);
76  }
77  }
78 
79 private:
81  typename AViewType::const_type A_;
82  typename XViewType::const_type x_;
84  YViewType y_;
85  const IndexType m_c_;
86 };
87 
88 // Functor for the inner products
89 template <class AViewType,
90  class XViewType,
91  class YViewType,
92  class IndexType = typename AViewType::size_type>
93 struct innerF
94 {
96  using policy_type = Kokkos::TeamPolicy<execution_space>;
97  using member_type = typename policy_type::member_type;
98 
99  using AlphaCoeffType = typename AViewType::non_const_value_type;
101 
102  innerF(const AlphaCoeffType &alpha,
103  const AViewType &A,
104  const XViewType &x,
105  const YViewType &y,
106  const IndexType n_c) : alpha_(alpha), A_(A), x_(x), y_(y), n_c_(n_c)
107  {
108  }
109 
110 public:
111  KOKKOS_INLINE_FUNCTION void
112  operator()(const member_type &team) const
113  {
114  const IndexType m = y_.extent(0);
115  const IndexType n = x_.extent(0);
116 
117  const int j = team.league_rank();
118  const IndexType j_min = n_c_ * j;
119  const IndexType nj = (j_min + n_c_ > n) ? (n - j_min) : n_c_;
120  const IndexType i_min = j % m;
121 
122  for (IndexType i = i_min; i < m; ++i)
123  {
124  Scalar tmp = 0.;
125  Kokkos::parallel_reduce(
126  Kokkos::TeamThreadRange(team, nj), [=](int jj, Scalar &tmp_sum) {
127  tmp_sum += A_(jj + j_min, i) * x_(jj + j_min);
128  },
129  tmp);
130  if (team.team_rank() == 0)
131  {
132  tmp *= alpha_;
133  Kokkos::atomic_add(&y_(i), tmp);
134  }
135  }
136  for (IndexType i = 0; i < i_min; ++i)
137  {
138  Scalar tmp = 0.;
139  Kokkos::parallel_reduce(
140  Kokkos::TeamThreadRange(team, nj), [=](int jj, Scalar &tmp_sum) {
141  tmp_sum += A_(jj + j_min, i) * x_(jj + j_min);
142  },
143  tmp);
144  if (team.team_rank() == 0)
145  {
146  tmp *= alpha_;
147  Kokkos::atomic_add(&y_(i), tmp);
148  }
149  }
150  }
151 
152 private:
154  typename AViewType::const_type A_;
155  typename XViewType::const_type x_;
156  YViewType y_;
157  const IndexType n_c_;
158 };
159 
160 template <class Scalar,
161  class VA,
162  class VX,
163  class VY>
165  typename VA::const_value_type &alpha,
166  const VA &A,
167  const VX &x,
168  typename VY::const_value_type &beta,
169  const VY &y)
170 {
171  using execution_space = typename VA::execution_space;
172  using IndexType = typename VA::size_type;
173  using policy_type = Kokkos::RangePolicy<execution_space, IndexType>;
174 
175  // Get the dimensions
176  const size_t m = y.extent(0);
177 
178  const size_t N = execution_space().concurrency();
179  const size_t m_c_star = Sacado_MP_Vector_GEMV_Tile_Size(sizeof(Scalar));
180  const size_t n_tiles_per_thread = ceil(((double)m) / (N * m_c_star));
181  const size_t m_c = ceil(((double)m) / (N * n_tiles_per_thread));
182  const size_t n_tiles = N * n_tiles_per_thread;
183 
184  policy_type range(0, n_tiles);
185 
186  using functor_type = updateF<VA, VX, VY, IndexType>;
187  functor_type functor(alpha, A, x, beta, y, m_c);
188 
189  Kokkos::parallel_for("KokkosBlas::gemv[Update]", range, functor);
190 }
191 
192 template <class Scalar,
193  class VA,
194  class VX,
195  class VY>
197  typename VA::const_value_type &alpha,
198  const VA &A,
199  const VX &x,
200  typename VY::const_value_type &beta,
201  const VY &y)
202 {
203  using execution_space = typename VA::execution_space;
204  using IndexType = typename VA::size_type;
205  using team_policy_type = Kokkos::TeamPolicy<execution_space>;
206 
207  // Get the dimensions
208  const size_t m = y.extent(0);
209  const size_t n = x.extent(0);
210 
211  const size_t team_size = STOKHOS_GEMV_TEAM_SIZE;
212 
213  const size_t N = execution_space().concurrency();
214  const size_t m_c_star = Sacado_MP_Vector_GEMV_Tile_Size(sizeof(Scalar));
215  const size_t n_tiles_per_thread = ceil(((double)n) / (N * m_c_star));
216  const size_t m_c = ceil(((double)n) / (N * n_tiles_per_thread));
217  const size_t n_per_tile2 = m_c * team_size;
218 
219  const size_t n_i2 = ceil(((double)n) / n_per_tile2);
220 
221  team_policy_type team(n_i2, team_size);
222 
223  if (beta == Scalar(0.))
224  Kokkos::parallel_for(
225  m, KOKKOS_LAMBDA(const int i) {
226  y(i) = beta;
227  });
228  else
229  Kokkos::parallel_for(
230  m, KOKKOS_LAMBDA(const int i) {
231  y(i) *= beta;
232  });
233 
234  using functor_type = innerF<VA, VX, VY, IndexType>;
235  functor_type functor(alpha, A, x, y, n_per_tile2);
236 
237  Kokkos::parallel_for("KokkosBlas::gemv[InnerProducts]", team, functor);
238 }
239 
240 namespace KokkosBlas
241 {
242 template <typename DA, typename... PA,
243  typename DX, typename... PX,
244  typename DY, typename... PY>
245 typename std::enable_if<Kokkos::is_view_mp_vector<Kokkos::View<DA, PA...>>::value &&
246  Kokkos::is_view_mp_vector<Kokkos::View<DX, PX...>>::value &&
247  Kokkos::is_view_mp_vector<Kokkos::View<DY, PY...>>::value>::type
248 gemv(const char trans[],
249  typename Kokkos::View<DA, PA...>::const_value_type &alpha,
250  const Kokkos::View<DA, PA...> &A,
251  const Kokkos::View<DX, PX...> &x,
252  typename Kokkos::View<DY, PY...>::const_value_type &beta,
253  const Kokkos::View<DY, PY...> &y)
254 {
255  typedef typename Kokkos::View<DA, PA...>::value_type Scalar;
256  typedef Kokkos::View<DA, PA...> VA;
257  typedef Kokkos::View<DX, PX...> VX;
258  typedef Kokkos::View<DY, PY...> VY;
259 
260  static_assert(VA::rank == 2, "GEMM: A must have rank 2 (be a matrix).");
261  static_assert(VX::rank == 1, "GEMM: x must have rank 1 (be a vector).");
262  static_assert(VY::rank == 1, "GEMM: y must have rank 1 (be a vector).");
263 
264  if (trans[0] == 'n' || trans[0] == 'N')
265  update_MP<Scalar, VA, VX, VY>(alpha, A, x, beta, y);
266  else
267  inner_products_MP<Scalar, VA, VX, VY>(alpha, A, x, beta, y);
268 }
269 
270 } // namespace KokkosBlas
271 #endif
Kokkos::DefaultExecutionSpace execution_space
std::enable_if< Kokkos::is_view_mp_vector< Kokkos::View< DA, PA...> >::value &&Kokkos::is_view_mp_vector< Kokkos::View< DX, PX...> >::value &&Kokkos::is_view_mp_vector< Kokkos::View< DY, PY...> >::value >::type gemv(const char trans[], typename Kokkos::View< DA, PA...>::const_value_type &alpha, const Kokkos::View< DA, PA...> &A, const Kokkos::View< DX, PX...> &x, typename Kokkos::View< DY, PY...>::const_value_type &beta, const Kokkos::View< DY, PY...> &y)
#define Sacado_MP_Vector_GEMV_Tile_Size(size)
Kokkos::DefaultExecutionSpace execution_space
Kokkos::TeamPolicy< execution_space > policy_type
AViewType::const_type A_
void update_MP(typename VA::const_value_type &alpha, const VA &A, const VX &x, typename VY::const_value_type &beta, const VY &y)
KOKKOS_INLINE_FUNCTION void atomic_add(volatile Sacado::UQ::PCE< Storage > *const dest, const Sacado::UQ::PCE< Storage > &src)
XViewType::const_type x_
typename AViewType::execution_space execution_space
AlphaCoeffType alpha_
typename AViewType::non_const_value_type AlphaCoeffType
KOKKOS_INLINE_FUNCTION PCE< Storage > ceil(const PCE< Storage > &a)
KOKKOS_INLINE_FUNCTION void operator()(const IndexType &i_tile) const
AViewType::const_type A_
KOKKOS_INLINE_FUNCTION void operator()(const member_type &team) const
const IndexType n_c_
typename YViewType::non_const_value_type BetaCoeffType
AlphaCoeffType Scalar
typename policy_type::member_type member_type
updateF(const AlphaCoeffType &alpha, const AViewType &A, const XViewType &x, const BetaCoeffType &beta, const YViewType &y, const IndexType m_c)
const IndexType m_c_
int n
innerF(const AlphaCoeffType &alpha, const AViewType &A, const XViewType &x, const YViewType &y, const IndexType n_c)
typename AViewType::non_const_value_type AlphaCoeffType
XViewType::const_type x_
void inner_products_MP(typename VA::const_value_type &alpha, const VA &A, const VX &x, typename VY::const_value_type &beta, const VY &y)
AlphaCoeffType alpha_