Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
KokkosBlas2_gemv_MP_Vector.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Stokhos Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38 //
39 // ***********************************************************************
40 // @HEADER
41 
42 #ifndef KOKKOSBLAS2_GEMV_MP_VECTOR_HPP
43 #define KOKKOSBLAS2_GEMV_MP_VECTOR_HPP
44 
45 #include <type_traits>
46 #include "Sacado_ConfigDefs.h"
47 
48 #include "Stokhos_ViewStorage.hpp"
49 #include "Sacado_MP_Vector.hpp"
52 #include "KokkosBlas.hpp"
53 
55 #include "Kokkos_Core.hpp"
56 
57 #include "Stokhos_config.h"
58 
59 #define Sacado_MP_Vector_GEMV_Tile_Size(size) (STOKHOS_GEMV_CACHE_SIZE / size)
60 
61 // Functor for the update
62 template <class AViewType,
63  class XViewType,
64  class YViewType,
65  class IndexType = typename AViewType::size_type>
66 struct updateF
67 {
68  using AlphaCoeffType = typename AViewType::non_const_value_type;
69  using BetaCoeffType = typename YViewType::non_const_value_type;
70 
71  updateF(const AlphaCoeffType &alpha,
72  const AViewType &A,
73  const XViewType &x,
74  const BetaCoeffType &beta,
75  const YViewType &y,
76  const IndexType m_c) : alpha_(alpha), A_(A), x_(x), beta_(beta), y_(y), m_c_(m_c)
77  {
78  }
79 
80 public:
81  // i_tile is the current tile of the input matrix A
82  KOKKOS_INLINE_FUNCTION void
83  operator()(const IndexType &i_tile) const
84  {
85  const IndexType m = y_.extent(0);
86  const IndexType n = x_.extent(0);
87 
88  IndexType i_min = m_c_ * i_tile;
89  bool last_tile = (i_min + m_c_ >= m);
90  IndexType i_max = (last_tile) ? m : (i_min + m_c_);
91 
92 #ifdef STOKHOS_HAVE_PRAGMA_UNROLL
93 #pragma unroll
94 #endif
95  if (beta_ == BetaCoeffType(0.))
96  for (IndexType i = i_min; i < i_max; ++i)
97  y_(i) = beta_;
98  else
99  for (IndexType i = i_min; i < i_max; ++i)
100  y_(i) *= beta_;
101 
102  for (IndexType j = 0; j < n; ++j)
103  {
104  AlphaCoeffType alphab = alpha_ * x_(j);
105 
106  for (IndexType i = i_min; i < i_max; ++i)
107  y_(i) += alphab * A_(i, j);
108  }
109  }
110 
111 private:
113  typename AViewType::const_type A_;
114  typename XViewType::const_type x_;
116  YViewType y_;
117  const IndexType m_c_;
118 };
119 
120 // Functor for the inner products
121 template <class AViewType,
122  class XViewType,
123  class YViewType,
124  class IndexType = typename AViewType::size_type>
125 struct innerF
126 {
128  using policy_type = Kokkos::TeamPolicy<execution_space>;
129  using member_type = typename policy_type::member_type;
130 
131  using AlphaCoeffType = typename AViewType::non_const_value_type;
133 
134  innerF(const AlphaCoeffType &alpha,
135  const AViewType &A,
136  const XViewType &x,
137  const YViewType &y,
138  const IndexType n_c) : alpha_(alpha), A_(A), x_(x), y_(y), n_c_(n_c)
139  {
140  }
141 
142 public:
143  KOKKOS_INLINE_FUNCTION void
144  operator()(const member_type &team) const
145  {
146  const IndexType m = y_.extent(0);
147  const IndexType n = x_.extent(0);
148 
149  const int j = team.league_rank();
150  const IndexType j_min = n_c_ * j;
151  const IndexType nj = (j_min + n_c_ > n) ? (n - j_min) : n_c_;
152  const IndexType i_min = j % m;
153 
154  for (IndexType i = i_min; i < m; ++i)
155  {
156  Scalar tmp = 0.;
157  Kokkos::parallel_reduce(
158  Kokkos::TeamThreadRange(team, nj), [=](int jj, Scalar &tmp_sum) {
159  tmp_sum += A_(jj + j_min, i) * x_(jj + j_min);
160  },
161  tmp);
162  if (team.team_rank() == 0)
163  {
164  tmp *= alpha_;
165  Kokkos::atomic_add<Scalar>(&y_(i), tmp);
166  }
167  }
168  for (IndexType i = 0; i < i_min; ++i)
169  {
170  Scalar tmp = 0.;
171  Kokkos::parallel_reduce(
172  Kokkos::TeamThreadRange(team, nj), [=](int jj, Scalar &tmp_sum) {
173  tmp_sum += A_(jj + j_min, i) * x_(jj + j_min);
174  },
175  tmp);
176  if (team.team_rank() == 0)
177  {
178  tmp *= alpha_;
179  Kokkos::atomic_add<Scalar>(&y_(i), tmp);
180  }
181  }
182  }
183 
184 private:
186  typename AViewType::const_type A_;
187  typename XViewType::const_type x_;
188  YViewType y_;
189  const IndexType n_c_;
190 };
191 
192 template <class Scalar,
193  class VA,
194  class VX,
195  class VY>
197  typename VA::const_value_type &alpha,
198  const VA &A,
199  const VX &x,
200  typename VY::const_value_type &beta,
201  const VY &y)
202 {
203  using execution_space = typename VA::execution_space;
204  using IndexType = typename VA::size_type;
205  using policy_type = Kokkos::RangePolicy<execution_space, IndexType>;
206 
207  // Get the dimensions
208  const size_t m = y.extent(0);
209 
210 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE
211  const size_t N = execution_space::thread_pool_size();
212 #else
213  const size_t N = execution_space::impl_thread_pool_size();
214 #endif
215  const size_t m_c_star = Sacado_MP_Vector_GEMV_Tile_Size(sizeof(Scalar));
216  const size_t n_tiles_per_thread = ceil(((double)m) / (N * m_c_star));
217  const size_t m_c = ceil(((double)m) / (N * n_tiles_per_thread));
218  const size_t n_tiles = N * n_tiles_per_thread;
219 
220  policy_type range(0, n_tiles);
221 
222  using functor_type = updateF<VA, VX, VY, IndexType>;
223  functor_type functor(alpha, A, x, beta, y, m_c);
224 
225  Kokkos::parallel_for("KokkosBlas::gemv[Update]", range, functor);
226 }
227 
228 template <class Scalar,
229  class VA,
230  class VX,
231  class VY>
233  typename VA::const_value_type &alpha,
234  const VA &A,
235  const VX &x,
236  typename VY::const_value_type &beta,
237  const VY &y)
238 {
239  using execution_space = typename VA::execution_space;
240  using IndexType = typename VA::size_type;
241  using team_policy_type = Kokkos::TeamPolicy<execution_space>;
242 
243  // Get the dimensions
244  const size_t m = y.extent(0);
245  const size_t n = x.extent(0);
246 
247  const size_t team_size = STOKHOS_GEMV_TEAM_SIZE;
248 
249 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE
250  const size_t N = execution_space::thread_pool_size();
251 #else
252  const size_t N = execution_space::impl_thread_pool_size();
253 #endif
254  const size_t m_c_star = Sacado_MP_Vector_GEMV_Tile_Size(sizeof(Scalar));
255  const size_t n_tiles_per_thread = ceil(((double)n) / (N * m_c_star));
256  const size_t m_c = ceil(((double)n) / (N * n_tiles_per_thread));
257  const size_t n_per_tile2 = m_c * team_size;
258 
259  const size_t n_i2 = ceil(((double)n) / n_per_tile2);
260 
261  team_policy_type team(n_i2, team_size);
262 
263  if (beta == Scalar(0.))
264  Kokkos::parallel_for(
265  m, KOKKOS_LAMBDA(const int i) {
266  y(i) = beta;
267  });
268  else
269  Kokkos::parallel_for(
270  m, KOKKOS_LAMBDA(const int i) {
271  y(i) *= beta;
272  });
273 
274  using functor_type = innerF<VA, VX, VY, IndexType>;
275  functor_type functor(alpha, A, x, y, n_per_tile2);
276 
277  Kokkos::parallel_for("KokkosBlas::gemv[InnerProducts]", team, functor);
278 }
279 
280 namespace KokkosBlas
281 {
282 template <typename DA, typename... PA,
283  typename DX, typename... PX,
284  typename DY, typename... PY>
285 typename std::enable_if<Kokkos::is_view_mp_vector<Kokkos::View<DA, PA...>>::value &&
286  Kokkos::is_view_mp_vector<Kokkos::View<DX, PX...>>::value &&
287  Kokkos::is_view_mp_vector<Kokkos::View<DY, PY...>>::value>::type
288 gemv(const char trans[],
289  typename Kokkos::View<DA, PA...>::const_value_type &alpha,
290  const Kokkos::View<DA, PA...> &A,
291  const Kokkos::View<DX, PX...> &x,
292  typename Kokkos::View<DY, PY...>::const_value_type &beta,
293  const Kokkos::View<DY, PY...> &y)
294 {
295  typedef typename Kokkos::View<DA, PA...>::value_type Scalar;
296  typedef Kokkos::View<DA, PA...> VA;
297  typedef Kokkos::View<DX, PX...> VX;
298  typedef Kokkos::View<DY, PY...> VY;
299 
300  static_assert(VA::rank == 2, "GEMM: A must have rank 2 (be a matrix).");
301  static_assert(VX::rank == 1, "GEMM: x must have rank 1 (be a vector).");
302  static_assert(VY::rank == 1, "GEMM: y must have rank 1 (be a vector).");
303 
304  if (trans[0] == 'n' || trans[0] == 'N')
305  update_MP<Scalar, VA, VX, VY>(alpha, A, x, beta, y);
306  else
307  inner_products_MP<Scalar, VA, VX, VY>(alpha, A, x, beta, y);
308 }
309 
310 } // namespace KokkosBlas
311 #endif
Kokkos::DefaultExecutionSpace execution_space
std::enable_if< Kokkos::is_view_mp_vector< Kokkos::View< DA, PA...> >::value &&Kokkos::is_view_mp_vector< Kokkos::View< DX, PX...> >::value &&Kokkos::is_view_mp_vector< Kokkos::View< DY, PY...> >::value >::type gemv(const char trans[], typename Kokkos::View< DA, PA...>::const_value_type &alpha, const Kokkos::View< DA, PA...> &A, const Kokkos::View< DX, PX...> &x, typename Kokkos::View< DY, PY...>::const_value_type &beta, const Kokkos::View< DY, PY...> &y)
#define Sacado_MP_Vector_GEMV_Tile_Size(size)
Kokkos::DefaultExecutionSpace execution_space
Kokkos::TeamPolicy< execution_space > policy_type
AViewType::const_type A_
void update_MP(typename VA::const_value_type &alpha, const VA &A, const VX &x, typename VY::const_value_type &beta, const VY &y)
XViewType::const_type x_
typename AViewType::execution_space execution_space
AlphaCoeffType alpha_
typename AViewType::non_const_value_type AlphaCoeffType
KOKKOS_INLINE_FUNCTION PCE< Storage > ceil(const PCE< Storage > &a)
KOKKOS_INLINE_FUNCTION void operator()(const IndexType &i_tile) const
AViewType::const_type A_
KOKKOS_INLINE_FUNCTION void operator()(const member_type &team) const
const IndexType n_c_
typename YViewType::non_const_value_type BetaCoeffType
AlphaCoeffType Scalar
typename policy_type::member_type member_type
updateF(const AlphaCoeffType &alpha, const AViewType &A, const XViewType &x, const BetaCoeffType &beta, const YViewType &y, const IndexType m_c)
const IndexType m_c_
int n
innerF(const AlphaCoeffType &alpha, const AViewType &A, const XViewType &x, const YViewType &y, const IndexType n_c)
typename AViewType::non_const_value_type AlphaCoeffType
XViewType::const_type x_
void inner_products_MP(typename VA::const_value_type &alpha, const VA &A, const VX &x, typename VY::const_value_type &beta, const VY &y)
AlphaCoeffType alpha_