Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Stokhos_Multiply.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Stokhos Package
4 //
5 // Copyright 2009 NTESS and the Stokhos contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef STOKHOS_MULTIPLY_HPP
11 #define STOKHOS_MULTIPLY_HPP
12 
13 //#include "Kokkos_Macros.hpp"
14 //#include "Kokkos_Pair.hpp"
15 //#include "impl/Kokkos_Traits.hpp"
16 
17 #include "Kokkos_Core.hpp"
18 
19 #include <vector> // for std::vector (needed below)
20 
21 namespace Stokhos {
22 
23 template <size_t N>
25  enum type { value = (N > 0) && !(N & (N - 1)) };
26 };
27 
28 template <size_t N, bool OK = is_power_of_two<N>::value>
29 struct power_of_two;
30 
31 template <size_t N>
32 struct power_of_two<N, true> {
33  enum type { value = 1 + power_of_two<(N >> 1), true>::value };
34 };
35 
36 template <>
37 struct power_of_two<2, true> {
38  enum type { value = 1 };
39 };
40 
41 template <>
42 struct power_of_two<1, true> {
43  enum type { value = 0 };
44 };
45 
46 class DefaultMultiply {};
47 
48 template <unsigned> class IntegralRank {};
49 
50 template <typename T> struct ViewRank {
52 };
53 
54 template <typename T> struct ViewRank< std::vector<T> > {
56 };
57 
58 template <typename MatrixType,
59  typename InputVectorType,
60  typename OutputVectorType,
61  typename ColumnIndicesType = void,
62  typename VectorRank = typename ViewRank<InputVectorType>::type,
63  typename ImplTag = DefaultMultiply
64  > class Multiply;
65 
66 template <typename MatrixType,
67  typename InputVectorType,
68  typename OutputVectorType>
69 void multiply(const MatrixType& A,
70  const InputVectorType& x,
71  OutputVectorType& y) {
73  multiply_type::apply( A, x, y );
74 }
75 
76 namespace { // (anonymous)
77 
78 // Work-around for CWG 1558. See
79 // https://en.cppreference.com/w/cpp/types/void_t
80 template<class... Ts> struct make_void { typedef void type; };
81 template<class... Ts>
82 using replace_me_with_void_t_in_cxx17 =
83  typename make_void<Ts...>::type;
84 
85 template<class T, class = replace_me_with_void_t_in_cxx17<> >
86 struct const_type_impl {
87  using type = T;
88 };
89 
90 template<class T>
91 struct const_type_impl<T,
92  replace_me_with_void_t_in_cxx17<typename T::const_type> > {
93  using type = typename T::const_type;
94 };
95 
96 template<class T>
97 using const_type_t = typename const_type_impl<T>::type;
98 
99 } // namespace (anonymous)
100 
101 template <typename MatrixType,
102  typename InputVectorType,
103  typename OutputVectorType>
104 void multiply(const MatrixType& A,
105  const InputVectorType& x,
106  OutputVectorType& y,
107  DefaultMultiply tag) {
108  // mfh 29 Jul 2019: Not sure why, but std::vector claims to be a
109  // Kokkos::View using Kokkos::is_view. This is why I check instead
110  // whether the class has a const_type typedef.
111  using input_vector_type = const_type_t<InputVectorType>;
112  using multiply_type =
114  multiply_type::apply( A, x, y );
115 }
116 
117 template <typename MatrixType,
118  typename InputVectorType,
119  typename OutputVectorType,
120  typename ColumnIndicesType>
121 void multiply(const MatrixType& A,
122  const InputVectorType& x,
123  OutputVectorType& y,
124  const ColumnIndicesType& col) {
126  multiply_type::apply( A, x, y, col );
127 }
128 
129 template <typename MatrixType,
130  typename InputVectorType,
131  typename OutputVectorType,
132  typename ColumnIndicesType>
133 void multiply(const MatrixType& A,
134  const InputVectorType& x,
135  OutputVectorType& y,
136  const ColumnIndicesType& col,
137  DefaultMultiply tag) {
139  multiply_type::apply( A, x, y, col );
140 }
141 
142 template <typename BlockSpec> class BlockMultiply;
143 
144 namespace details {
145 
146 /*
147  * Compute work range = (begin, end) such that adjacent threads/blocks write to
148  * separate cache lines
149  */
150 template <typename scalar_type, typename execution_space, typename size_type>
151 KOKKOS_INLINE_FUNCTION
152 Kokkos::pair<size_type, size_type>
154  const size_type work_count,
155  const size_type thread_count,
156  const size_type thread_rank)
157 {
158 #if defined( KOKKOS_ENABLE_CUDA )
159  enum { cache_line =
160  std::is_same<execution_space,Kokkos::Cuda>::value ? 128 : 64 };
161 #else
162  enum { cache_line = 64 };
163 #endif
164 
165  enum { work_align = cache_line / sizeof(scalar_type) };
166  enum { work_shift = power_of_two< work_align >::value };
167  enum { work_mask = work_align - 1 };
168 
169  const size_type work_per_thread =
170  ( ( ( ( work_count + work_mask ) >> work_shift ) + thread_count - 1 ) /
171  thread_count ) << work_shift ;
172 
173  size_type work_begin = thread_rank * work_per_thread;
174  size_type work_end = work_begin + work_per_thread;
175  if (work_begin > work_count)
176  work_begin = work_count;
177  if (work_end > work_count)
178  work_end = work_count;
179 
180  return Kokkos::make_pair(work_begin, work_end);
181 }
182 
183 // Functor implementing assignment update for multiply kernels
185  template <typename Scalar>
186  KOKKOS_INLINE_FUNCTION
187  void operator()(Scalar& y, const Scalar& x) const { y = x; }
188 };
189 
190 // Functor implementing += update for multiply kernels
192  template <typename Scalar>
193  KOKKOS_INLINE_FUNCTION
194  void operator()(Scalar& y, const Scalar& x) const { y += x; }
195 };
196 
197 // Functor implementing scaled assignment update for multiply kernels
198 template <typename Value>
200  const Value a;
201  MultiplyScaledAssign(const Value& a_) : a(a_) {}
202  template <typename Scalar>
203  KOKKOS_INLINE_FUNCTION
204  void operator()(Scalar& y, const Scalar& x) const { y = a*x; }
205 };
206 
207 // Functor implementing += update for multiply kernels
208 template <typename Value>
210  const Value a;
211  MultiplyScaledUpdate(const Value& a_) : a(a_) {}
212  template <typename Scalar>
213  KOKKOS_INLINE_FUNCTION
214  void operator()(Scalar& y, const Scalar& x) const { y += a*x; }
215 };
216 
217 // Functor implementing saxpby update for multiply kernels
218 template <typename Value>
220  const Value a;
221  const Value b;
222  MultiplyScaledUpdate2(const Value& a_, const Value& b_) : a(a_), b(b_) {}
223  template <typename Scalar>
224  KOKKOS_INLINE_FUNCTION
225  void operator()(Scalar& y, const Scalar& x) const { y = a*x + b*y; }
226 };
227 
228 } // namespace details
229 
230 } // namespace Stokhos
231 
232 #endif
Kokkos::DefaultExecutionSpace execution_space
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
void multiply(const CrsMatrix< MatrixValue, Device, Layout > &A, const InputMultiVectorType &x, OutputMultiVectorType &y, const std::vector< OrdinalType > &col_indices, SingleColumnMultivectorMultiply)
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
MultiplyScaledUpdate2(const Value &a_, const Value &b_)
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
KOKKOS_INLINE_FUNCTION Kokkos::pair< size_type, size_type > compute_work_range(const execution_space device, const size_type work_count, const size_type thread_count, const size_type thread_rank)
Kokkos::DefaultExecutionSpace device
IntegralRank< T::rank > type