Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Stokhos_Multiply.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Stokhos Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38 //
39 // ***********************************************************************
40 // @HEADER
41 
42 #ifndef STOKHOS_MULTIPLY_HPP
43 #define STOKHOS_MULTIPLY_HPP
44 
45 //#include "Kokkos_Macros.hpp"
46 //#include "Kokkos_Pair.hpp"
47 //#include "impl/Kokkos_Traits.hpp"
48 
49 #include "Kokkos_Core.hpp"
50 
51 #include <vector> // for std::vector (needed below)
52 
53 namespace Stokhos {
54 
55 class DefaultMultiply {};
56 
57 template <unsigned> class IntegralRank {};
58 
59 template <typename T> struct ViewRank {
61 };
62 
63 template <typename T> struct ViewRank< std::vector<T> > {
65 };
66 
67 template <typename MatrixType,
68  typename InputVectorType,
69  typename OutputVectorType,
70  typename ColumnIndicesType = void,
71  typename VectorRank = typename ViewRank<InputVectorType>::type,
72  typename ImplTag = DefaultMultiply
73  > class Multiply;
74 
75 template <typename MatrixType,
76  typename InputVectorType,
77  typename OutputVectorType>
78 void multiply(const MatrixType& A,
79  const InputVectorType& x,
80  OutputVectorType& y) {
82  multiply_type::apply( A, x, y );
83 }
84 
85 namespace { // (anonymous)
86 
87 // Work-around for CWG 1558. See
88 // https://en.cppreference.com/w/cpp/types/void_t
89 template<class... Ts> struct make_void { typedef void type; };
90 template<class... Ts>
91 using replace_me_with_void_t_in_cxx17 =
92  typename make_void<Ts...>::type;
93 
94 template<class T, class = replace_me_with_void_t_in_cxx17<> >
95 struct const_type_impl {
96  using type = T;
97 };
98 
99 template<class T>
100 struct const_type_impl<T,
101  replace_me_with_void_t_in_cxx17<typename T::const_type> > {
102  using type = typename T::const_type;
103 };
104 
105 template<class T>
106 using const_type_t = typename const_type_impl<T>::type;
107 
108 } // namespace (anonymous)
109 
110 template <typename MatrixType,
111  typename InputVectorType,
112  typename OutputVectorType>
113 void multiply(const MatrixType& A,
114  const InputVectorType& x,
115  OutputVectorType& y,
116  DefaultMultiply tag) {
117  // mfh 29 Jul 2019: Not sure why, but std::vector claims to be a
118  // Kokkos::View using Kokkos::is_view. This is why I check instead
119  // whether the class has a const_type typedef.
120  using input_vector_type = const_type_t<InputVectorType>;
121  using multiply_type =
123  multiply_type::apply( A, x, y );
124 }
125 
126 template <typename MatrixType,
127  typename InputVectorType,
128  typename OutputVectorType,
129  typename ColumnIndicesType>
130 void multiply(const MatrixType& A,
131  const InputVectorType& x,
132  OutputVectorType& y,
133  const ColumnIndicesType& col) {
135  multiply_type::apply( A, x, y, col );
136 }
137 
138 template <typename MatrixType,
139  typename InputVectorType,
140  typename OutputVectorType,
141  typename ColumnIndicesType>
142 void multiply(const MatrixType& A,
143  const InputVectorType& x,
144  OutputVectorType& y,
145  const ColumnIndicesType& col,
146  DefaultMultiply tag) {
148  multiply_type::apply( A, x, y, col );
149 }
150 
151 template <typename BlockSpec> class BlockMultiply;
152 
153 namespace details {
154 
155 /*
156  * Compute work range = (begin, end) such that adjacent threads/blocks write to
157  * separate cache lines
158  */
159 template <typename scalar_type, typename execution_space, typename size_type>
160 KOKKOS_INLINE_FUNCTION
161 Kokkos::pair<size_type, size_type>
163  const size_type work_count,
164  const size_type thread_count,
165  const size_type thread_rank)
166 {
167 #if defined( KOKKOS_ENABLE_CUDA )
168  enum { cache_line =
169  std::is_same<execution_space,Kokkos::Cuda>::value ? 128 : 64 };
170 #else
171  enum { cache_line = 64 };
172 #endif
173 
174  enum { work_align = cache_line / sizeof(scalar_type) };
175  enum { work_shift = Kokkos::Impl::power_of_two< work_align >::value };
176  enum { work_mask = work_align - 1 };
177 
178  const size_type work_per_thread =
179  ( ( ( ( work_count + work_mask ) >> work_shift ) + thread_count - 1 ) /
180  thread_count ) << work_shift ;
181 
182  size_type work_begin = thread_rank * work_per_thread;
183  size_type work_end = work_begin + work_per_thread;
184  if (work_begin > work_count)
185  work_begin = work_count;
186  if (work_end > work_count)
187  work_end = work_count;
188 
189  return Kokkos::make_pair(work_begin, work_end);
190 }
191 
192 // Functor implementing assignment update for multiply kernels
194  template <typename Scalar>
195  KOKKOS_INLINE_FUNCTION
196  void operator()(Scalar& y, const Scalar& x) const { y = x; }
197 };
198 
199 // Functor implementing += update for multiply kernels
201  template <typename Scalar>
202  KOKKOS_INLINE_FUNCTION
203  void operator()(Scalar& y, const Scalar& x) const { y += x; }
204 };
205 
206 // Functor implementing scaled assignment update for multiply kernels
207 template <typename Value>
209  const Value a;
210  MultiplyScaledAssign(const Value& a_) : a(a_) {}
211  template <typename Scalar>
212  KOKKOS_INLINE_FUNCTION
213  void operator()(Scalar& y, const Scalar& x) const { y = a*x; }
214 };
215 
216 // Functor implementing += update for multiply kernels
217 template <typename Value>
219  const Value a;
220  MultiplyScaledUpdate(const Value& a_) : a(a_) {}
221  template <typename Scalar>
222  KOKKOS_INLINE_FUNCTION
223  void operator()(Scalar& y, const Scalar& x) const { y += a*x; }
224 };
225 
226 // Functor implementing saxpby update for multiply kernels
227 template <typename Value>
229  const Value a;
230  const Value b;
231  MultiplyScaledUpdate2(const Value& a_, const Value& b_) : a(a_), b(b_) {}
232  template <typename Scalar>
233  KOKKOS_INLINE_FUNCTION
234  void operator()(Scalar& y, const Scalar& x) const { y = a*x + b*y; }
235 };
236 
237 } // namespace details
238 
239 } // namespace Stokhos
240 
241 #endif
IntegralRank< T::Rank > type
Kokkos::DefaultExecutionSpace execution_space
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
void multiply(const CrsMatrix< MatrixValue, Device, Layout > &A, const InputMultiVectorType &x, OutputMultiVectorType &y, const std::vector< OrdinalType > &col_indices, SingleColumnMultivectorMultiply)
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
MultiplyScaledUpdate2(const Value &a_, const Value &b_)
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
KOKKOS_INLINE_FUNCTION Kokkos::pair< size_type, size_type > compute_work_range(const execution_space device, const size_type work_count, const size_type thread_count, const size_type thread_rank)
Kokkos::DefaultExecutionSpace device