Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Stokhos_Multiply.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Stokhos Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38 //
39 // ***********************************************************************
40 // @HEADER
41 
42 #ifndef STOKHOS_MULTIPLY_HPP
43 #define STOKHOS_MULTIPLY_HPP
44 
45 //#include "Kokkos_Macros.hpp"
46 //#include "Kokkos_Pair.hpp"
47 //#include "impl/Kokkos_Traits.hpp"
48 
49 #include "Kokkos_Core.hpp"
50 
51 #include <vector> // for std::vector (needed below)
52 
53 namespace Stokhos {
54 
// Empty tag type naming the default multiply implementation. It is the
// default ImplTag argument of the Multiply class template and is accepted as
// a trailing argument by the multiply() overloads in this header.
class DefaultMultiply {};
56 
// Empty type that encodes an unsigned rank as a compile-time value, so a
// rank can be used as a template type argument.
template <unsigned> class IntegralRank {};

// Trait mapping a vector/view type T to IntegralRank< T::Rank >.
// T must expose a compile-time constant named Rank.
template <typename T> struct ViewRank {
  typedef IntegralRank< T::Rank > type;
};

// Specialization for std::vector<T>: the rank is taken from the element
// type T, not from the std::vector itself.
// NOTE(review): the member typedef of this specialization was missing from
// the listing this file was recovered from; it is reconstructed here to
// mirror the primary template -- confirm against the upstream header.
template <typename T> struct ViewRank< std::vector<T> > {
  typedef IntegralRank< T::Rank > type;
};
66 
67 template <typename MatrixType,
68  typename InputVectorType,
69  typename OutputVectorType,
70  typename ColumnIndicesType = void,
71  typename VectorRank = typename ViewRank<InputVectorType>::type,
72  typename ImplTag = DefaultMultiply
73  > class Multiply;
74 
75 template <typename MatrixType,
76  typename InputVectorType,
77  typename OutputVectorType>
78 void multiply(const MatrixType& A,
79  const InputVectorType& x,
80  OutputVectorType& y) {
82  multiply_type::apply( A, x, y );
83 }
84 
85 template <typename MatrixType,
86  typename InputVectorType,
87  typename OutputVectorType>
88 void multiply(const MatrixType& A,
89  const InputVectorType& x,
90  OutputVectorType& y,
91  DefaultMultiply tag) {
93  multiply_type::apply( A, x, y );
94 }
95 
96 template <typename MatrixType,
97  typename InputVectorType,
98  typename OutputVectorType,
99  typename ColumnIndicesType>
100 void multiply(const MatrixType& A,
101  const InputVectorType& x,
102  OutputVectorType& y,
103  const ColumnIndicesType& col) {
105  multiply_type::apply( A, x, y, col );
106 }
107 
108 template <typename MatrixType,
109  typename InputVectorType,
110  typename OutputVectorType,
111  typename ColumnIndicesType>
112 void multiply(const MatrixType& A,
113  const InputVectorType& x,
114  OutputVectorType& y,
115  const ColumnIndicesType& col,
116  DefaultMultiply tag) {
118  multiply_type::apply( A, x, y, col );
119 }
120 
// Declaration of the block multiply class template, parameterized on a block
// specification type; only declared here, with no definition in this header.
template <typename BlockSpec> class BlockMultiply;
122 
123 namespace details {
124 
125 /*
126  * Compute work range = (begin, end) such that adjacent threads/blocks write to
127  * separate cache lines
128  */
129 template <typename scalar_type, typename execution_space, typename size_type>
130 KOKKOS_INLINE_FUNCTION
131 Kokkos::pair<size_type, size_type>
133  const size_type work_count,
134  const size_type thread_count,
135  const size_type thread_rank)
136 {
137 #if defined( KOKKOS_ENABLE_CUDA )
138  enum { cache_line =
139  std::is_same<execution_space,Kokkos::Cuda>::value ? 128 : 64 };
140 #else
141  enum { cache_line = 64 };
142 #endif
143 
144  enum { work_align = cache_line / sizeof(scalar_type) };
145  enum { work_shift = Kokkos::Impl::power_of_two< work_align >::value };
146  enum { work_mask = work_align - 1 };
147 
148  const size_type work_per_thread =
149  ( ( ( ( work_count + work_mask ) >> work_shift ) + thread_count - 1 ) /
150  thread_count ) << work_shift ;
151 
152  size_type work_begin = thread_rank * work_per_thread;
153  size_type work_end = work_begin + work_per_thread;
154  if (work_begin > work_count)
155  work_begin = work_count;
156  if (work_end > work_count)
157  work_end = work_count;
158 
159  return Kokkos::make_pair(work_begin, work_end);
160 }
161 
162 // Functor implementing assignment update for multiply kernels
164  template <typename Scalar>
165  KOKKOS_INLINE_FUNCTION
166  void operator()(Scalar& y, const Scalar& x) const { y = x; }
167 };
168 
169 // Functor implementing += update for multiply kernels
171  template <typename Scalar>
172  KOKKOS_INLINE_FUNCTION
173  void operator()(Scalar& y, const Scalar& x) const { y += x; }
174 };
175 
176 // Functor implementing scaled assignment update for multiply kernels
177 template <typename Value>
179  const Value a;
180  MultiplyScaledAssign(const Value& a_) : a(a_) {}
181  template <typename Scalar>
182  KOKKOS_INLINE_FUNCTION
183  void operator()(Scalar& y, const Scalar& x) const { y = a*x; }
184 };
185 
186 // Functor implementing += update for multiply kernels
187 template <typename Value>
189  const Value a;
190  MultiplyScaledUpdate(const Value& a_) : a(a_) {}
191  template <typename Scalar>
192  KOKKOS_INLINE_FUNCTION
193  void operator()(Scalar& y, const Scalar& x) const { y += a*x; }
194 };
195 
196 // Functor implementing saxpby update for multiply kernels
197 template <typename Value>
199  const Value a;
200  const Value b;
201  MultiplyScaledUpdate2(const Value& a_, const Value& b_) : a(a_), b(b_) {}
202  template <typename Scalar>
203  KOKKOS_INLINE_FUNCTION
204  void operator()(Scalar& y, const Scalar& x) const { y = a*x + b*y; }
205 };
206 
207 } // namespace details
208 
209 } // namespace Stokhos
210 
211 #endif
IntegralRank< T::Rank > type
Kokkos::DefaultExecutionSpace execution_space
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
void multiply(const CrsMatrix< MatrixValue, Device, Layout > &A, const InputMultiVectorType &x, OutputMultiVectorType &y, const std::vector< OrdinalType > &col_indices, SingleColumnMultivectorMultiply)
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
MultiplyScaledUpdate2(const Value &a_, const Value &b_)
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
KOKKOS_INLINE_FUNCTION Kokkos::pair< size_type, size_type > compute_work_range(const execution_space device, const size_type work_count, const size_type thread_count, const size_type thread_rank)
Kokkos::DefaultExecutionSpace device