42 #ifndef STOKHOS_MULTIPLY_HPP
43 #define STOKHOS_MULTIPLY_HPP
49 #include "Kokkos_Core.hpp"
63 template <
typename T>
struct ViewRank< std::vector<T> > {
67 template <
typename MatrixType,
68 typename InputVectorType,
69 typename OutputVectorType,
70 typename ColumnIndicesType = void,
75 template <
typename MatrixType,
76 typename InputVectorType,
77 typename OutputVectorType>
79 const InputVectorType& x,
80 OutputVectorType& y) {
82 multiply_type::apply( A, x, y );
// Stand-in for C++17 std::void_t: turns any pack of well-formed types into
// void.  It is routed through the make_void helper rather than aliasing void
// directly, which is the customary pre-C++17 spelling of this idiom.
template <class... Ts>
struct make_void {
  using type = void;
};

template <class... Ts>
using replace_me_with_void_t_in_cxx17 = typename make_void<Ts...>::type;

// Primary template: used when `typename T::const_type` is not a valid type
// (the second parameter defaults to void, and the specialization below
// drops out of consideration on substitution failure).
// NOTE(review): the body of the primary template is not visible in this
// extract; falling back to T itself is assumed -- confirm against upstream.
template <class T, class = replace_me_with_void_t_in_cxx17<> >
struct const_type_impl {
  using type = T;
};

// Partial specialization: selected whenever T declares a nested const_type,
// in which case that nested type is reported.
template <class T>
struct const_type_impl<T,
  replace_me_with_void_t_in_cxx17<typename T::const_type> > {
  using type = typename T::const_type;
};

// Convenience alias: T::const_type if T has one, otherwise the fallback
// chosen by the primary template above.
template <class T>
using const_type_t = typename const_type_impl<T>::type;
110 template <
typename MatrixType,
111 typename InputVectorType,
112 typename OutputVectorType>
114 const InputVectorType& x,
120 using input_vector_type = const_type_t<InputVectorType>;
121 using multiply_type =
123 multiply_type::apply( A, x, y );
126 template <
typename MatrixType,
127 typename InputVectorType,
128 typename OutputVectorType,
129 typename ColumnIndicesType>
131 const InputVectorType& x,
133 const ColumnIndicesType& col) {
135 multiply_type::apply( A, x, y, col );
138 template <
typename MatrixType,
139 typename InputVectorType,
140 typename OutputVectorType,
141 typename ColumnIndicesType>
143 const InputVectorType& x,
145 const ColumnIndicesType& col,
148 multiply_type::apply( A, x, y, col );
159 template <
typename scalar_type,
typename execution_space,
typename size_type>
160 KOKKOS_INLINE_FUNCTION
161 Kokkos::pair<size_type, size_type>
163 const size_type work_count,
164 const size_type thread_count,
165 const size_type thread_rank)
167 #if defined( KOKKOS_ENABLE_CUDA )
169 std::is_same<execution_space,Kokkos::Cuda>::value ? 128 : 64 };
171 enum { cache_line = 64 };
174 enum { work_align = cache_line /
sizeof(
scalar_type) };
175 enum { work_shift = Kokkos::Impl::power_of_two< work_align >::value };
176 enum { work_mask = work_align - 1 };
178 const size_type work_per_thread =
179 ( ( ( ( work_count + work_mask ) >> work_shift ) + thread_count - 1 ) /
180 thread_count ) << work_shift ;
182 size_type work_begin = thread_rank * work_per_thread;
183 size_type work_end = work_begin + work_per_thread;
184 if (work_begin > work_count)
185 work_begin = work_count;
186 if (work_end > work_count)
187 work_end = work_count;
189 return Kokkos::make_pair(work_begin, work_end);
194 template <
typename Scalar>
195 KOKKOS_INLINE_FUNCTION
201 template <
typename Scalar>
202 KOKKOS_INLINE_FUNCTION
207 template <
typename Value>
211 template <
typename Scalar>
212 KOKKOS_INLINE_FUNCTION
217 template <
typename Value>
221 template <
typename Scalar>
222 KOKKOS_INLINE_FUNCTION
227 template <
typename Value>
232 template <
typename Scalar>
233 KOKKOS_INLINE_FUNCTION
IntegralRank< T::Rank > type
Kokkos::DefaultExecutionSpace execution_space
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
void multiply(const CrsMatrix< MatrixValue, Device, Layout > &A, const InputMultiVectorType &x, OutputMultiVectorType &y, const std::vector< OrdinalType > &col_indices, SingleColumnMultivectorMultiply)
MultiplyScaledAssign(const Value &a_)
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
MultiplyScaledUpdate2(const Value &a_, const Value &b_)
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
KOKKOS_INLINE_FUNCTION Kokkos::pair< size_type, size_type > compute_work_range(const execution_space device, const size_type work_count, const size_type thread_count, const size_type thread_rank)
MultiplyScaledUpdate(const Value &a_)
IntegralRank< T::Rank > type