10 #ifndef STOKHOS_MULTIPLY_HPP
11 #define STOKHOS_MULTIPLY_HPP
17 #include "Kokkos_Core.hpp"
28 template <size_t N, bool OK = is_power_of_two<N>::value>
54 template <
typename T>
struct ViewRank< std::vector<T> > {
58 template <
typename MatrixType,
59 typename InputVectorType,
60 typename OutputVectorType,
61 typename ColumnIndicesType = void,
// Free-function multiply taking a matrix A, an input vector x and an output
// vector y; the visible body forwards all three to multiply_type::apply.
// NOTE(review): the line naming the function / declaring A and the line that
// defines multiply_type are not present in this listing.
66 template <
typename MatrixType,
67 typename InputVectorType,
68 typename OutputVectorType>
70 const InputVectorType& x,
71 OutputVectorType& y) {
// Delegate the actual work to the selected multiply implementation.
73 multiply_type::apply( A, x, y );
// Helper metafunction: accepts any pack of types Ts and always exposes a
// nested `type` that is void, regardless of what Ts contains.
80 template<
class... Ts>
struct make_void {
typedef void type; };
// Alias for make_void<Ts...>::type, i.e. void for any Ts. As the name says,
// it is a stand-in intended to be replaced by std::void_t under C++17.
// NOTE(review): the template header introducing Ts is not present in this
// listing.
82 using replace_me_with_void_t_in_cxx17 =
83 typename make_void<Ts...>
::type;
// Primary template of const_type_impl. The unnamed second parameter defaults
// to the void alias so that partial specializations can be selected on it.
// NOTE(review): in this listing the default argument's identifier is broken
// across two lines; it should read replace_me_with_void_t_in_cxx17<>.
// The body of the primary template is not visible here.
85 template<
class T,
class = replace_me_with_
void_t_in_cxx17<> >
86 struct const_type_impl {
// Partial specialization keyed on `typename T::const_type` being a valid
// type: the void alias then resolves to void and this specialization matches.
// Its `type` member is T::const_type.
91 struct const_type_impl<T,
92 replace_me_with_void_t_in_cxx17<typename T::const_type> > {
93 using type =
typename T::const_type;
// Convenience alias: const_type_t<T> is const_type_impl<T>::type.
// NOTE(review): the template header introducing T is not present in this
// listing.
97 using const_type_t =
typename const_type_impl<T>::type;
// multiply overload that maps the input vector type through const_type_t
// before choosing the implementation, then forwards A, x and y to
// multiply_type::apply.
// NOTE(review): the function-name line, the remaining parameters and the
// right-hand side of the multiply_type alias are missing from this listing.
101 template <
typename MatrixType,
102 typename InputVectorType,
103 typename OutputVectorType>
105 const InputVectorType& x,
// Input vector type as seen through const_type_t (T::const_type when the
// specialization above applies).
111 using input_vector_type = const_type_t<InputVectorType>;
112 using multiply_type =
114 multiply_type::apply( A, x, y );
// multiply overload that additionally takes a column-indices argument `col`
// and forwards A, x, y and col to multiply_type::apply.
// NOTE(review): the function-name line, the output-vector parameter and the
// definition of multiply_type are missing from this listing.
117 template <
typename MatrixType,
118 typename InputVectorType,
119 typename OutputVectorType,
120 typename ColumnIndicesType>
122 const InputVectorType& x,
124 const ColumnIndicesType& col) {
126 multiply_type::apply( A, x, y, col );
// Another multiply overload with a column-indices argument; the parameter
// list continues past `col` (trailing comma) but the remaining parameter(s)
// are not present in this listing. The visible body forwards A, x, y and col
// to multiply_type::apply.
129 template <
typename MatrixType,
130 typename InputVectorType,
131 typename OutputVectorType,
132 typename ColumnIndicesType>
134 const InputVectorType& x,
136 const ColumnIndicesType& col,
139 multiply_type::apply( A, x, y, col );
// Computes the (work_begin, work_end) pair of work indices assigned to
// thread `thread_rank` out of `thread_count` threads, with both ends clamped
// to `work_count`. Per-thread chunks are sized in multiples of work_align.
// NOTE(review): the function-name line, the braces, the #else/#endif lines
// and the definition of work_shift are missing from this listing.
150 template <
typename scalar_type,
typename execution_space,
typename size_type>
151 KOKKOS_INLINE_FUNCTION
152 Kokkos::pair<size_type, size_type>
154 const size_type work_count,
155 const size_type thread_count,
156 const size_type thread_rank)
// With CUDA enabled the value is 128 when execution_space is Kokkos::Cuda
// and 64 otherwise; the second enum below sets cache_line to 64
// (presumably the non-CUDA branch -- the #else line is not visible).
158 #if defined( KOKKOS_ENABLE_CUDA )
160 std::is_same<execution_space,Kokkos::Cuda>::value ? 128 : 64 };
162 enum { cache_line = 64 };
// Number of scalar_type values that fit in one cache_line.
165 enum { work_align = cache_line /
sizeof(
scalar_type) };
// Added to work_count before shifting so the division rounds up.
167 enum { work_mask = work_align - 1 };
// Round work_count up to whole work_align-sized groups, split the groups
// across threads (rounding up), then convert back to an element count.
// NOTE(review): presumably work_shift == log2(work_align) -- confirm against
// the missing definition.
169 const size_type work_per_thread =
170 ( ( ( ( work_count + work_mask ) >> work_shift ) + thread_count - 1 ) /
171 thread_count ) << work_shift ;
173 size_type work_begin = thread_rank * work_per_thread;
174 size_type work_end = work_begin + work_per_thread;
// Clamp both ends so that threads past the end of the work get an empty
// range and the last active thread does not run past work_count.
175 if (work_begin > work_count)
176 work_begin = work_count;
177 if (work_end > work_count)
178 work_end = work_count;
180 return Kokkos::make_pair(work_begin, work_end);
185 template <
typename Scalar>
186 KOKKOS_INLINE_FUNCTION
192 template <
typename Scalar>
193 KOKKOS_INLINE_FUNCTION
198 template <
typename Value>
202 template <
typename Scalar>
203 KOKKOS_INLINE_FUNCTION
208 template <
typename Value>
212 template <
typename Scalar>
213 KOKKOS_INLINE_FUNCTION
218 template <
typename Value>
223 template <
typename Scalar>
224 KOKKOS_INLINE_FUNCTION
Kokkos::DefaultExecutionSpace execution_space
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
void multiply(const CrsMatrix< MatrixValue, Device, Layout > &A, const InputMultiVectorType &x, OutputMultiVectorType &y, const std::vector< OrdinalType > &col_indices, SingleColumnMultivectorMultiply)
MultiplyScaledAssign(const Value &a_)
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
MultiplyScaledUpdate2(const Value &a_, const Value &b_)
IntegralRank< T::rank > type
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
KOKKOS_INLINE_FUNCTION Kokkos::pair< size_type, size_type > compute_work_range(const execution_space device, const size_type work_count, const size_type thread_count, const size_type thread_rank)
MultiplyScaledUpdate(const Value &a_)
IntegralRank< T::rank > type