42 #ifndef STOKHOS_MULTIPLY_HPP
43 #define STOKHOS_MULTIPLY_HPP
49 #include "Kokkos_Core.hpp"
63 template <
typename T>
struct ViewRank< std::vector<T> > {
67 template <
typename MatrixType,
68 typename InputVectorType,
69 typename OutputVectorType,
70 typename ColumnIndicesType = void,
// Wrapper templated on the matrix, input-vector and output-vector types.
// Takes x by const reference and y by non-const reference, and forwards
// A, x and y unchanged to multiply_type::apply.
// NOTE(review): the declarations of A and multiply_type are not visible here;
// multiply_type is presumably an instantiation of the multiply functor for
// these three types -- confirm.
75 template <
typename MatrixType,
76 typename InputVectorType,
77 typename OutputVectorType>
79 const InputVectorType& x,
80 OutputVectorType& y) {
82 multiply_type::apply( A, x, y );
// Overload templated on the matrix, input-vector and output-vector types that
// forwards A, x and y to multiply_type::apply.
// NOTE(review): only the x parameter of the signature is visible here; confirm
// which additional parameter(s) distinguish this overload from the other
// three-argument wrapper.
85 template <
typename MatrixType,
86 typename InputVectorType,
87 typename OutputVectorType>
89 const InputVectorType& x,
93 multiply_type::apply( A, x, y );
// Wrapper that additionally takes a column-index argument: col is received by
// const reference and passed as the fourth argument to multiply_type::apply,
// after A, x and y.
// NOTE(review): the declarations of A, y and multiply_type are not visible
// here -- confirm against the full signature.
96 template <
typename MatrixType,
97 typename InputVectorType,
98 typename OutputVectorType,
99 typename ColumnIndicesType>
101 const InputVectorType& x,
103 const ColumnIndicesType& col) {
105 multiply_type::apply( A, x, y, col );
// Overload of the column-index wrapper: forwards A, x, y and col to
// multiply_type::apply.
// NOTE(review): the trailing comma after col shows that at least one more
// parameter follows it in the signature; it is not among the arguments passed
// to apply. Presumably a dispatch tag -- confirm.
108 template <
typename MatrixType,
109 typename InputVectorType,
110 typename OutputVectorType,
111 typename ColumnIndicesType>
113 const InputVectorType& x,
115 const ColumnIndicesType& col,
118 multiply_type::apply( A, x, y, col );
// Computes the half-open index range [work_begin, work_end) of a work list of
// work_count items that is assigned to thread thread_rank out of thread_count
// threads.
//
// Template parameters:
//   scalar_type     - element type; its size sets the alignment granularity.
//   execution_space - compared against Kokkos::Cuda when CUDA is enabled.
//   size_type       - integer type used for counts and the returned indices.
//
// Returns a Kokkos::pair (work_begin, work_end), both clamped to work_count,
// so threads past the end of the work receive an empty range.
129 template <
typename scalar_type,
typename execution_space,
typename size_type>
130 KOKKOS_INLINE_FUNCTION
131 Kokkos::pair<size_type, size_type>
133 const size_type work_count,
134 const size_type thread_count,
135 const size_type thread_rank)
// NOTE(review): the head of the enum inside this #if is not visible;
// presumably it defines cache_line as 128 when execution_space is
// Kokkos::Cuda and 64 otherwise -- confirm.
137 #if defined( KOKKOS_ENABLE_CUDA )
139 std::is_same<execution_space,Kokkos::Cuda>::value ? 128 : 64 };
// Presumably the non-CUDA fallback (the #else / #endif lines are not visible).
141 enum { cache_line = 64 };
// Number of scalar_type elements that fit in cache_line bytes.
144 enum { work_align = cache_line /
sizeof(
scalar_type) };
// Shift and mask used to divide by / round up to work_align.
// NOTE(review): assumes power_of_two<N>::value is log2(N) -- confirm.
145 enum { work_shift = Kokkos::Impl::power_of_two< work_align >::value };
146 enum { work_mask = work_align - 1 };
// Round work_count up to whole work_align-sized chunks, ceil-divide the chunk
// count by thread_count, then shift back to a count of elements. Every
// thread's share is therefore a multiple of work_align.
148 const size_type work_per_thread =
149 ( ( ( ( work_count + work_mask ) >> work_shift ) + thread_count - 1 ) /
150 thread_count ) << work_shift ;
// Contiguous, rank-ordered slices of work_per_thread elements each.
152 size_type work_begin = thread_rank * work_per_thread;
153 size_type work_end = work_begin + work_per_thread;
// The rounding above can push slices past the end; clamp both bounds.
154 if (work_begin > work_count)
155 work_begin = work_count;
156 if (work_end > work_count)
157 work_end = work_count;
159 return Kokkos::make_pair(work_begin, work_end);
164 template <
typename Scalar>
165 KOKKOS_INLINE_FUNCTION
171 template <
typename Scalar>
172 KOKKOS_INLINE_FUNCTION
177 template <
typename Value>
181 template <
typename Scalar>
182 KOKKOS_INLINE_FUNCTION
187 template <
typename Value>
191 template <
typename Scalar>
192 KOKKOS_INLINE_FUNCTION
197 template <
typename Value>
202 template <
typename Scalar>
203 KOKKOS_INLINE_FUNCTION
IntegralRank< T::Rank > type
Kokkos::DefaultExecutionSpace execution_space
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
void multiply(const CrsMatrix< MatrixValue, Device, Layout > &A, const InputMultiVectorType &x, OutputMultiVectorType &y, const std::vector< OrdinalType > &col_indices, SingleColumnMultivectorMultiply)
MultiplyScaledAssign(const Value &a_)
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
MultiplyScaledUpdate2(const Value &a_, const Value &b_)
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
KOKKOS_INLINE_FUNCTION Kokkos::pair< size_type, size_type > compute_work_range(const execution_space device, const size_type work_count, const size_type thread_count, const size_type thread_rank)
MultiplyScaledUpdate(const Value &a_)
IntegralRank< T::Rank > type