26 #include <cusp/array1d.h>
28 #include <cusp/detail/format_utils.h>
// row_spmm_csr_scalar_kernel (fragment): for each row of a CSR matrix A
// (arrays Ar / Ac / Aval) and each column j of x, accumulates
// Aval[jj] * x[j + xnum_cols*Ac[jj]] over the row's entries and stores the
// result in y[j + xnum_cols*row]. Both x and y are therefore addressed with a
// row stride of xnum_cols, matching the cusp::row_major launcher in this file.
// NOTE(review): several lines of the declaration (remaining template/function
// parameters, braces) are not visible in this excerpt.
// NOTE(review): xnum_row is not referenced in any visible line of this kernel.
40 template <
typename IndexType,
44 const IndexType xnum_row,
45 const IndexType xnum_cols,
48 const ValueType * Aval,
// Flat index of this thread across the 1D launch, and the total thread count.
52 const IndexType thread_id = blockDim.x * blockIdx.x + threadIdx.x;
53 const IndexType grid_size = gridDim.x * blockDim.x;
// Grid-stride loop over rows: one thread processes a whole row, then jumps
// ahead by the total number of launched threads.
56 for(IndexType row = thread_id; row < Anum_rows; row += grid_size)
// Ar[row] .. Ar[row+1] delimit this row's entries in Ac / Aval; Ar is read at
// index row+1, so it must hold at least Anum_rows + 1 elements.
58 const IndexType row_start = Ar[row];
59 const IndexType row_end = Ar[row+1];
// Number of stored entries in this row.
// NOTE(review): r is not used in any visible line — possibly dead; confirm.
60 const IndexType r = row_end - row_start;
// Loop over the columns of x.
62 for (IndexType
j = 0;
j < xnum_cols;
j++){
// NOTE(review): the declaration/initialisation of `sum` is not visible in this
// excerpt; presumably it is reset for every j — confirm against the full file.
// Ac[jj] selects the row of x that pairs with the stored value Aval[jj].
65 for (IndexType jj = row_start; jj < row_end; jj++)
66 sum += Aval[jj] * x[
j+xnum_cols*Ac[jj]];
// Store the accumulated value for element (row, j) of y.
67 y[
j+xnum_cols*row]=
sum;
// column_spmm_csr_scalar_kernel (fragment): for each row of a CSR matrix A
// (arrays Ar / Ac / Aval) and each column j of x, accumulates
// Aval[jj] * x[Ac[jj] + xnum_rows*j] over the row's entries and stores the
// result in y[j*Anum_rows + row]. x is addressed with a column stride of
// xnum_rows and y with a column stride of Anum_rows, matching the
// cusp::column_major launcher in this file.
// NOTE(review): several lines of the declaration (remaining template/function
// parameters, closing braces) are not visible in this excerpt.
76 template <
typename IndexType,
80 const IndexType xnum_rows,
81 const IndexType xnum_cols,
84 const ValueType * Aval,
// Flat index of this thread across the 1D launch, and the total thread count.
88 const IndexType thread_id = blockDim.x * blockIdx.x + threadIdx.x;
89 const IndexType grid_size = gridDim.x * blockDim.x;
// Grid-stride loop over rows: one thread processes a whole row per iteration.
90 for(IndexType row = thread_id; row < Anum_rows; row += grid_size){
// Ar[row] .. Ar[row+1] delimit this row's entries in Ac / Aval; Ar is read at
// index row+1, so it must hold at least Anum_rows + 1 elements.
91 const IndexType row_start = Ar[row];
92 const IndexType row_end = Ar[row+1];
// Loop over the columns of x.
94 for (IndexType
j = 0;
j < xnum_cols;
j++){
// NOTE(review): the declaration/initialisation of `sum` is not visible in this
// excerpt; presumably it is reset for every j — confirm against the full file.
// Ac[jj] selects the row of x that pairs with the stored value Aval[jj].
97 for (IndexType jj = row_start; jj < row_end; jj++)
98 sum += Aval[jj] * x[Ac[jj]+xnum_rows*
j];
// Store the accumulated value for element (row, j) of y.
99 y[
j*Anum_rows+row]=
sum;
// __spmm_csr_scalar, cusp::row_major overload (fragment): host-side launcher
// that configures and launches row_spmm_csr_scalar_kernel on the raw pointers
// of A (row_offsets, column_indices, values), x.values and y.values.
// NOTE(review): parts of the template header / signature, the braces and the
// ValueType typedef are not visible in this excerpt.
115 template <
typename Matrix1,
120 Vector3& y, cusp::row_major)
122 CUSP_PROFILE_SCOPED();
123 typedef typename Vector3::index_type IndexType;
// NOTE(review): MemorySpace is not used in any visible line of this function.
125 typedef typename Vector3::memory_space MemorySpace;
// Threads per block used for the launch.
126 const size_t BLOCK_SIZE = 256;
// Upper bound on the grid size, queried from the cusp helper for this kernel
// at BLOCK_SIZE threads; the trailing 0 is presumably the dynamic shared-memory
// size in bytes — TODO confirm against max_active_blocks.
127 const size_t MAX_BLOCKS = cusp::detail::device::arch::max_active_blocks(row_spmm_csr_scalar_kernel<IndexType, ValueType>, BLOCK_SIZE, (
size_t) 0);
// Launch no more blocks than MAX_BLOCKS; the kernel's grid-stride row loop
// covers any rows beyond the launched thread count.
// NOTE(review): DIVIDE_INTO is presumably a ceiling division; if so,
// A.num_rows == 0 yields NUM_BLOCKS == 0 — confirm how empty matrices are handled.
128 const size_t NUM_BLOCKS =
std::min(MAX_BLOCKS, DIVIDE_INTO(A.num_rows, BLOCK_SIZE));
// Kernel launch on a 1D grid.
// NOTE(review): no launch error check is visible after this call in the excerpt.
132 row_spmm_csr_scalar_kernel<IndexType,ValueType> <<<NUM_BLOCKS, BLOCK_SIZE >>>
133 (A.num_rows, x.num_rows, x.num_cols,
134 thrust::raw_pointer_cast(&A.row_offsets[0]),
135 thrust::raw_pointer_cast(&A.column_indices[0]),
136 thrust::raw_pointer_cast(&A.values[0]),
137 thrust::raw_pointer_cast(&(x.values)[0]),
138 thrust::raw_pointer_cast(&(y.values)[0]));
// __spmm_csr_scalar, cusp::column_major overload (fragment): host-side launcher
// that configures and launches column_spmm_csr_scalar_kernel on the raw
// pointers of A (row_offsets, column_indices, values), x.values and y.values.
// NOTE(review): parts of the template header / signature, the braces and the
// ValueType typedef are not visible in this excerpt.
142 template <
typename Matrix1,
147 Vector3& y, cusp::column_major)
149 CUSP_PROFILE_SCOPED();
150 typedef typename Vector3::index_type IndexType;
// NOTE(review): MemorySpace is not used in any visible line of this function.
152 typedef typename Vector3::memory_space MemorySpace;
// Threads per block used for the launch.
153 const size_t BLOCK_SIZE = 256;
// Upper bound on the grid size, queried from the cusp helper for this kernel
// at BLOCK_SIZE threads; the trailing 0 is presumably the dynamic shared-memory
// size in bytes — TODO confirm against max_active_blocks.
154 const size_t MAX_BLOCKS = cusp::detail::device::arch::max_active_blocks(column_spmm_csr_scalar_kernel<IndexType, ValueType>, BLOCK_SIZE, (
size_t) 0);
// Launch no more blocks than MAX_BLOCKS; the kernel's grid-stride row loop
// covers any rows beyond the launched thread count.
// NOTE(review): DIVIDE_INTO is presumably a ceiling division; if so,
// A.num_rows == 0 yields NUM_BLOCKS == 0 — confirm how empty matrices are handled.
155 const size_t NUM_BLOCKS =
std::min(MAX_BLOCKS, DIVIDE_INTO(A.num_rows, BLOCK_SIZE));
// Kernel launch on a 1D grid.
// NOTE(review): no launch error check is visible after this call in the excerpt.
156 column_spmm_csr_scalar_kernel<IndexType,ValueType> <<<NUM_BLOCKS, BLOCK_SIZE>>>
157 (A.num_rows, x.num_rows, x.num_cols,
158 thrust::raw_pointer_cast(&A.row_offsets[0]),
159 thrust::raw_pointer_cast(&A.column_indices[0]),
160 thrust::raw_pointer_cast(&A.values[0]),
161 thrust::raw_pointer_cast(&(x.values)[0]),
162 thrust::raw_pointer_cast(&(y.values)[0]));
167 template <
typename Matrix1,
void spmm_csr_scalar(const Matrix1 &A, const Vector2 &x, Vector3 &y)
KOKKOS_INLINE_FUNCTION PCE< Storage > min(const typename PCE< Storage >::value_type &a, const PCE< Storage > &b)
__global__ void row_spmm_csr_scalar_kernel(const IndexType Anum_rows, const IndexType xnum_row, const IndexType xnum_cols, const IndexType *Ar, const IndexType *Ac, const ValueType *Aval, const ValueType *x, ValueType *y)
__global__ void column_spmm_csr_scalar_kernel(const IndexType Anum_rows, const IndexType xnum_rows, const IndexType xnum_cols, const IndexType *Ar, const IndexType *Ac, const ValueType *Aval, const ValueType *x, ValueType *y)
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< RD, RP...> >::value &&Kokkos::is_view_uq_pce< Kokkos::View< XD, XP...> >::value >::type sum(const Kokkos::View< RD, RP...> &r, const Kokkos::View< XD, XP...> &x)
void __spmm_csr_scalar(const Matrix1 &A, const Vector2 &x, Vector3 &y, cusp::row_major)