17 #include <cusp/array1d.h>
19 #include <cusp/detail/format_utils.h>
// CSR sparse-matrix times dense-matrix kernel, one matrix row per thread at a
// time ("scalar" variant). For every row of A and every column j of x it
// accumulates Aval[jj] * x[...] over the row's stored entries and writes the
// result into y. Both x and y are indexed as j + xnum_cols * <row index>,
// i.e. with xnum_cols as the row stride.
// NOTE(review): this excerpt is missing several original lines (the rest of
// the template/parameter list, the braces, and the declaration of `sum`);
// the kernel name is presumably row_spmm_csr_scalar_kernel, matching the
// signature listed further down in this file - confirm against the full source.
31 template <
typename IndexType,
35 const IndexType xnum_row,
36 const IndexType xnum_cols,
39 const ValueType * Aval,
// Flat global thread index and the total number of threads in the grid.
43 const IndexType thread_id = blockDim.x * blockIdx.x + threadIdx.x;
44 const IndexType grid_size = gridDim.x * blockDim.x;
// Grid-stride loop over the rows of A: a thread handles rows
// thread_id, thread_id + grid_size, ... while row < Anum_rows.
47 for(IndexType row = thread_id; row < Anum_rows; row += grid_size)
// Ar[row] .. Ar[row+1] delimits this row's entries in Ac / Aval.
49 const IndexType row_start = Ar[row];
50 const IndexType row_end = Ar[row+1];
// NOTE(review): `r` (entry count of the row) is not referenced in any of the
// visible lines - possibly an unused local; xnum_row is likewise not used in
// the visible body. Confirm against the full source.
51 const IndexType r = row_end - row_start;
// One output element per column j of x.
53 for (IndexType
j = 0;
j < xnum_cols;
j++){
// Accumulate over the stored entries of this row; Ac[jj] selects which row
// of x is read. NOTE(review): `sum` is presumably declared and reset to zero
// per j on an original line that is not part of this excerpt - verify.
56 for (IndexType jj = row_start; jj < row_end; jj++)
57 sum += Aval[jj] * x[
j+xnum_cols*Ac[jj]];
// Store the result for (row, j) using the same xnum_cols-strided layout.
58 y[
j+xnum_cols*row]=
sum;
// CSR sparse-matrix times dense-matrix kernel, one matrix row per thread at a
// time ("scalar" variant), for column-strided dense operands. x is read at
// Ac[jj] + xnum_rows * j and y is written at j * Anum_rows + row, so column j
// of x starts at offset xnum_rows * j and column j of y at offset Anum_rows * j.
// NOTE(review): this excerpt is missing several original lines (the rest of
// the template/parameter list, closing braces, and the declaration of `sum`);
// the kernel name is presumably column_spmm_csr_scalar_kernel, matching the
// signature listed further down in this file - confirm against the full source.
67 template <
typename IndexType,
71 const IndexType xnum_rows,
72 const IndexType xnum_cols,
75 const ValueType * Aval,
// Flat global thread index and the total number of threads in the grid.
79 const IndexType thread_id = blockDim.x * blockIdx.x + threadIdx.x;
80 const IndexType grid_size = gridDim.x * blockDim.x;
// Grid-stride loop over the rows of A: a thread handles rows
// thread_id, thread_id + grid_size, ... while row < Anum_rows.
81 for(IndexType row = thread_id; row < Anum_rows; row += grid_size){
// Ar[row] .. Ar[row+1] delimits this row's entries in Ac / Aval.
82 const IndexType row_start = Ar[row];
83 const IndexType row_end = Ar[row+1];
// One output element per column j of x.
85 for (IndexType
j = 0;
j < xnum_cols;
j++){
// Accumulate over the stored entries of this row; Ac[jj] selects the element
// within column j of x. NOTE(review): `sum` is presumably declared and reset
// to zero per j on an original line that is not part of this excerpt - verify.
88 for (IndexType jj = row_start; jj < row_end; jj++)
89 sum += Aval[jj] * x[Ac[jj]+xnum_rows*
j];
// Store the result for (row, j); consecutive rows of one column are adjacent.
90 y[
j*Anum_rows+row]=
sum;
// Host-side launcher selected by the cusp::row_major tag: configures and
// launches row_spmm_csr_scalar_kernel on A's CSR arrays and the value
// arrays of x and y.
// NOTE(review): most of the signature, the braces, and the ValueType typedef
// are not part of this excerpt; per the signature listed further down, the
// parameters are (const Matrix1 &A, const Vector2 &x, Vector3 &y, cusp::row_major).
106 template <
typename Matrix1,
111 Vector3& y, cusp::row_major)
113 CUSP_PROFILE_SCOPED();
// Index type is taken from the output container.
114 typedef typename Vector3::index_type IndexType;
// NOTE(review): MemorySpace is not referenced in the visible lines.
116 typedef typename Vector3::memory_space MemorySpace;
// Fixed threads-per-block for the launch.
117 const size_t BLOCK_SIZE = 256;
// Upper bound on the grid size for this kernel at BLOCK_SIZE threads; the
// trailing 0 is presumably the dynamic shared-memory size in bytes - confirm
// against max_active_blocks' declaration.
118 const size_t MAX_BLOCKS = cusp::detail::device::arch::max_active_blocks(row_spmm_csr_scalar_kernel<IndexType, ValueType>, BLOCK_SIZE, (
size_t) 0);
// Grid size: enough blocks to cover A.num_rows, capped at MAX_BLOCKS
// (DIVIDE_INTO is presumably a ceiling division - verify). The kernel's
// grid-stride loop covers any rows beyond the capped grid.
// NOTE(review): if A.num_rows is 0 this presumably yields NUM_BLOCKS == 0,
// which is not a valid launch configuration - confirm how callers guard it.
119 const size_t NUM_BLOCKS =
std::min(MAX_BLOCKS, DIVIDE_INTO(A.num_rows, BLOCK_SIZE));
// Launch with raw pointers to A's row offsets, column indices and values,
// and to the value storage of x and y.
// NOTE(review): no launch error check is visible in this excerpt.
123 row_spmm_csr_scalar_kernel<IndexType,ValueType> <<<NUM_BLOCKS, BLOCK_SIZE >>>
124 (A.num_rows, x.num_rows, x.num_cols,
125 thrust::raw_pointer_cast(&A.row_offsets[0]),
126 thrust::raw_pointer_cast(&A.column_indices[0]),
127 thrust::raw_pointer_cast(&A.values[0]),
128 thrust::raw_pointer_cast(&(x.values)[0]),
129 thrust::raw_pointer_cast(&(y.values)[0]));
// Host-side launcher selected by the cusp::column_major tag: configures and
// launches column_spmm_csr_scalar_kernel on A's CSR arrays and the value
// arrays of x and y.
// NOTE(review): most of the signature, the braces, and the ValueType typedef
// are not part of this excerpt; the parameter list presumably mirrors the
// row_major overload with cusp::column_major as the tag - confirm.
133 template <
typename Matrix1,
138 Vector3& y, cusp::column_major)
140 CUSP_PROFILE_SCOPED();
// Index type is taken from the output container.
141 typedef typename Vector3::index_type IndexType;
// NOTE(review): MemorySpace is not referenced in the visible lines.
143 typedef typename Vector3::memory_space MemorySpace;
// Fixed threads-per-block for the launch.
144 const size_t BLOCK_SIZE = 256;
// Upper bound on the grid size for this kernel at BLOCK_SIZE threads; the
// trailing 0 is presumably the dynamic shared-memory size in bytes - confirm
// against max_active_blocks' declaration.
145 const size_t MAX_BLOCKS = cusp::detail::device::arch::max_active_blocks(column_spmm_csr_scalar_kernel<IndexType, ValueType>, BLOCK_SIZE, (
size_t) 0);
// Grid size: enough blocks to cover A.num_rows, capped at MAX_BLOCKS
// (DIVIDE_INTO is presumably a ceiling division - verify). The kernel's
// grid-stride loop covers any rows beyond the capped grid.
// NOTE(review): if A.num_rows is 0 this presumably yields NUM_BLOCKS == 0,
// which is not a valid launch configuration - confirm how callers guard it.
146 const size_t NUM_BLOCKS =
std::min(MAX_BLOCKS, DIVIDE_INTO(A.num_rows, BLOCK_SIZE));
// Launch with raw pointers to A's row offsets, column indices and values,
// and to the value storage of x and y.
// NOTE(review): no launch error check is visible in this excerpt.
147 column_spmm_csr_scalar_kernel<IndexType,ValueType> <<<NUM_BLOCKS, BLOCK_SIZE>>>
148 (A.num_rows, x.num_rows, x.num_cols,
149 thrust::raw_pointer_cast(&A.row_offsets[0]),
150 thrust::raw_pointer_cast(&A.column_indices[0]),
151 thrust::raw_pointer_cast(&A.values[0]),
152 thrust::raw_pointer_cast(&(x.values)[0]),
153 thrust::raw_pointer_cast(&(y.values)[0]));
158 template <
typename Matrix1,
void spmm_csr_scalar(const Matrix1 &A, const Vector2 &x, Vector3 &y)
KOKKOS_INLINE_FUNCTION PCE< Storage > min(const typename PCE< Storage >::value_type &a, const PCE< Storage > &b)
__global__ void row_spmm_csr_scalar_kernel(const IndexType Anum_rows, const IndexType xnum_row, const IndexType xnum_cols, const IndexType *Ar, const IndexType *Ac, const ValueType *Aval, const ValueType *x, ValueType *y)
__global__ void column_spmm_csr_scalar_kernel(const IndexType Anum_rows, const IndexType xnum_rows, const IndexType xnum_cols, const IndexType *Ar, const IndexType *Ac, const ValueType *Aval, const ValueType *x, ValueType *y)
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< RD, RP...> >::value &&Kokkos::is_view_uq_pce< Kokkos::View< XD, XP...> >::value >::type sum(const Kokkos::View< RD, RP...> &r, const Kokkos::View< XD, XP...> &x)
void __spmm_csr_scalar(const Matrix1 &A, const Vector2 &x, Vector3 &y, cusp::row_major)