17 #include <cusp/array1d.h> 
   19 #include <cusp/detail/format_utils.h> 
   31 template <
typename IndexType,
 
   35       const IndexType xnum_row,
 
   36       const IndexType xnum_cols,
 
   39                        const ValueType * Aval,
 
   43     const IndexType thread_id = blockDim.x * blockIdx.x + threadIdx.x;
 
   44     const IndexType grid_size = gridDim.x * blockDim.x;
 
   47     for(IndexType row = thread_id; row < Anum_rows; row += grid_size)
 
   49         const IndexType row_start = Ar[row];
 
   50         const IndexType row_end   = Ar[row+1];
 
   51   const IndexType r = row_end - row_start;  
 
   53   for (IndexType 
j = 0; 
j < xnum_cols; 
j++){
 
   56                 for (IndexType jj = row_start; jj < row_end; jj++)
 
   57                     sum += Aval[jj] * x[
j+xnum_cols*Ac[jj]];
 
   58     y[
j+xnum_cols*row]=
sum;
 
   67 template <
typename IndexType,
 
   71       const IndexType xnum_rows,
 
   72                         const IndexType xnum_cols,
 
   75                        const ValueType * Aval,
 
   79     const IndexType thread_id = blockDim.x * blockIdx.x + threadIdx.x;
 
   80     const IndexType grid_size = gridDim.x * blockDim.x;
 
   81     for(IndexType row = thread_id; row < Anum_rows; row += grid_size){
 
   82   const IndexType row_start = Ar[row];
 
   83         const IndexType row_end   = Ar[row+1];
 
   85   for (IndexType 
j = 0; 
j < xnum_cols; 
j++){
 
   88                 for (IndexType jj = row_start; jj < row_end; jj++)
 
   89                     sum += Aval[jj] * x[Ac[jj]+xnum_rows*
j];
 
   90     y[
j*Anum_rows+row]=
sum;
 
  106 template <
typename Matrix1,
 
  111                     Vector3& y, cusp::row_major)
 
  113   CUSP_PROFILE_SCOPED();
 
  114   typedef typename Vector3::index_type   IndexType;
 
  116   typedef typename Vector3::memory_space MemorySpace;
 
  117   const size_t BLOCK_SIZE = 256;
 
  118   const size_t MAX_BLOCKS = cusp::detail::device::arch::max_active_blocks(row_spmm_csr_scalar_kernel<IndexType, ValueType>, BLOCK_SIZE, (
size_t) 0);
 
  119   const size_t NUM_BLOCKS = 
std::min(MAX_BLOCKS, DIVIDE_INTO(A.num_rows, BLOCK_SIZE));
 
  123   row_spmm_csr_scalar_kernel<IndexType,ValueType> <<<NUM_BLOCKS, BLOCK_SIZE >>>
 
  124         (A.num_rows, x.num_rows, x.num_cols, 
 
  125          thrust::raw_pointer_cast(&A.row_offsets[0]),
 
  126          thrust::raw_pointer_cast(&A.column_indices[0]),
 
  127          thrust::raw_pointer_cast(&A.values[0]),
 
  128          thrust::raw_pointer_cast(&(x.values)[0]),
 
  129    thrust::raw_pointer_cast(&(y.values)[0]));
 
  133 template <
typename Matrix1,
 
  138                     Vector3& y, cusp::column_major)
 
  140         CUSP_PROFILE_SCOPED();
 
  141         typedef typename Vector3::index_type   IndexType;
 
  143         typedef typename Vector3::memory_space MemorySpace;
 
  144         const size_t BLOCK_SIZE = 256;
 
  145         const size_t MAX_BLOCKS = cusp::detail::device::arch::max_active_blocks(column_spmm_csr_scalar_kernel<IndexType, ValueType>, BLOCK_SIZE, (
size_t) 0);
 
  146         const size_t NUM_BLOCKS = 
std::min(MAX_BLOCKS, DIVIDE_INTO(A.num_rows, BLOCK_SIZE));
 
  147         column_spmm_csr_scalar_kernel<IndexType,ValueType> <<<NUM_BLOCKS, BLOCK_SIZE>>>
 
  148         (A.num_rows, x.num_rows, x.num_cols,
 
  149          thrust::raw_pointer_cast(&A.row_offsets[0]),
 
  150          thrust::raw_pointer_cast(&A.column_indices[0]),
 
  151          thrust::raw_pointer_cast(&A.values[0]),
 
  152          thrust::raw_pointer_cast(&(x.values)[0]),
 
  153          thrust::raw_pointer_cast(&(y.values)[0]));
 
  158 template <
typename Matrix1,
 
void spmm_csr_scalar(const Matrix1 &A, const Vector2 &x, Vector3 &y)
KOKKOS_INLINE_FUNCTION PCE< Storage > min(const typename PCE< Storage >::value_type &a, const PCE< Storage > &b)
__global__ void row_spmm_csr_scalar_kernel(const IndexType Anum_rows, const IndexType xnum_row, const IndexType xnum_cols, const IndexType *Ar, const IndexType *Ac, const ValueType *Aval, const ValueType *x, ValueType *y)
__global__ void column_spmm_csr_scalar_kernel(const IndexType Anum_rows, const IndexType xnum_rows, const IndexType xnum_cols, const IndexType *Ar, const IndexType *Ac, const ValueType *Aval, const ValueType *x, ValueType *y)
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< RD, RP...> >::value &&Kokkos::is_view_uq_pce< Kokkos::View< XD, XP...> >::value >::type sum(const Kokkos::View< RD, RP...> &r, const Kokkos::View< XD, XP...> &x)
void __spmm_csr_scalar(const Matrix1 &A, const Vector2 &x, Vector3 &y, cusp::row_major)