Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
csr.h
Go to the documentation of this file.
1 /*
2  * Copyright 2008-2009 NVIDIA Corporation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <cusp/array1d.h>
18 
19 #include <cusp/detail/format_utils.h>
20 
21 //#include <iostream>
22 //#include <cusp/print.h>
23 namespace cusp
24 {
25 namespace detail
26 {
27 namespace device
28 {
29 
30 //Row-wise AX where X is a 2d array with one thread per row (based on scalar model)
31 template <typename IndexType,
32  typename ValueType>
33 __global__ void
34 row_spmm_csr_scalar_kernel(const IndexType Anum_rows,
35  const IndexType xnum_row,
36  const IndexType xnum_cols,
37  const IndexType * Ar,
38  const IndexType * Ac,
39  const ValueType * Aval,
40  const ValueType * x,
41  ValueType * y)
42 {
43  const IndexType thread_id = blockDim.x * blockIdx.x + threadIdx.x;
44  const IndexType grid_size = gridDim.x * blockDim.x;
45 
46 
47  for(IndexType row = thread_id; row < Anum_rows; row += grid_size)
48  {
49  const IndexType row_start = Ar[row];
50  const IndexType row_end = Ar[row+1];
51  const IndexType r = row_end - row_start;
52 
53  for (IndexType j = 0; j < xnum_cols; j++){
54 
55  ValueType sum = 0.0;
56  for (IndexType jj = row_start; jj < row_end; jj++)
57  sum += Aval[jj] * x[j+xnum_cols*Ac[jj]];
58  y[j+xnum_cols*row]=sum;
59  }
60 
61  }
62 }
63 
64 
65 
66 //Col-wise with one thread per row
67 template <typename IndexType,
68  typename ValueType>
69 __global__ void
70 column_spmm_csr_scalar_kernel(const IndexType Anum_rows,
71  const IndexType xnum_rows,
72  const IndexType xnum_cols,
73  const IndexType * Ar,
74  const IndexType * Ac,
75  const ValueType * Aval,
76  const ValueType * x,
77  ValueType * y)
78 {
79  const IndexType thread_id = blockDim.x * blockIdx.x + threadIdx.x;
80  const IndexType grid_size = gridDim.x * blockDim.x;
81  for(IndexType row = thread_id; row < Anum_rows; row += grid_size){
82  const IndexType row_start = Ar[row];
83  const IndexType row_end = Ar[row+1];
84 
85  for (IndexType j = 0; j < xnum_cols; j++){
86 
87  ValueType sum = 0;
88  for (IndexType jj = row_start; jj < row_end; jj++)
89  sum += Aval[jj] * x[Ac[jj]+xnum_rows*j];//x(Ac[jj], j)
90  y[j*Anum_rows+row]=sum;
91  }
92  }
93 }
94 
95 
104 
105 
106 template <typename Matrix1,
107  typename Vector2,
108  typename Vector3>
109 void __spmm_csr_scalar(const Matrix1& A,
110  const Vector2& x,
111  Vector3& y, cusp::row_major)
112 {
113  CUSP_PROFILE_SCOPED();
114  typedef typename Vector3::index_type IndexType;
115  typedef typename Vector3::value_type ValueType;
116  typedef typename Vector3::memory_space MemorySpace;
117  const size_t BLOCK_SIZE = 256;
118  const size_t MAX_BLOCKS = cusp::detail::device::arch::max_active_blocks(row_spmm_csr_scalar_kernel<IndexType, ValueType>, BLOCK_SIZE, (size_t) 0);
119  const size_t NUM_BLOCKS = std::min(MAX_BLOCKS, DIVIDE_INTO(A.num_rows, BLOCK_SIZE));
120 
121 
122 
123  row_spmm_csr_scalar_kernel<IndexType,ValueType> <<<NUM_BLOCKS, BLOCK_SIZE >>>
124  (A.num_rows, x.num_rows, x.num_cols,
125  thrust::raw_pointer_cast(&A.row_offsets[0]),
126  thrust::raw_pointer_cast(&A.column_indices[0]),
127  thrust::raw_pointer_cast(&A.values[0]),
128  thrust::raw_pointer_cast(&(x.values)[0]),
129  thrust::raw_pointer_cast(&(y.values)[0]));
130 
131 }
132 
133 template <typename Matrix1,
134  typename Vector2,
135  typename Vector3>
136 void __spmm_csr_scalar(const Matrix1& A,
137  const Vector2& x,
138  Vector3& y, cusp::column_major)
139 {
140  CUSP_PROFILE_SCOPED();
141  typedef typename Vector3::index_type IndexType;
142  typedef typename Vector3::value_type ValueType;
143  typedef typename Vector3::memory_space MemorySpace;
144  const size_t BLOCK_SIZE = 256;
145  const size_t MAX_BLOCKS = cusp::detail::device::arch::max_active_blocks(column_spmm_csr_scalar_kernel<IndexType, ValueType>, BLOCK_SIZE, (size_t) 0);
146  const size_t NUM_BLOCKS = std::min(MAX_BLOCKS, DIVIDE_INTO(A.num_rows, BLOCK_SIZE));
147  column_spmm_csr_scalar_kernel<IndexType,ValueType> <<<NUM_BLOCKS, BLOCK_SIZE>>>
148  (A.num_rows, x.num_rows, x.num_cols,
149  thrust::raw_pointer_cast(&A.row_offsets[0]),
150  thrust::raw_pointer_cast(&A.column_indices[0]),
151  thrust::raw_pointer_cast(&A.values[0]),
152  thrust::raw_pointer_cast(&(x.values)[0]),
153  thrust::raw_pointer_cast(&(y.values)[0]));
154 
155 }
156 
157 
158 template <typename Matrix1,
159  typename Vector2,
160  typename Vector3>
161 void spmm_csr_scalar(const Matrix1& A,
162  const Vector2& x,
163  Vector3& y)
164 {
165 //Determine if row-wise or col-wise then call appropriate multiply
166  __spmm_csr_scalar(A, x, y, typename Vector2::orientation());
167 }
168 } // end namespace device
169 } // end namespace detail
170 } // end namespace cusp
171 
void spmm_csr_scalar(const Matrix1 &A, const Vector2 &x, Vector3 &y)
Definition: csr.h:161
KOKKOS_INLINE_FUNCTION PCE< Storage > min(const typename PCE< Storage >::value_type &a, const PCE< Storage > &b)
__global__ void row_spmm_csr_scalar_kernel(const IndexType Anum_rows, const IndexType xnum_row, const IndexType xnum_cols, const IndexType *Ar, const IndexType *Ac, const ValueType *Aval, const ValueType *x, ValueType *y)
Definition: csr.h:34
__global__ void column_spmm_csr_scalar_kernel(const IndexType Anum_rows, const IndexType xnum_rows, const IndexType xnum_cols, const IndexType *Ar, const IndexType *Ac, const ValueType *Aval, const ValueType *x, ValueType *y)
Definition: csr.h:70
Kokkos::DefaultExecutionSpace device
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< RD, RP...> >::value &&Kokkos::is_view_uq_pce< Kokkos::View< XD, XP...> >::value >::type sum(const Kokkos::View< RD, RP...> &r, const Kokkos::View< XD, XP...> &x)
void __spmm_csr_scalar(const Matrix1 &A, const Vector2 &x, Vector3 &y, cusp::row_major)
Definition: csr.h:109