Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
csr.h
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Stokhos Package
4 //
5 // Copyright 2009 NTESS and the Stokhos contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 /*
11  * Copyright 2008-2009 NVIDIA Corporation
12  *
13  * Licensed under the Apache License, Version 2.0 (the "License");
14  * you may not use this file except in compliance with the License.
15  * You may obtain a copy of the License at
16  *
17  * http://www.apache.org/licenses/LICENSE-2.0
18  *
19  * Unless required by applicable law or agreed to in writing, software
20  * distributed under the License is distributed on an "AS IS" BASIS,
21  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22  * See the License for the specific language governing permissions and
23  * limitations under the License.
24  */
25 
26 #include <cusp/array1d.h>
27 
28 #include <cusp/detail/format_utils.h>
29 
30 //#include <iostream>
31 //#include <cusp/print.h>
32 namespace cusp
33 {
34 namespace detail
35 {
36 namespace device
37 {
38 
39 //Row-wise AX where X is a 2d array with one thread per row (based on scalar model)
40 template <typename IndexType,
41  typename ValueType>
42 __global__ void
43 row_spmm_csr_scalar_kernel(const IndexType Anum_rows,
44  const IndexType xnum_row,
45  const IndexType xnum_cols,
46  const IndexType * Ar,
47  const IndexType * Ac,
48  const ValueType * Aval,
49  const ValueType * x,
50  ValueType * y)
51 {
52  const IndexType thread_id = blockDim.x * blockIdx.x + threadIdx.x;
53  const IndexType grid_size = gridDim.x * blockDim.x;
54 
55 
56  for(IndexType row = thread_id; row < Anum_rows; row += grid_size)
57  {
58  const IndexType row_start = Ar[row];
59  const IndexType row_end = Ar[row+1];
60  const IndexType r = row_end - row_start;
61 
62  for (IndexType j = 0; j < xnum_cols; j++){
63 
64  ValueType sum = 0.0;
65  for (IndexType jj = row_start; jj < row_end; jj++)
66  sum += Aval[jj] * x[j+xnum_cols*Ac[jj]];
67  y[j+xnum_cols*row]=sum;
68  }
69 
70  }
71 }
72 
73 
74 
75 //Col-wise with one thread per row
76 template <typename IndexType,
77  typename ValueType>
78 __global__ void
79 column_spmm_csr_scalar_kernel(const IndexType Anum_rows,
80  const IndexType xnum_rows,
81  const IndexType xnum_cols,
82  const IndexType * Ar,
83  const IndexType * Ac,
84  const ValueType * Aval,
85  const ValueType * x,
86  ValueType * y)
87 {
88  const IndexType thread_id = blockDim.x * blockIdx.x + threadIdx.x;
89  const IndexType grid_size = gridDim.x * blockDim.x;
90  for(IndexType row = thread_id; row < Anum_rows; row += grid_size){
91  const IndexType row_start = Ar[row];
92  const IndexType row_end = Ar[row+1];
93 
94  for (IndexType j = 0; j < xnum_cols; j++){
95 
96  ValueType sum = 0;
97  for (IndexType jj = row_start; jj < row_end; jj++)
98  sum += Aval[jj] * x[Ac[jj]+xnum_rows*j];//x(Ac[jj], j)
99  y[j*Anum_rows+row]=sum;
100  }
101  }
102 }
103 
104 
113 
114 
115 template <typename Matrix1,
116  typename Vector2,
117  typename Vector3>
118 void __spmm_csr_scalar(const Matrix1& A,
119  const Vector2& x,
120  Vector3& y, cusp::row_major)
121 {
122  CUSP_PROFILE_SCOPED();
123  typedef typename Vector3::index_type IndexType;
124  typedef typename Vector3::value_type ValueType;
125  typedef typename Vector3::memory_space MemorySpace;
126  const size_t BLOCK_SIZE = 256;
127  const size_t MAX_BLOCKS = cusp::detail::device::arch::max_active_blocks(row_spmm_csr_scalar_kernel<IndexType, ValueType>, BLOCK_SIZE, (size_t) 0);
128  const size_t NUM_BLOCKS = std::min(MAX_BLOCKS, DIVIDE_INTO(A.num_rows, BLOCK_SIZE));
129 
130 
131 
132  row_spmm_csr_scalar_kernel<IndexType,ValueType> <<<NUM_BLOCKS, BLOCK_SIZE >>>
133  (A.num_rows, x.num_rows, x.num_cols,
134  thrust::raw_pointer_cast(&A.row_offsets[0]),
135  thrust::raw_pointer_cast(&A.column_indices[0]),
136  thrust::raw_pointer_cast(&A.values[0]),
137  thrust::raw_pointer_cast(&(x.values)[0]),
138  thrust::raw_pointer_cast(&(y.values)[0]));
139 
140 }
141 
142 template <typename Matrix1,
143  typename Vector2,
144  typename Vector3>
145 void __spmm_csr_scalar(const Matrix1& A,
146  const Vector2& x,
147  Vector3& y, cusp::column_major)
148 {
149  CUSP_PROFILE_SCOPED();
150  typedef typename Vector3::index_type IndexType;
151  typedef typename Vector3::value_type ValueType;
152  typedef typename Vector3::memory_space MemorySpace;
153  const size_t BLOCK_SIZE = 256;
154  const size_t MAX_BLOCKS = cusp::detail::device::arch::max_active_blocks(column_spmm_csr_scalar_kernel<IndexType, ValueType>, BLOCK_SIZE, (size_t) 0);
155  const size_t NUM_BLOCKS = std::min(MAX_BLOCKS, DIVIDE_INTO(A.num_rows, BLOCK_SIZE));
156  column_spmm_csr_scalar_kernel<IndexType,ValueType> <<<NUM_BLOCKS, BLOCK_SIZE>>>
157  (A.num_rows, x.num_rows, x.num_cols,
158  thrust::raw_pointer_cast(&A.row_offsets[0]),
159  thrust::raw_pointer_cast(&A.column_indices[0]),
160  thrust::raw_pointer_cast(&A.values[0]),
161  thrust::raw_pointer_cast(&(x.values)[0]),
162  thrust::raw_pointer_cast(&(y.values)[0]));
163 
164 }
165 
166 
167 template <typename Matrix1,
168  typename Vector2,
169  typename Vector3>
170 void spmm_csr_scalar(const Matrix1& A,
171  const Vector2& x,
172  Vector3& y)
173 {
174 //Determine if row-wise or col-wise then call appropriate multiply
175  __spmm_csr_scalar(A, x, y, typename Vector2::orientation());
176 }
177 } // end namespace device
178 } // end namespace detail
179 } // end namespace cusp
void spmm_csr_scalar(const Matrix1 &A, const Vector2 &x, Vector3 &y)
Definition: csr.h:170
KOKKOS_INLINE_FUNCTION PCE< Storage > min(const typename PCE< Storage >::value_type &a, const PCE< Storage > &b)
__global__ void row_spmm_csr_scalar_kernel(const IndexType Anum_rows, const IndexType xnum_row, const IndexType xnum_cols, const IndexType *Ar, const IndexType *Ac, const ValueType *Aval, const ValueType *x, ValueType *y)
Definition: csr.h:43
__global__ void column_spmm_csr_scalar_kernel(const IndexType Anum_rows, const IndexType xnum_rows, const IndexType xnum_cols, const IndexType *Ar, const IndexType *Ac, const ValueType *Aval, const ValueType *x, ValueType *y)
Definition: csr.h:79
Kokkos::DefaultExecutionSpace device
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< RD, RP...> >::value &&Kokkos::is_view_uq_pce< Kokkos::View< XD, XP...> >::value >::type sum(const Kokkos::View< RD, RP...> &r, const Kokkos::View< XD, XP...> &x)
void __spmm_csr_scalar(const Matrix1 &A, const Vector2 &x, Vector3 &y, cusp::row_major)
Definition: csr.h:118