Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Stokhos_OpenMP_MKL_CrsMatrix.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Stokhos Package
4 //
5 // Copyright 2009 NTESS and the Stokhos contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef STOKHOS_OPENMP_MKL_CRSMATRIX_HPP
11 #define STOKHOS_OPENMP_MKL_CRSMATRIX_HPP
12 
13 #include "Kokkos_Macros.hpp"
14 #include "Stokhos_ConfigDefs.h"
15 #if defined(KOKKOS_ENABLE_OPENMP) && defined(HAVE_STOKHOS_MKL)
16 
17 #include "Kokkos_Core.hpp"
18 #include "Stokhos_CrsMatrix.hpp"
19 #include "mkl.h"
20 
21 // Implementation of CrsMatrix multiply using MKL and OpenMP
22 
23 namespace Stokhos {
24 
25 namespace Impl {
26 
27  template <typename ValueType, typename OrdinalType, typename ExecutionSpace>
28  struct GatherTranspose {
29  typedef ValueType value_type;
30  typedef OrdinalType ordinal_type;
31  typedef ExecutionSpace execution_space;
32  typedef typename execution_space::size_type size_type;
33  typedef Kokkos::View< value_type** , Kokkos::LayoutLeft, execution_space > multi_vector_type ;
34  typedef Kokkos::View< value_type** , Kokkos::LayoutLeft, execution_space > trans_multi_vector_type ;
35 
36  const multi_vector_type m_x;
37  const trans_multi_vector_type m_xt;
38  const std::vector<ordinal_type> m_indices;
39  const size_t ncol;
40  GatherTranspose(const multi_vector_type & x,
41  const trans_multi_vector_type& xt,
42  const std::vector<ordinal_type> & indices) :
43  m_x(x), m_xt(xt), m_indices(indices), ncol(indices.size()) {}
44 
45  inline void operator()( const size_type row ) const {
46  for (size_t col=0; col<ncol; ++col)
47  m_xt(col,row) = m_x(row,m_indices[col]);
48  }
49 
50  static void apply(const multi_vector_type & x,
51  const trans_multi_vector_type& xt,
52  const std::vector<ordinal_type> & indices) {
53  const size_t n = xt.extent(1);
54  Kokkos::parallel_for( n, GatherTranspose(x,xt,indices) );
55  }
56  };
57 
58  template <typename ValueType, typename OrdinalType, typename ExecutionSpace>
59  struct ScatterTranspose {
60  typedef ValueType value_type;
61  typedef OrdinalType ordinal_type;
62  typedef ExecutionSpace execution_space;
63  typedef typename execution_space::size_type size_type;
64  typedef Kokkos::View< value_type** , Kokkos::LayoutLeft, execution_space > multi_vector_type ;
65  typedef Kokkos::View< value_type** , Kokkos::LayoutLeft, execution_space > trans_multi_vector_type ;
66 
67  const multi_vector_type m_x;
68  const trans_multi_vector_type m_xt;
69  const std::vector<ordinal_type> m_indices;
70  const size_t ncol;
71  ScatterTranspose(const multi_vector_type & x,
72  const trans_multi_vector_type& xt,
73  const std::vector<ordinal_type> & indices) :
74  m_x(x), m_xt(xt), m_indices(indices), ncol(indices.size()) {}
75 
76  inline void operator()( const size_type row ) const {
77  for (size_t col=0; col<ncol; ++col)
78  m_x(row,m_indices[col]) = m_xt(col,row);
79  }
80 
81  static void apply(const multi_vector_type & x,
82  const trans_multi_vector_type& xt,
83  const std::vector<ordinal_type> & indices) {
84  const size_t n = xt.extent(1);
85  Kokkos::parallel_for( n, ScatterTranspose(x,xt,indices) );
86  }
87  };
88 
89  template <typename ValueType, typename ExecutionSpace>
90  struct GatherVecTranspose {
91  typedef ValueType value_type;
92  typedef ExecutionSpace execution_space;
93  typedef typename execution_space::size_type size_type;
94  typedef Kokkos::View< value_type* , execution_space > vector_type ;
95  typedef Kokkos::View< value_type** , Kokkos::LayoutLeft, execution_space > trans_multi_vector_type ;
96 
97  const std::vector<vector_type> m_x;
98  const trans_multi_vector_type m_xt;
99  const size_t ncol;
100  GatherVecTranspose(const std::vector<vector_type> & x,
101  const trans_multi_vector_type& xt) :
102  m_x(x), m_xt(xt), ncol(x.size()) {}
103 
104  inline void operator()( const size_type row ) const {
105  for (size_t col=0; col<ncol; ++col)
106  m_xt(col,row) = m_x[col](row);
107  }
108 
109  static void apply(const std::vector<vector_type> & x,
110  const trans_multi_vector_type& xt) {
111  const size_t n = xt.extent(1);
112  Kokkos::parallel_for( n, GatherVecTranspose(x,xt) );
113  }
114  };
115 
116  template <typename ValueType, typename ExecutionSpace>
117  struct ScatterVecTranspose {
118  typedef ValueType value_type;
119  typedef ExecutionSpace execution_space;
120  typedef typename execution_space::size_type size_type;
121  typedef Kokkos::View< value_type* , execution_space > vector_type ;
122  typedef Kokkos::View< value_type** , Kokkos::LayoutLeft, execution_space > trans_multi_vector_type ;
123 
124  const std::vector<vector_type> m_x;
125  const trans_multi_vector_type m_xt;
126  const size_t ncol;
127  ScatterVecTranspose(const std::vector<vector_type> & x,
128  const trans_multi_vector_type& xt) :
129  m_x(x), m_xt(xt), ncol(x.size()) {}
130 
131  inline void operator()( const size_type row ) const {
132  for (size_t col=0; col<ncol; ++col)
133  m_x[col](row) = m_xt(col,row);
134  }
135 
136  static void apply(const std::vector<vector_type> & x,
137  const trans_multi_vector_type& xt) {
138  const size_t n = xt.extent(1);
139  Kokkos::parallel_for( n, ScatterVecTranspose(x,xt) );
140  }
141  };
142 
143 } // namespace Impl
144 
145 // Specializations of multiply using Intel MKL
146 class MKLMultiply {};
147 
148 void multiply(const CrsMatrix< double , Kokkos::OpenMP >& A,
149  const Kokkos::View< double* , Kokkos::OpenMP >& x,
150  Kokkos::View< double* , Kokkos::OpenMP >& y,
151  MKLMultiply tag)
152 {
153  MKL_INT n = A.graph.row_map.extent(0) - 1 ;
154  double *A_values = A.values.data() ;
155  MKL_INT *col_indices = A.graph.entries.data() ;
156  MKL_INT *row_beg = const_cast<MKL_INT*>(A.graph.row_map.data()) ;
157  MKL_INT *row_end = row_beg+1;
158  char matdescra[6] = { 'G', 'x', 'N', 'C', 'x', 'x' };
159  char trans = 'N';
160  double alpha = 1.0;
161  double beta = 0.0;
162 
163  double *x_values = x.data() ;
164  double *y_values = y.data() ;
165 
166  mkl_dcsrmv(&trans, &n, &n, &alpha, matdescra, A_values, col_indices,
167  row_beg, row_end, x_values, &beta, y_values);
168 }
169 
170 void multiply(const CrsMatrix< float , Kokkos::OpenMP >& A,
171  const Kokkos::View< float* , Kokkos::OpenMP >& x,
172  Kokkos::View< float* , Kokkos::OpenMP >& y,
173  MKLMultiply tag)
174 {
175  MKL_INT n = A.graph.row_map.extent(0) - 1 ;
176  float *A_values = A.values.data() ;
177  MKL_INT *col_indices = A.graph.entries.data() ;
178  MKL_INT *row_beg = const_cast<MKL_INT*>(A.graph.row_map.data()) ;
179  MKL_INT *row_end = row_beg+1;
180  char matdescra[6] = { 'G', 'x', 'N', 'C', 'x', 'x' };
181  char trans = 'N';
182  float alpha = 1.0;
183  float beta = 0.0;
184 
185  float *x_values = x.data() ;
186  float *y_values = y.data() ;
187 
188  mkl_scsrmv(&trans, &n, &n, &alpha, matdescra, A_values, col_indices,
189  row_beg, row_end, x_values, &beta, y_values);
190 }
191 
192 void multiply(const CrsMatrix< double , Kokkos::OpenMP >& A,
193  const std::vector< Kokkos::View< double* , Kokkos::OpenMP > >& x,
194  std::vector< Kokkos::View< double* , Kokkos::OpenMP > >& y,
195  MKLMultiply tag)
196 {
197  typedef Kokkos::OpenMP execution_space ;
198  typedef double value_type ;
199  typedef Kokkos::View< double** , Kokkos::LayoutLeft, execution_space > trans_multi_vector_type ;
200 
201  MKL_INT n = A.graph.row_map.extent(0) - 1 ;
202  double *A_values = A.values.data() ;
203  MKL_INT *col_indices = A.graph.entries.data() ;
204  MKL_INT *row_beg = const_cast<MKL_INT*>(A.graph.row_map.data()) ;
205  MKL_INT *row_end = row_beg+1;
206  char matdescra[6] = { 'G', 'x', 'N', 'C', 'x', 'x' };
207  char trans = 'N';
208  double alpha = 1.0;
209  double beta = 0.0;
210 
211  // Copy columns of x into a contiguous vector
212  MKL_INT ncol = x.size();
213  trans_multi_vector_type xx( "xx" , ncol , n );
214  trans_multi_vector_type yy( "yy" , ncol , n );
215  Impl::GatherVecTranspose<value_type,execution_space>::apply(x,xx);
216  double *x_values = xx.data() ;
217  double *y_values = yy.data() ;
218 
219  // Call MKLs CSR x multi-vector (row-based) multiply
220  mkl_dcsrmm(&trans, &n, &ncol, &n, &alpha, matdescra, A_values, col_indices,
221  row_beg, row_end, x_values, &ncol, &beta, y_values, &ncol);
222 
223  // Copy columns out of continguous multivector
224  Impl::ScatterVecTranspose<value_type,execution_space>::apply(y,yy);
225 }
226 
227 void multiply(const CrsMatrix< float , Kokkos::OpenMP >& A,
228  const std::vector< Kokkos::View< float* , Kokkos::OpenMP > >& x,
229  std::vector< Kokkos::View< float* , Kokkos::OpenMP > >& y,
230  MKLMultiply tag)
231 {
232  typedef Kokkos::OpenMP execution_space ;
233  typedef float value_type ;
234  typedef Kokkos::View< float** , Kokkos::LayoutLeft, execution_space > trans_multi_vector_type ;
235 
236  MKL_INT n = A.graph.row_map.extent(0) - 1 ;
237  float *A_values = A.values.data() ;
238  MKL_INT *col_indices = A.graph.entries.data() ;
239  MKL_INT *row_beg = const_cast<MKL_INT*>(A.graph.row_map.data()) ;
240  MKL_INT *row_end = row_beg+1;
241  char matdescra[6] = { 'G', 'x', 'N', 'C', 'x', 'x' };
242  char trans = 'N';
243  float alpha = 1.0;
244  float beta = 0.0;
245 
246  // Copy columns of x into a contiguous vector
247  MKL_INT ncol = x.size();
248  trans_multi_vector_type xx( "xx" , ncol , n );
249  trans_multi_vector_type yy( "yy" , ncol , n );
250  Impl::GatherVecTranspose<value_type,execution_space>::apply(x,xx);
251  float *x_values = xx.data() ;
252  float *y_values = yy.data() ;
253 
254  // Call MKLs CSR x multi-vector (row-based) multiply
255  mkl_scsrmm(&trans, &n, &ncol, &n, &alpha, matdescra, A_values, col_indices,
256  row_beg, row_end, x_values, &ncol, &beta, y_values, &ncol);
257 
258  // Copy columns out of continguous multivector
259  Impl::ScatterVecTranspose<value_type,execution_space>::apply(y,yy);
260 }
261 
262 template <typename ordinal_type>
263 void multiply(
264  const CrsMatrix< double , Kokkos::OpenMP >& A,
265  const Kokkos::View< double** , Kokkos::LayoutLeft, Kokkos::OpenMP >& x,
266  Kokkos::View< double** , Kokkos::LayoutLeft, Kokkos::OpenMP >& y,
267  const std::vector<ordinal_type>& indices,
268  MKLMultiply tag)
269 {
270  typedef Kokkos::OpenMP execution_space ;
271  typedef double value_type ;
272  typedef Kokkos::View< double** , Kokkos::LayoutLeft, execution_space > trans_multi_vector_type ;
273 
274  MKL_INT n = A.graph.row_map.extent(0) - 1 ;
275  double *A_values = A.values.data() ;
276  MKL_INT *col_indices = A.graph.entries.data() ;
277  MKL_INT *row_beg = const_cast<MKL_INT*>(A.graph.row_map.data()) ;
278  MKL_INT *row_end = row_beg+1;
279  char matdescra[6] = { 'G', 'x', 'N', 'C', 'x', 'x' };
280  char trans = 'N';
281  double alpha = 1.0;
282  double beta = 0.0;
283 
284  // Copy columns of x into a contiguous vector
285  MKL_INT ncol = indices.size();
286  trans_multi_vector_type xx( "xx" , ncol , n );
287  trans_multi_vector_type yy( "yy" , ncol , n );
288  Impl::GatherTranspose<value_type,ordinal_type,execution_space>::apply(x,xx,indices);
289  double *x_values = xx.data() ;
290  double *y_values = yy.data() ;
291 
292  // Call MKLs CSR x multi-vector (row-based) multiply
293  mkl_dcsrmm(&trans, &n, &ncol, &n, &alpha, matdescra, A_values, col_indices,
294  row_beg, row_end, x_values, &ncol, &beta, y_values, &ncol);
295 
296  // Copy columns out of continguous multivector
297  Impl::ScatterTranspose<value_type,ordinal_type,execution_space>::apply(y,yy,indices);
298 }
299 
300 template <typename ordinal_type>
301 void multiply(
302  const CrsMatrix< float , Kokkos::OpenMP >& A,
303  const Kokkos::View< float** , Kokkos::LayoutLeft, Kokkos::OpenMP >& x,
304  Kokkos::View< float** , Kokkos::LayoutLeft, Kokkos::OpenMP >& y,
305  const std::vector<ordinal_type>& indices,
306  MKLMultiply tag)
307 {
308  typedef Kokkos::OpenMP execution_space ;
309  typedef float value_type ;
310  typedef Kokkos::View< float** , Kokkos::LayoutLeft, execution_space > trans_multi_vector_type ;
311 
312  MKL_INT n = A.graph.row_map.extent(0) - 1 ;
313  float *A_values = A.values.data() ;
314  MKL_INT *col_indices = A.graph.entries.data() ;
315  MKL_INT *row_beg = const_cast<MKL_INT*>(A.graph.row_map.data()) ;
316  MKL_INT *row_end = row_beg+1;
317  char matdescra[6] = { 'G', 'x', 'N', 'C', 'x', 'x' };
318  char trans = 'N';
319  float alpha = 1.0;
320  float beta = 0.0;
321 
322  // Copy columns of x into a contiguous vector
323  MKL_INT ncol = indices.size();
324  trans_multi_vector_type xx( "xx" , ncol , n );
325  trans_multi_vector_type yy( "yy" , ncol , n );
326  Impl::GatherTranspose<value_type,ordinal_type,execution_space>::apply(x,xx,indices);
327  float *x_values = xx.data() ;
328  float *y_values = yy.data() ;
329 
330  // Call MKLs CSR x multi-vector (row-based) multiply
331  mkl_scsrmm(&trans, &n, &ncol, &n, &alpha, matdescra, A_values, col_indices,
332  row_beg, row_end, x_values, &ncol, &beta, y_values, &ncol);
333 
334  // Copy columns out of continguous multivector
335  Impl::ScatterTranspose<value_type,ordinal_type,execution_space>::apply(y,yy,indices);
336 }
337 
338 //----------------------------------------------------------------------------
339 
340 } // namespace Stokhos
341 
342 #endif
343 
344 #endif /* #ifndef STOKHOS_OPENMP_MKL_CRSMATRIX_HPP */
Kokkos::DefaultExecutionSpace execution_space
void multiply(const CrsMatrix< MatrixValue, Device, Layout > &A, const InputMultiVectorType &x, OutputMultiVectorType &y, const std::vector< OrdinalType > &col_indices, SingleColumnMultivectorMultiply)
int n