Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Stokhos_OpenMP_MKL_CrsMatrix.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Stokhos Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38 //
39 // ***********************************************************************
40 // @HEADER
41 
42 #ifndef STOKHOS_OPENMP_MKL_CRSMATRIX_HPP
43 #define STOKHOS_OPENMP_MKL_CRSMATRIX_HPP
44 
45 #include "Kokkos_Macros.hpp"
46 #include "Stokhos_ConfigDefs.h"
47 #if defined(KOKKOS_ENABLE_OPENMP) && defined(HAVE_STOKHOS_MKL)
48 
49 #include "Kokkos_Core.hpp"
50 #include "Stokhos_CrsMatrix.hpp"
51 #include "mkl.h"
52 
53 // Implementation of CrsMatrix multiply using MKL and OpenMP
54 
55 namespace Stokhos {
56 
57 namespace Impl {
58 
59  template <typename ValueType, typename OrdinalType, typename ExecutionSpace>
60  struct GatherTranspose {
61  typedef ValueType value_type;
62  typedef OrdinalType ordinal_type;
63  typedef ExecutionSpace execution_space;
64  typedef typename execution_space::size_type size_type;
65  typedef Kokkos::View< value_type** , Kokkos::LayoutLeft, execution_space > multi_vector_type ;
66  typedef Kokkos::View< value_type** , Kokkos::LayoutLeft, execution_space > trans_multi_vector_type ;
67 
68  const multi_vector_type m_x;
69  const trans_multi_vector_type m_xt;
70  const std::vector<ordinal_type> m_indices;
71  const size_t ncol;
72  GatherTranspose(const multi_vector_type & x,
73  const trans_multi_vector_type& xt,
74  const std::vector<ordinal_type> & indices) :
75  m_x(x), m_xt(xt), m_indices(indices), ncol(indices.size()) {}
76 
77  inline void operator()( const size_type row ) const {
78  for (size_t col=0; col<ncol; ++col)
79  m_xt(col,row) = m_x(row,m_indices[col]);
80  }
81 
82  static void apply(const multi_vector_type & x,
83  const trans_multi_vector_type& xt,
84  const std::vector<ordinal_type> & indices) {
85  const size_t n = xt.extent(1);
86  Kokkos::parallel_for( n, GatherTranspose(x,xt,indices) );
87  }
88  };
89 
90  template <typename ValueType, typename OrdinalType, typename ExecutionSpace>
91  struct ScatterTranspose {
92  typedef ValueType value_type;
93  typedef OrdinalType ordinal_type;
94  typedef ExecutionSpace execution_space;
95  typedef typename execution_space::size_type size_type;
96  typedef Kokkos::View< value_type** , Kokkos::LayoutLeft, execution_space > multi_vector_type ;
97  typedef Kokkos::View< value_type** , Kokkos::LayoutLeft, execution_space > trans_multi_vector_type ;
98 
99  const multi_vector_type m_x;
100  const trans_multi_vector_type m_xt;
101  const std::vector<ordinal_type> m_indices;
102  const size_t ncol;
103  ScatterTranspose(const multi_vector_type & x,
104  const trans_multi_vector_type& xt,
105  const std::vector<ordinal_type> & indices) :
106  m_x(x), m_xt(xt), m_indices(indices), ncol(indices.size()) {}
107 
108  inline void operator()( const size_type row ) const {
109  for (size_t col=0; col<ncol; ++col)
110  m_x(row,m_indices[col]) = m_xt(col,row);
111  }
112 
113  static void apply(const multi_vector_type & x,
114  const trans_multi_vector_type& xt,
115  const std::vector<ordinal_type> & indices) {
116  const size_t n = xt.extent(1);
117  Kokkos::parallel_for( n, ScatterTranspose(x,xt,indices) );
118  }
119  };
120 
121  template <typename ValueType, typename ExecutionSpace>
122  struct GatherVecTranspose {
123  typedef ValueType value_type;
124  typedef ExecutionSpace execution_space;
125  typedef typename execution_space::size_type size_type;
126  typedef Kokkos::View< value_type* , execution_space > vector_type ;
127  typedef Kokkos::View< value_type** , Kokkos::LayoutLeft, execution_space > trans_multi_vector_type ;
128 
129  const std::vector<vector_type> m_x;
130  const trans_multi_vector_type m_xt;
131  const size_t ncol;
132  GatherVecTranspose(const std::vector<vector_type> & x,
133  const trans_multi_vector_type& xt) :
134  m_x(x), m_xt(xt), ncol(x.size()) {}
135 
136  inline void operator()( const size_type row ) const {
137  for (size_t col=0; col<ncol; ++col)
138  m_xt(col,row) = m_x[col](row);
139  }
140 
141  static void apply(const std::vector<vector_type> & x,
142  const trans_multi_vector_type& xt) {
143  const size_t n = xt.extent(1);
144  Kokkos::parallel_for( n, GatherVecTranspose(x,xt) );
145  }
146  };
147 
148  template <typename ValueType, typename ExecutionSpace>
149  struct ScatterVecTranspose {
150  typedef ValueType value_type;
151  typedef ExecutionSpace execution_space;
152  typedef typename execution_space::size_type size_type;
153  typedef Kokkos::View< value_type* , execution_space > vector_type ;
154  typedef Kokkos::View< value_type** , Kokkos::LayoutLeft, execution_space > trans_multi_vector_type ;
155 
156  const std::vector<vector_type> m_x;
157  const trans_multi_vector_type m_xt;
158  const size_t ncol;
159  ScatterVecTranspose(const std::vector<vector_type> & x,
160  const trans_multi_vector_type& xt) :
161  m_x(x), m_xt(xt), ncol(x.size()) {}
162 
163  inline void operator()( const size_type row ) const {
164  for (size_t col=0; col<ncol; ++col)
165  m_x[col](row) = m_xt(col,row);
166  }
167 
168  static void apply(const std::vector<vector_type> & x,
169  const trans_multi_vector_type& xt) {
170  const size_t n = xt.extent(1);
171  Kokkos::parallel_for( n, ScatterVecTranspose(x,xt) );
172  }
173  };
174 
175 } // namespace Impl
176 
177 // Specializations of multiply using Intel MKL
178 class MKLMultiply {};
179 
180 void multiply(const CrsMatrix< double , Kokkos::OpenMP >& A,
181  const Kokkos::View< double* , Kokkos::OpenMP >& x,
182  Kokkos::View< double* , Kokkos::OpenMP >& y,
183  MKLMultiply tag)
184 {
185  MKL_INT n = A.graph.row_map.extent(0) - 1 ;
186  double *A_values = A.values.data() ;
187  MKL_INT *col_indices = A.graph.entries.data() ;
188  MKL_INT *row_beg = const_cast<MKL_INT*>(A.graph.row_map.data()) ;
189  MKL_INT *row_end = row_beg+1;
190  char matdescra[6] = { 'G', 'x', 'N', 'C', 'x', 'x' };
191  char trans = 'N';
192  double alpha = 1.0;
193  double beta = 0.0;
194 
195  double *x_values = x.data() ;
196  double *y_values = y.data() ;
197 
198  mkl_dcsrmv(&trans, &n, &n, &alpha, matdescra, A_values, col_indices,
199  row_beg, row_end, x_values, &beta, y_values);
200 }
201 
202 void multiply(const CrsMatrix< float , Kokkos::OpenMP >& A,
203  const Kokkos::View< float* , Kokkos::OpenMP >& x,
204  Kokkos::View< float* , Kokkos::OpenMP >& y,
205  MKLMultiply tag)
206 {
207  MKL_INT n = A.graph.row_map.extent(0) - 1 ;
208  float *A_values = A.values.data() ;
209  MKL_INT *col_indices = A.graph.entries.data() ;
210  MKL_INT *row_beg = const_cast<MKL_INT*>(A.graph.row_map.data()) ;
211  MKL_INT *row_end = row_beg+1;
212  char matdescra[6] = { 'G', 'x', 'N', 'C', 'x', 'x' };
213  char trans = 'N';
214  float alpha = 1.0;
215  float beta = 0.0;
216 
217  float *x_values = x.data() ;
218  float *y_values = y.data() ;
219 
220  mkl_scsrmv(&trans, &n, &n, &alpha, matdescra, A_values, col_indices,
221  row_beg, row_end, x_values, &beta, y_values);
222 }
223 
224 void multiply(const CrsMatrix< double , Kokkos::OpenMP >& A,
225  const std::vector< Kokkos::View< double* , Kokkos::OpenMP > >& x,
226  std::vector< Kokkos::View< double* , Kokkos::OpenMP > >& y,
227  MKLMultiply tag)
228 {
229  typedef Kokkos::OpenMP execution_space ;
230  typedef double value_type ;
231  typedef Kokkos::View< double** , Kokkos::LayoutLeft, execution_space > trans_multi_vector_type ;
232 
233  MKL_INT n = A.graph.row_map.extent(0) - 1 ;
234  double *A_values = A.values.data() ;
235  MKL_INT *col_indices = A.graph.entries.data() ;
236  MKL_INT *row_beg = const_cast<MKL_INT*>(A.graph.row_map.data()) ;
237  MKL_INT *row_end = row_beg+1;
238  char matdescra[6] = { 'G', 'x', 'N', 'C', 'x', 'x' };
239  char trans = 'N';
240  double alpha = 1.0;
241  double beta = 0.0;
242 
243  // Copy columns of x into a contiguous vector
244  MKL_INT ncol = x.size();
245  trans_multi_vector_type xx( "xx" , ncol , n );
246  trans_multi_vector_type yy( "yy" , ncol , n );
247  Impl::GatherVecTranspose<value_type,execution_space>::apply(x,xx);
248  double *x_values = xx.data() ;
249  double *y_values = yy.data() ;
250 
251  // Call MKLs CSR x multi-vector (row-based) multiply
252  mkl_dcsrmm(&trans, &n, &ncol, &n, &alpha, matdescra, A_values, col_indices,
253  row_beg, row_end, x_values, &ncol, &beta, y_values, &ncol);
254 
255  // Copy columns out of continguous multivector
256  Impl::ScatterVecTranspose<value_type,execution_space>::apply(y,yy);
257 }
258 
259 void multiply(const CrsMatrix< float , Kokkos::OpenMP >& A,
260  const std::vector< Kokkos::View< float* , Kokkos::OpenMP > >& x,
261  std::vector< Kokkos::View< float* , Kokkos::OpenMP > >& y,
262  MKLMultiply tag)
263 {
264  typedef Kokkos::OpenMP execution_space ;
265  typedef float value_type ;
266  typedef Kokkos::View< float** , Kokkos::LayoutLeft, execution_space > trans_multi_vector_type ;
267 
268  MKL_INT n = A.graph.row_map.extent(0) - 1 ;
269  float *A_values = A.values.data() ;
270  MKL_INT *col_indices = A.graph.entries.data() ;
271  MKL_INT *row_beg = const_cast<MKL_INT*>(A.graph.row_map.data()) ;
272  MKL_INT *row_end = row_beg+1;
273  char matdescra[6] = { 'G', 'x', 'N', 'C', 'x', 'x' };
274  char trans = 'N';
275  float alpha = 1.0;
276  float beta = 0.0;
277 
278  // Copy columns of x into a contiguous vector
279  MKL_INT ncol = x.size();
280  trans_multi_vector_type xx( "xx" , ncol , n );
281  trans_multi_vector_type yy( "yy" , ncol , n );
282  Impl::GatherVecTranspose<value_type,execution_space>::apply(x,xx);
283  float *x_values = xx.data() ;
284  float *y_values = yy.data() ;
285 
286  // Call MKLs CSR x multi-vector (row-based) multiply
287  mkl_scsrmm(&trans, &n, &ncol, &n, &alpha, matdescra, A_values, col_indices,
288  row_beg, row_end, x_values, &ncol, &beta, y_values, &ncol);
289 
290  // Copy columns out of continguous multivector
291  Impl::ScatterVecTranspose<value_type,execution_space>::apply(y,yy);
292 }
293 
294 template <typename ordinal_type>
295 void multiply(
296  const CrsMatrix< double , Kokkos::OpenMP >& A,
297  const Kokkos::View< double** , Kokkos::LayoutLeft, Kokkos::OpenMP >& x,
298  Kokkos::View< double** , Kokkos::LayoutLeft, Kokkos::OpenMP >& y,
299  const std::vector<ordinal_type>& indices,
300  MKLMultiply tag)
301 {
302  typedef Kokkos::OpenMP execution_space ;
303  typedef double value_type ;
304  typedef Kokkos::View< double** , Kokkos::LayoutLeft, execution_space > trans_multi_vector_type ;
305 
306  MKL_INT n = A.graph.row_map.extent(0) - 1 ;
307  double *A_values = A.values.data() ;
308  MKL_INT *col_indices = A.graph.entries.data() ;
309  MKL_INT *row_beg = const_cast<MKL_INT*>(A.graph.row_map.data()) ;
310  MKL_INT *row_end = row_beg+1;
311  char matdescra[6] = { 'G', 'x', 'N', 'C', 'x', 'x' };
312  char trans = 'N';
313  double alpha = 1.0;
314  double beta = 0.0;
315 
316  // Copy columns of x into a contiguous vector
317  MKL_INT ncol = indices.size();
318  trans_multi_vector_type xx( "xx" , ncol , n );
319  trans_multi_vector_type yy( "yy" , ncol , n );
320  Impl::GatherTranspose<value_type,ordinal_type,execution_space>::apply(x,xx,indices);
321  double *x_values = xx.data() ;
322  double *y_values = yy.data() ;
323 
324  // Call MKLs CSR x multi-vector (row-based) multiply
325  mkl_dcsrmm(&trans, &n, &ncol, &n, &alpha, matdescra, A_values, col_indices,
326  row_beg, row_end, x_values, &ncol, &beta, y_values, &ncol);
327 
328  // Copy columns out of continguous multivector
329  Impl::ScatterTranspose<value_type,ordinal_type,execution_space>::apply(y,yy,indices);
330 }
331 
332 template <typename ordinal_type>
333 void multiply(
334  const CrsMatrix< float , Kokkos::OpenMP >& A,
335  const Kokkos::View< float** , Kokkos::LayoutLeft, Kokkos::OpenMP >& x,
336  Kokkos::View< float** , Kokkos::LayoutLeft, Kokkos::OpenMP >& y,
337  const std::vector<ordinal_type>& indices,
338  MKLMultiply tag)
339 {
340  typedef Kokkos::OpenMP execution_space ;
341  typedef float value_type ;
342  typedef Kokkos::View< float** , Kokkos::LayoutLeft, execution_space > trans_multi_vector_type ;
343 
344  MKL_INT n = A.graph.row_map.extent(0) - 1 ;
345  float *A_values = A.values.data() ;
346  MKL_INT *col_indices = A.graph.entries.data() ;
347  MKL_INT *row_beg = const_cast<MKL_INT*>(A.graph.row_map.data()) ;
348  MKL_INT *row_end = row_beg+1;
349  char matdescra[6] = { 'G', 'x', 'N', 'C', 'x', 'x' };
350  char trans = 'N';
351  float alpha = 1.0;
352  float beta = 0.0;
353 
354  // Copy columns of x into a contiguous vector
355  MKL_INT ncol = indices.size();
356  trans_multi_vector_type xx( "xx" , ncol , n );
357  trans_multi_vector_type yy( "yy" , ncol , n );
358  Impl::GatherTranspose<value_type,ordinal_type,execution_space>::apply(x,xx,indices);
359  float *x_values = xx.data() ;
360  float *y_values = yy.data() ;
361 
362  // Call MKLs CSR x multi-vector (row-based) multiply
363  mkl_scsrmm(&trans, &n, &ncol, &n, &alpha, matdescra, A_values, col_indices,
364  row_beg, row_end, x_values, &ncol, &beta, y_values, &ncol);
365 
366  // Copy columns out of continguous multivector
367  Impl::ScatterTranspose<value_type,ordinal_type,execution_space>::apply(y,yy,indices);
368 }
369 
370 //----------------------------------------------------------------------------
371 
372 } // namespace Stokhos
373 
374 #endif
375 
376 #endif /* #ifndef STOKHOS_OPENMP_MKL_CRSMATRIX_HPP */
Kokkos::DefaultExecutionSpace execution_space
void multiply(const CrsMatrix< MatrixValue, Device, Layout > &A, const InputMultiVectorType &x, OutputMultiVectorType &y, const std::vector< OrdinalType > &col_indices, SingleColumnMultivectorMultiply)
int n