42 #ifndef STOKHOS_CUDA_CRSMATRIX_HPP
43 #define STOKHOS_CUDA_CRSMATRIX_HPP
46 #ifdef HAVE_STOKHOS_CUSPARSE
52 #include <cuda_runtime.h>
54 #include <cusparse_v2.h>
56 #include "Kokkos_Core.hpp"
63 class CudaSparseSingleton {
66 cusparseStatus_t status;
67 cusparseHandle_t handle;
68 cusparseMatDescr_t descra;
70 static CudaSparseSingleton & singleton();
76 status = cusparseCreate(&handle);
77 if(status != CUSPARSE_STATUS_SUCCESS)
79 throw std::runtime_error( std::string(
"ERROR - CUSPARSE Library Initialization failed" ) );
82 status = cusparseCreateMatDescr(&descra);
83 if(status != CUSPARSE_STATUS_SUCCESS)
85 throw std::runtime_error( std::string(
"ERROR - CUSPARSE Library Matrix descriptor failed" ) );
88 cusparseSetMatType(descra , CUSPARSE_MATRIX_TYPE_GENERAL);
89 cusparseSetMatIndexBase(descra , CUSPARSE_INDEX_BASE_ZERO);
92 CudaSparseSingleton(
const CudaSparseSingleton & );
93 CudaSparseSingleton & operator = (
const CudaSparseSingleton & );
96 CudaSparseSingleton & CudaSparseSingleton::singleton()
98 static CudaSparseSingleton s ;
return s ;
103 CrsMatrix< float , Kokkos::Cuda > ,
104 Kokkos::View< float* , Kokkos::Cuda > ,
105 Kokkos::View< float* , Kokkos::Cuda > ,
111 typedef execution_space::size_type size_type ;
112 typedef Kokkos::View< float* , execution_space > vector_type ;
113 typedef CrsMatrix< float , execution_space > matrix_type ;
117 static void apply(
const matrix_type &
A ,
118 const vector_type & x ,
119 const vector_type & y )
121 CudaSparseSingleton & s = CudaSparseSingleton::singleton();
122 const float alpha = 1 , beta = 0 ;
123 const int n = A.graph.row_map.extent(0) - 1 ;
124 const int nz = A.graph.entries.extent(0);
126 cusparseStatus_t status =
127 cusparseScsrmv( s.handle ,
128 CUSPARSE_OPERATION_NON_TRANSPOSE ,
133 A.graph.row_map.data() ,
134 A.graph.entries.data() ,
139 if ( CUSPARSE_STATUS_SUCCESS != status ) {
140 throw std::runtime_error( std::string(
"ERROR - cusparseScsrmv " ) );
147 CrsMatrix<
double , Kokkos::Cuda > ,
148 Kokkos::View< double* , Kokkos::Cuda > ,
149 Kokkos::View< double* , Kokkos::Cuda > ,
155 typedef execution_space::size_type size_type ;
156 typedef Kokkos::View< double* , execution_space > vector_type ;
157 typedef CrsMatrix< double , execution_space > matrix_type ;
161 static void apply(
const matrix_type & A ,
162 const vector_type & x ,
163 const vector_type & y )
165 CudaSparseSingleton & s = CudaSparseSingleton::singleton();
166 const double alpha = 1 , beta = 0 ;
167 const int n = A.graph.row_map.extent(0) - 1 ;
168 const int nz = A.graph.entries.extent(0);
170 cusparseStatus_t status =
171 cusparseDcsrmv( s.handle ,
172 CUSPARSE_OPERATION_NON_TRANSPOSE ,
177 A.graph.row_map.data() ,
178 A.graph.entries.data() ,
183 if ( CUSPARSE_STATUS_SUCCESS != status ) {
184 throw std::runtime_error( std::string(
"ERROR - cusparseDcsrmv " ) );
189 template <
typename Ordinal>
191 CrsMatrix< float , Kokkos::Cuda > ,
192 Kokkos::View< float** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
193 Kokkos::View< float** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
194 std::vector<Ordinal> ,
199 typedef execution_space::size_type size_type;
200 typedef Kokkos::View< float*, Kokkos::LayoutLeft, execution_space > vector_type;
201 typedef Kokkos::View< float**, Kokkos::LayoutLeft, execution_space > multi_vector_type;
202 typedef CrsMatrix< float , execution_space > matrix_type;
206 static void apply(
const matrix_type & A ,
207 const multi_vector_type & x ,
208 const multi_vector_type & y ,
209 const std::vector<Ordinal> & col_indices )
211 CudaSparseSingleton & s = CudaSparseSingleton::singleton();
212 const float alpha = 1 , beta = 0 ;
213 const int n = A.graph.row_map.extent(0) - 1 ;
214 const int nz = A.graph.entries.extent(0);
215 const size_t ncol = col_indices.size();
218 vector_type xx( Kokkos::ViewAllocateWithoutInitializing(
"xx"), n * ncol );
219 vector_type yy( Kokkos::ViewAllocateWithoutInitializing(
"yy"), n * ncol );
221 for (
size_t col=0; col<ncol; col++) {
222 const std::pair< size_t , size_t > span( n * col , n * ( col + 1 ) );
223 vector_type xx_view = Kokkos::subview( xx , span );
225 Kokkos::subview( x, Kokkos::ALL(), col_indices[col] );
230 cusparseStatus_t status =
231 cusparseScsrmm( s.handle ,
232 CUSPARSE_OPERATION_NON_TRANSPOSE ,
237 A.graph.row_map.data() ,
238 A.graph.entries.data() ,
245 if ( CUSPARSE_STATUS_SUCCESS != status ) {
246 throw std::runtime_error( std::string(
"ERROR - cusparseDcsrmv " ) );
250 for (
size_t col=0; col<ncol; col++) {
251 const std::pair< size_t , size_t > span( n * col , n * ( col + 1 ) );
252 vector_type yy_view = Kokkos::subview( yy , span );
254 Kokkos::subview( y, Kokkos::ALL(), col_indices[col] );
260 #define USE_CUSPARSE 1
263 template <
typename Ordinal>
265 CrsMatrix<
double , Kokkos::Cuda > ,
266 Kokkos::View< double** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
267 Kokkos::View< double** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
268 std::vector<Ordinal> ,
273 typedef execution_space::size_type size_type;
274 typedef Kokkos::View< double*, Kokkos::LayoutLeft, execution_space > vector_type;
275 typedef Kokkos::View< double**, Kokkos::LayoutLeft, execution_space > multi_vector_type;
276 typedef CrsMatrix< double , execution_space > matrix_type;
280 #define USE_TRANSPOSE 0
287 struct GatherTranspose {
289 typedef execution_space::size_type size_type;
291 multi_vector_type m_xt;
292 const multi_vector_type m_x;
293 const Kokkos::View<Ordinal*,execution_space> m_col;
294 const size_type m_ncol;
296 GatherTranspose( multi_vector_type& xt,
297 const multi_vector_type& x,
298 const Kokkos::View<Ordinal*,execution_space>& col ) :
299 m_xt(xt), m_x(x), m_col(col), m_ncol(col.extent(0)) {}
302 inline void operator() (size_type i)
const {
303 for (size_type
j=0;
j<m_ncol; ++
j)
304 m_xt(
j,i) = m_x(i,m_col(
j));
307 static void apply( multi_vector_type& xt,
308 const multi_vector_type& x,
309 const Kokkos::View<Ordinal*,execution_space>& col ) {
310 const size_type n = x.extent(0);
311 Kokkos::parallel_for( n , GatherTranspose(xt,x,col) );
315 static void apply(
const matrix_type & A ,
316 const multi_vector_type & x ,
317 const multi_vector_type & y ,
318 const std::vector<Ordinal> & col_indices )
320 CudaSparseSingleton & s = CudaSparseSingleton::singleton();
321 const double alpha = 1 , beta = 0 ;
322 const int n = A.graph.row_map.extent(0) - 1 ;
323 const int nz = A.graph.entries.extent(0);
324 const size_t ncol = col_indices.size();
327 Kokkos::View<Ordinal*,execution_space> col_indices_dev(
328 Kokkos::ViewAllocateWithoutInitializing(
"col_indices"), ncol);
329 typename Kokkos::View<Ordinal*,execution_space>::HostMirror col_indices_host =
331 for (
size_t i=0; i<ncol; ++i)
332 col_indices_host(i) = col_indices[i];
336 multi_vector_type xx(
337 Kokkos::ViewAllocateWithoutInitializing(
"xx"), ncol , n );
338 GatherTranspose::apply(xx, x, col_indices_dev);
341 multi_vector_type yy(
342 Kokkos::ViewAllocateWithoutInitializing(
"yy"), n , ncol );
345 cusparseStatus_t status =
346 cusparseDcsrmm2( s.handle ,
347 CUSPARSE_OPERATION_NON_TRANSPOSE ,
348 CUSPARSE_OPERATION_TRANSPOSE ,
353 A.graph.row_map.data() ,
354 A.graph.entries.data() ,
361 if ( CUSPARSE_STATUS_SUCCESS != status ) {
362 throw std::runtime_error( std::string(
"ERROR - cusparseDcsrmv " ) );
366 for (
size_t col=0; col<ncol; col++) {
367 vector_type yy_view =
368 Kokkos::subview( yy , Kokkos::ALL(), col );
370 Kokkos::subview( y, Kokkos::ALL(), col_indices[col] );
375 static void apply(
const matrix_type & A ,
376 const multi_vector_type & x ,
377 const multi_vector_type & y ,
378 const std::vector<Ordinal> & col_indices )
380 CudaSparseSingleton & s = CudaSparseSingleton::singleton();
381 const double alpha = 1 , beta = 0 ;
382 const int n = A.graph.row_map.extent(0) - 1 ;
383 const int nz = A.graph.entries.extent(0);
384 const size_t ncol = col_indices.size();
387 vector_type xx( Kokkos::ViewAllocateWithoutInitializing(
"xx"), n * ncol );
388 vector_type yy( Kokkos::ViewAllocateWithoutInitializing(
"yy"), n * ncol );
390 for (
size_t col=0; col<ncol; col++) {
391 const std::pair< size_t , size_t > span( n * col , n * ( col + 1 ) );
392 vector_type xx_view = Kokkos::subview( xx , span );
394 Kokkos::subview( x, Kokkos::ALL(), col_indices[col] );
399 cusparseStatus_t status =
400 cusparseDcsrmm( s.handle ,
401 CUSPARSE_OPERATION_NON_TRANSPOSE ,
406 A.graph.row_map.data() ,
407 A.graph.entries.data() ,
414 if ( CUSPARSE_STATUS_SUCCESS != status ) {
415 throw std::runtime_error( std::string(
"ERROR - cusparseDcsrmv " ) );
419 for (
size_t col=0; col<ncol; col++) {
420 const std::pair< size_t , size_t > span( n * col , n * ( col + 1 ) );
421 vector_type yy_view = Kokkos::subview( yy , span );
423 Kokkos::subview( y, Kokkos::ALL(), col_indices[col] );
432 CrsMatrix< float , Kokkos::Cuda > ,
433 Kokkos::View< float** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
434 Kokkos::View< float** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
440 typedef execution_space::size_type size_type;
441 typedef Kokkos::View< float**, Kokkos::LayoutLeft, execution_space > multi_vector_type;
442 typedef CrsMatrix< float , execution_space > matrix_type;
446 static void apply(
const matrix_type & A ,
447 const multi_vector_type & x ,
448 const multi_vector_type & y )
450 CudaSparseSingleton & s = CudaSparseSingleton::singleton();
451 const float alpha = 1 , beta = 0 ;
452 const int n = A.graph.row_map.extent(0) - 1 ;
453 const int nz = A.graph.entries.extent(0);
454 const size_t ncol = x.extent(1);
457 cusparseStatus_t status =
458 cusparseScsrmm( s.handle ,
459 CUSPARSE_OPERATION_NON_TRANSPOSE ,
464 A.graph.row_map.data() ,
465 A.graph.entries.data() ,
472 if ( CUSPARSE_STATUS_SUCCESS != status ) {
473 throw std::runtime_error( std::string(
"ERROR - cusparseDcsrmv " ) );
480 CrsMatrix<
double , Kokkos::Cuda > ,
481 Kokkos::View< double** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
482 Kokkos::View< double** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
488 typedef execution_space::size_type size_type;
489 typedef Kokkos::View< double**, Kokkos::LayoutLeft, execution_space > multi_vector_type;
490 typedef CrsMatrix< double , execution_space > matrix_type;
494 static void apply(
const matrix_type & A ,
495 const multi_vector_type & x ,
496 const multi_vector_type & y )
498 CudaSparseSingleton & s = CudaSparseSingleton::singleton();
499 const double alpha = 1 , beta = 0 ;
500 const int n = A.graph.row_map.extent(0) - 1 ;
501 const int nz = A.graph.entries.extent(0);
502 const size_t ncol = x.extent(1);
505 cusparseStatus_t status =
506 cusparseDcsrmm( s.handle ,
507 CUSPARSE_OPERATION_NON_TRANSPOSE ,
512 A.graph.row_map.data() ,
513 A.graph.entries.data() ,
520 if ( CUSPARSE_STATUS_SUCCESS != status ) {
521 throw std::runtime_error( std::string(
"ERROR - cusparseDcsrmv " ) );
529 template <
typename Ordinal>
531 CrsMatrix<
double , Kokkos::Cuda > ,
532 Kokkos::View< double** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
533 Kokkos::View< double** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
534 std::vector<Ordinal> ,
539 typedef execution_space::size_type size_type;
540 typedef Kokkos::View< double*, Kokkos::LayoutLeft, execution_space > vector_type;
541 typedef Kokkos::View< double**, Kokkos::LayoutLeft, execution_space > multi_vector_type;
542 typedef CrsMatrix< double , execution_space > matrix_type;
543 typedef Kokkos::View< size_type*, execution_space > column_indices_type;
545 const matrix_type m_A;
546 const multi_vector_type m_x;
547 multi_vector_type m_y;
548 const column_indices_type m_col;
549 const size_type m_num_col;
551 Multiply(
const matrix_type& A,
552 const multi_vector_type& x,
553 multi_vector_type& y,
554 const column_indices_type& col) :
559 m_num_col(col.extent(0)) {}
562 inline void operator() (
const size_type iRow )
const {
563 const size_type iEntryBegin = m_A.graph.row_map[iRow];
564 const size_type iEntryEnd = m_A.graph.row_map[iRow+1];
566 for (size_type
j=0;
j<m_num_col;
j++) {
567 size_type iCol = m_col_indices[
j];
571 for ( size_type iEntry = iEntryBegin ; iEntry < iEntryEnd ; ++iEntry ) {
572 sum += m_A.values(iEntry) * m_x( m_A.graph.entries(iEntry), iCol );
575 m_y( iRow, iCol ) =
sum;
582 static void apply(
const matrix_type & A ,
583 const multi_vector_type & x ,
584 const multi_vector_type & y ,
585 const std::vector<Ordinal> & col_indices )
588 Kokkos::View<Ordinal*,execution_space> col_indices_dev(
589 Kokkos::ViewAllocateWithoutInitializing(
"col_indices"), ncol);
590 typename Kokkos::View<Ordinal*,execution_space>::HostMirror col_indices_host =
592 for (
size_t i=0; i<ncol; ++i)
593 col_indices_host(i) = col_indices[i];
596 const size_t n = A.graph.row_map.extent(0) - 1 ;
597 Kokkos::parallel_for( n , Multiply(A,x,y,col_indices_dev) );
605 CrsMatrix< float , Kokkos::Cuda > ,
606 std::vector< Kokkos::View< float* , Kokkos::Cuda > >,
607 std::vector< Kokkos::View< float* , Kokkos::Cuda > >,
613 typedef execution_space::size_type size_type ;
614 typedef Kokkos::View< float* , execution_space > vector_type ;
615 typedef CrsMatrix< float , execution_space > matrix_type ;
619 static void apply(
const matrix_type & A ,
620 const std::vector<vector_type> & x ,
621 const std::vector<vector_type> & y )
623 CudaSparseSingleton & s = CudaSparseSingleton::singleton();
624 const float alpha = 1 , beta = 0 ;
625 const int n = A.graph.row_map.extent(0) - 1 ;
626 const int nz = A.graph.entries.extent(0);
627 const size_t ncol = x.size();
630 vector_type xx( Kokkos::ViewAllocateWithoutInitializing(
"xx"), n * ncol );
631 vector_type yy( Kokkos::ViewAllocateWithoutInitializing(
"yy"), n * ncol );
633 for (
size_t col=0; col<ncol; col++) {
634 const std::pair< size_t , size_t > span( n * col , n * ( col + 1 ) );
635 vector_type xx_view = Kokkos::subview( xx , span );
640 cusparseStatus_t status =
641 cusparseScsrmm( s.handle ,
642 CUSPARSE_OPERATION_NON_TRANSPOSE ,
647 A.graph.row_map.data() ,
648 A.graph.entries.data() ,
655 if ( CUSPARSE_STATUS_SUCCESS != status ) {
656 throw std::runtime_error( std::string(
"ERROR - cusparseDcsrmv " ) );
660 for (
size_t col=0; col<ncol; col++) {
661 const std::pair< size_t , size_t > span( n * col , n * ( col + 1 ) );
662 vector_type yy_view = Kokkos::subview( yy , span );
670 CrsMatrix<
double , Kokkos::Cuda > ,
671 std::vector< Kokkos::View< double* , Kokkos::Cuda > >,
672 std::vector< Kokkos::View< double* , Kokkos::Cuda > >,
678 typedef execution_space::size_type size_type ;
679 typedef Kokkos::View< double* , execution_space > vector_type ;
680 typedef CrsMatrix< double , execution_space > matrix_type ;
684 static void apply(
const matrix_type & A ,
685 const std::vector<vector_type> & x ,
686 const std::vector<vector_type> & y )
688 CudaSparseSingleton & s = CudaSparseSingleton::singleton();
689 const double alpha = 1 , beta = 0 ;
690 const int n = A.graph.row_map.extent(0) - 1 ;
691 const int nz = A.graph.entries.extent(0);
692 const size_t ncol = x.size();
695 vector_type xx( Kokkos::ViewAllocateWithoutInitializing(
"xx"), n * ncol );
696 vector_type yy( Kokkos::ViewAllocateWithoutInitializing(
"yy"), n * ncol );
698 for (
size_t col=0; col<ncol; col++) {
699 const std::pair< size_t , size_t > span( n * col , n * ( col + 1 ) );
700 vector_type xx_view = Kokkos::subview( xx , span );
705 cusparseStatus_t status =
706 cusparseDcsrmm( s.handle ,
707 CUSPARSE_OPERATION_NON_TRANSPOSE ,
712 A.graph.row_map.data() ,
713 A.graph.entries.data() ,
720 if ( CUSPARSE_STATUS_SUCCESS != status ) {
721 throw std::runtime_error( std::string(
"ERROR - cusparseDcsrmv " ) );
725 for (
size_t col=0; col<ncol; col++) {
726 const std::pair< size_t , size_t > span( n * col , n * ( col + 1 ) );
727 vector_type yy_view = Kokkos::subview( yy , span );
Kokkos::DefaultExecutionSpace execution_space
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< RD, RP...> >::value &&Kokkos::is_view_uq_pce< Kokkos::View< XD, XP...> >::value >::type sum(const Kokkos::View< RD, RP...> &r, const Kokkos::View< XD, XP...> &x)
Stokhos::CrsMatrix< ValueType, Device, Layout >::HostMirror create_mirror_view(const Stokhos::CrsMatrix< ValueType, Device, Layout > &A)