10 #ifndef STOKHOS_CUDA_CRSMATRIX_HPP
11 #define STOKHOS_CUDA_CRSMATRIX_HPP
14 #ifdef HAVE_STOKHOS_CUSPARSE
20 #include <cuda_runtime.h>
22 #include <cusparse_v2.h>
24 #include "Kokkos_Core.hpp"
32 #if CUSPARSE_VERSION >= 11401
33 #define USE_NEW_SPMV 1
34 #define NEW_SPMV_ALG_DEFAULTS \
35 cusparseSpMVAlg_t alg_spmv = CUSPARSE_SPMV_ALG_DEFAULT;
36 #define NEW_SPMM_ALG_DEFAULTS \
37 cusparseSpMMAlg_t alg_spmm = CUSPARSE_SPMM_ALG_DEFAULT;
38 #elif CUSPARSE_VERSION >= 11000
39 #define USE_NEW_SPMV 1
40 #define NEW_SPMV_ALG_DEFAULTS \
41 cusparseSpMVAlg_t alg_spmv = CUSPARSE_MV_ALG_DEFAULT;
42 #define NEW_SPMM_ALG_DEFAULTS \
43 cusparseSpMMAlg_t alg_spmm = CUSPARSE_MM_ALG_DEFAULT;
45 #define USE_NEW_SPMV 0
50 class CudaSparseSingleton {
53 cusparseStatus_t status;
54 cusparseHandle_t handle;
55 cusparseMatDescr_t descra;
57 static CudaSparseSingleton & singleton();
63 status = cusparseCreate(&handle);
64 if(status != CUSPARSE_STATUS_SUCCESS)
66 throw std::runtime_error( std::string(
"ERROR - CUSPARSE Library Initialization failed" ) );
69 status = cusparseCreateMatDescr(&descra);
70 if(status != CUSPARSE_STATUS_SUCCESS)
72 throw std::runtime_error( std::string(
"ERROR - CUSPARSE Library Matrix descriptor failed" ) );
75 cusparseSetMatType(descra , CUSPARSE_MATRIX_TYPE_GENERAL);
76 cusparseSetMatIndexBase(descra , CUSPARSE_INDEX_BASE_ZERO);
79 CudaSparseSingleton(
const CudaSparseSingleton & );
80 CudaSparseSingleton & operator = (
const CudaSparseSingleton & );
83 CudaSparseSingleton & CudaSparseSingleton::singleton()
85 static CudaSparseSingleton s ;
return s ;
90 CrsMatrix< float , Kokkos::Cuda > ,
91 Kokkos::View< float* , Kokkos::Cuda > ,
92 Kokkos::View< float* , Kokkos::Cuda > ,
98 typedef execution_space::size_type size_type ;
99 typedef Kokkos::View< float* , execution_space > vector_type ;
100 typedef CrsMatrix< float , execution_space > matrix_type ;
104 static void apply(
const matrix_type &
A ,
105 const vector_type & x ,
106 const vector_type & y )
108 CudaSparseSingleton & s = CudaSparseSingleton::singleton();
109 const float alpha = 1 , beta = 0 ;
110 const int n = A.graph.row_map.extent(0) - 1 ;
111 const int nz = A.graph.entries.extent(0);
114 NEW_SPMV_ALG_DEFAULTS
115 using offset_type =
typename matrix_type::graph_type::size_type;
116 using entry_type =
typename matrix_type::graph_type::data_type;
117 const cusparseIndexType_t myCusparseOffsetType =
118 sizeof(offset_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
119 const cusparseIndexType_t myCusparseEntryType =
120 sizeof(entry_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
121 cusparseSpMatDescr_t A_cusparse;
123 &A_cusparse, n, n, nz,
124 (
void*)A.graph.row_map.data(), (
void*)A.graph.entries.data(),
125 (
void*)A.values.data(), myCusparseOffsetType, myCusparseEntryType,
126 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F);
127 cusparseDnVecDescr_t x_cusparse, y_cusparse;
128 cusparseCreateDnVec(&x_cusparse, n, (
void*)x.data(), CUDA_R_32F);
129 cusparseCreateDnVec(&y_cusparse, n, (
void*)y.data(), CUDA_R_32F);
130 size_t bufferSize = 0;
131 void* dBuffer = NULL;
132 cusparseSpMV_bufferSize(
133 s.handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, A_cusparse,
134 x_cusparse, &beta, y_cusparse, CUDA_R_32F, alg_spmv,
136 cudaMalloc(&dBuffer, bufferSize);
137 cusparseStatus_t status =
138 cusparseSpMV(s.handle ,
139 CUSPARSE_OPERATION_NON_TRANSPOSE ,
149 cusparseDestroyDnVec(x_cusparse);
150 cusparseDestroyDnVec(y_cusparse);
151 cusparseDestroySpMat(A_cusparse);
153 cusparseStatus_t status =
154 cusparseScsrmv( s.handle ,
155 CUSPARSE_OPERATION_NON_TRANSPOSE ,
160 A.graph.row_map.data() ,
161 A.graph.entries.data() ,
167 if ( CUSPARSE_STATUS_SUCCESS != status ) {
168 throw std::runtime_error( std::string(
"ERROR - cusparseScsrmv " ) );
175 CrsMatrix<
double , Kokkos::Cuda > ,
176 Kokkos::View< double* , Kokkos::Cuda > ,
177 Kokkos::View< double* , Kokkos::Cuda > ,
183 typedef execution_space::size_type size_type ;
184 typedef Kokkos::View< double* , execution_space > vector_type ;
185 typedef CrsMatrix< double , execution_space > matrix_type ;
189 static void apply(
const matrix_type & A ,
190 const vector_type & x ,
191 const vector_type & y )
193 CudaSparseSingleton & s = CudaSparseSingleton::singleton();
194 const double alpha = 1 , beta = 0 ;
195 const int n = A.graph.row_map.extent(0) - 1 ;
196 const int nz = A.graph.entries.extent(0);
199 NEW_SPMV_ALG_DEFAULTS
200 using offset_type =
typename matrix_type::graph_type::size_type;
201 using entry_type =
typename matrix_type::graph_type::data_type;
202 const cusparseIndexType_t myCusparseOffsetType =
203 sizeof(offset_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
204 const cusparseIndexType_t myCusparseEntryType =
205 sizeof(entry_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
206 cusparseSpMatDescr_t A_cusparse;
208 &A_cusparse, n, n, nz,
209 (
void*)A.graph.row_map.data(), (
void*)A.graph.entries.data(),
210 (
void*)A.values.data(), myCusparseOffsetType, myCusparseEntryType,
211 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F);
212 cusparseDnVecDescr_t x_cusparse, y_cusparse;
213 cusparseCreateDnVec(&x_cusparse, n, (
void*)x.data(), CUDA_R_64F);
214 cusparseCreateDnVec(&y_cusparse, n, (
void*)y.data(), CUDA_R_64F);
215 size_t bufferSize = 0;
216 void* dBuffer = NULL;
217 cusparseSpMV_bufferSize(
218 s.handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, A_cusparse,
219 x_cusparse, &beta, y_cusparse, CUDA_R_64F, alg_spmv,
221 cudaMalloc(&dBuffer, bufferSize);
222 cusparseStatus_t status =
223 cusparseSpMV(s.handle ,
224 CUSPARSE_OPERATION_NON_TRANSPOSE ,
234 cusparseDestroyDnVec(x_cusparse);
235 cusparseDestroyDnVec(y_cusparse);
236 cusparseDestroySpMat(A_cusparse);
238 cusparseStatus_t status =
239 cusparseDcsrmv( s.handle ,
240 CUSPARSE_OPERATION_NON_TRANSPOSE ,
245 A.graph.row_map.data() ,
246 A.graph.entries.data() ,
252 if ( CUSPARSE_STATUS_SUCCESS != status ) {
253 throw std::runtime_error( std::string(
"ERROR - cusparseDcsrmv " ) );
258 template <
typename Ordinal>
260 CrsMatrix< float , Kokkos::Cuda > ,
261 Kokkos::View< float** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
262 Kokkos::View< float** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
263 std::vector<Ordinal> ,
268 typedef execution_space::size_type size_type;
269 typedef Kokkos::View< float*, Kokkos::LayoutLeft, execution_space > vector_type;
270 typedef Kokkos::View< float**, Kokkos::LayoutLeft, execution_space > multi_vector_type;
271 typedef CrsMatrix< float , execution_space > matrix_type;
275 static void apply(
const matrix_type & A ,
276 const multi_vector_type & x ,
277 const multi_vector_type & y ,
278 const std::vector<Ordinal> & col_indices )
280 CudaSparseSingleton & s = CudaSparseSingleton::singleton();
281 const float alpha = 1 , beta = 0 ;
282 const int n = A.graph.row_map.extent(0) - 1 ;
283 const int nz = A.graph.entries.extent(0);
284 const size_t ncol = col_indices.size();
287 vector_type xx( Kokkos::ViewAllocateWithoutInitializing(
"xx"), n * ncol );
288 vector_type yy( Kokkos::ViewAllocateWithoutInitializing(
"yy"), n * ncol );
290 for (
size_t col=0; col<ncol; col++) {
291 const std::pair< size_t , size_t > span( n * col , n * ( col + 1 ) );
292 vector_type xx_view = Kokkos::subview( xx , span );
294 Kokkos::subview( x, Kokkos::ALL(), col_indices[col] );
300 NEW_SPMM_ALG_DEFAULTS
301 using offset_type =
typename matrix_type::graph_type::size_type;
302 using entry_type =
typename matrix_type::graph_type::data_type;
303 const cusparseIndexType_t myCusparseOffsetType =
304 sizeof(offset_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
305 const cusparseIndexType_t myCusparseEntryType =
306 sizeof(entry_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
307 cusparseSpMatDescr_t A_cusparse;
309 &A_cusparse, n, n, nz,
310 (
void*)A.graph.row_map.data(), (
void*)A.graph.entries.data(),
311 (
void*)A.values.data(), myCusparseOffsetType, myCusparseEntryType,
312 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F);
313 cusparseDnMatDescr_t x_cusparse, y_cusparse;
315 &x_cusparse, n, ncol, n,
316 (
void*)xx.data(), CUDA_R_32F, CUSPARSE_ORDER_COL);
318 &y_cusparse, n, ncol, n,
319 (
void*)yy.data(), CUDA_R_32F, CUSPARSE_ORDER_COL);
320 size_t bufferSize = 0;
321 void* dBuffer = NULL;
322 cusparseSpMM_bufferSize(
323 s.handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
324 CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, A_cusparse,
325 x_cusparse, &beta, y_cusparse, CUDA_R_32F, alg_spmm,
327 cudaMalloc(&dBuffer, bufferSize);
328 cusparseStatus_t status =
329 cusparseSpMM(s.handle ,
330 CUSPARSE_OPERATION_NON_TRANSPOSE ,
331 CUSPARSE_OPERATION_NON_TRANSPOSE ,
341 cusparseDestroyDnMat(x_cusparse);
342 cusparseDestroyDnMat(y_cusparse);
343 cusparseDestroySpMat(A_cusparse);
345 cusparseStatus_t status =
346 cusparseScsrmm( s.handle ,
347 CUSPARSE_OPERATION_NON_TRANSPOSE ,
352 A.graph.row_map.data() ,
353 A.graph.entries.data() ,
361 if ( CUSPARSE_STATUS_SUCCESS != status ) {
362 throw std::runtime_error( std::string(
"ERROR - cusparseDcsrmv " ) );
366 for (
size_t col=0; col<ncol; col++) {
367 const std::pair< size_t , size_t > span( n * col , n * ( col + 1 ) );
368 vector_type yy_view = Kokkos::subview( yy , span );
370 Kokkos::subview( y, Kokkos::ALL(), col_indices[col] );
376 #define USE_CUSPARSE 1
379 template <
typename Ordinal>
381 CrsMatrix<
double , Kokkos::Cuda > ,
382 Kokkos::View< double** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
383 Kokkos::View< double** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
384 std::vector<Ordinal> ,
389 typedef execution_space::size_type size_type;
390 typedef Kokkos::View< double*, Kokkos::LayoutLeft, execution_space > vector_type;
391 typedef Kokkos::View< double**, Kokkos::LayoutLeft, execution_space > multi_vector_type;
392 typedef CrsMatrix< double , execution_space > matrix_type;
396 #define USE_TRANSPOSE 0
403 struct GatherTranspose {
405 typedef execution_space::size_type size_type;
407 multi_vector_type m_xt;
408 const multi_vector_type m_x;
409 const Kokkos::View<Ordinal*,execution_space> m_col;
410 const size_type m_ncol;
412 GatherTranspose( multi_vector_type& xt,
413 const multi_vector_type& x,
414 const Kokkos::View<Ordinal*,execution_space>& col ) :
415 m_xt(xt), m_x(x), m_col(col), m_ncol(col.extent(0)) {}
418 inline void operator() (size_type i)
const {
419 for (size_type
j=0;
j<m_ncol; ++
j)
420 m_xt(
j,i) = m_x(i,m_col(
j));
423 static void apply( multi_vector_type& xt,
424 const multi_vector_type& x,
425 const Kokkos::View<Ordinal*,execution_space>& col ) {
426 const size_type n = x.extent(0);
427 Kokkos::parallel_for( n , GatherTranspose(xt,x,col) );
431 static void apply(
const matrix_type & A ,
432 const multi_vector_type & x ,
433 const multi_vector_type & y ,
434 const std::vector<Ordinal> & col_indices )
436 CudaSparseSingleton & s = CudaSparseSingleton::singleton();
437 const double alpha = 1 , beta = 0 ;
438 const int n = A.graph.row_map.extent(0) - 1 ;
439 const int nz = A.graph.entries.extent(0);
440 const size_t ncol = col_indices.size();
443 Kokkos::View<Ordinal*,execution_space> col_indices_dev(
444 Kokkos::ViewAllocateWithoutInitializing(
"col_indices"), ncol);
445 typename Kokkos::View<Ordinal*,execution_space>::HostMirror col_indices_host =
447 for (
size_t i=0; i<ncol; ++i)
448 col_indices_host(i) = col_indices[i];
452 multi_vector_type xx(
453 Kokkos::ViewAllocateWithoutInitializing(
"xx"), ncol , n );
454 GatherTranspose::apply(xx, x, col_indices_dev);
457 multi_vector_type yy(
458 Kokkos::ViewAllocateWithoutInitializing(
"yy"), n , ncol );
461 cusparseStatus_t status =
462 cusparseDcsrmm2( s.handle ,
463 CUSPARSE_OPERATION_NON_TRANSPOSE ,
464 CUSPARSE_OPERATION_TRANSPOSE ,
469 A.graph.row_map.data() ,
470 A.graph.entries.data() ,
477 if ( CUSPARSE_STATUS_SUCCESS != status ) {
478 throw std::runtime_error( std::string(
"ERROR - cusparseDcsrmv " ) );
482 for (
size_t col=0; col<ncol; col++) {
483 vector_type yy_view =
484 Kokkos::subview( yy , Kokkos::ALL(), col );
486 Kokkos::subview( y, Kokkos::ALL(), col_indices[col] );
491 static void apply(
const matrix_type & A ,
492 const multi_vector_type & x ,
493 const multi_vector_type & y ,
494 const std::vector<Ordinal> & col_indices )
496 CudaSparseSingleton & s = CudaSparseSingleton::singleton();
497 const double alpha = 1 , beta = 0 ;
498 const int n = A.graph.row_map.extent(0) - 1 ;
499 const int nz = A.graph.entries.extent(0);
500 const size_t ncol = col_indices.size();
503 vector_type xx( Kokkos::ViewAllocateWithoutInitializing(
"xx"), n * ncol );
504 vector_type yy( Kokkos::ViewAllocateWithoutInitializing(
"yy"), n * ncol );
506 for (
size_t col=0; col<ncol; col++) {
507 const std::pair< size_t , size_t > span( n * col , n * ( col + 1 ) );
508 vector_type xx_view = Kokkos::subview( xx , span );
510 Kokkos::subview( x, Kokkos::ALL(), col_indices[col] );
516 NEW_SPMM_ALG_DEFAULTS
517 using offset_type =
typename matrix_type::graph_type::size_type;
518 using entry_type =
typename matrix_type::graph_type::data_type;
519 const cusparseIndexType_t myCusparseOffsetType =
520 sizeof(offset_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
521 const cusparseIndexType_t myCusparseEntryType =
522 sizeof(entry_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
523 cusparseSpMatDescr_t A_cusparse;
525 &A_cusparse, n, n, nz,
526 (
void*)A.graph.row_map.data(), (
void*)A.graph.entries.data(),
527 (
void*)A.values.data(), myCusparseOffsetType, myCusparseEntryType,
528 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F);
529 cusparseDnMatDescr_t x_cusparse, y_cusparse;
531 &x_cusparse, n, ncol, n,
532 (
void*)xx.data(), CUDA_R_64F, CUSPARSE_ORDER_COL);
534 &y_cusparse, n, ncol, n,
535 (
void*)yy.data(), CUDA_R_64F, CUSPARSE_ORDER_COL);
536 size_t bufferSize = 0;
537 void* dBuffer = NULL;
538 cusparseSpMM_bufferSize(
539 s.handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
540 CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, A_cusparse,
541 x_cusparse, &beta, y_cusparse, CUDA_R_64F, alg_spmm,
543 cudaMalloc(&dBuffer, bufferSize);
544 cusparseStatus_t status =
545 cusparseSpMM(s.handle ,
546 CUSPARSE_OPERATION_NON_TRANSPOSE ,
547 CUSPARSE_OPERATION_NON_TRANSPOSE ,
557 cusparseDestroyDnMat(x_cusparse);
558 cusparseDestroyDnMat(y_cusparse);
559 cusparseDestroySpMat(A_cusparse);
561 cusparseStatus_t status =
562 cusparseDcsrmm( s.handle ,
563 CUSPARSE_OPERATION_NON_TRANSPOSE ,
568 A.graph.row_map.data() ,
569 A.graph.entries.data() ,
577 if ( CUSPARSE_STATUS_SUCCESS != status ) {
578 throw std::runtime_error( std::string(
"ERROR - cusparseDcsrmv " ) );
582 for (
size_t col=0; col<ncol; col++) {
583 const std::pair< size_t , size_t > span( n * col , n * ( col + 1 ) );
584 vector_type yy_view = Kokkos::subview( yy , span );
586 Kokkos::subview( y, Kokkos::ALL(), col_indices[col] );
595 CrsMatrix< float , Kokkos::Cuda > ,
596 Kokkos::View< float** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
597 Kokkos::View< float** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
603 typedef execution_space::size_type size_type;
604 typedef Kokkos::View< float**, Kokkos::LayoutLeft, execution_space > multi_vector_type;
605 typedef CrsMatrix< float , execution_space > matrix_type;
609 static void apply(
const matrix_type & A ,
610 const multi_vector_type & x ,
611 const multi_vector_type & y )
613 CudaSparseSingleton & s = CudaSparseSingleton::singleton();
614 const float alpha = 1 , beta = 0 ;
615 const int n = A.graph.row_map.extent(0) - 1 ;
616 const int nz = A.graph.entries.extent(0);
617 const size_t ncol = x.extent(1);
621 NEW_SPMM_ALG_DEFAULTS
622 using offset_type =
typename matrix_type::graph_type::size_type;
623 using entry_type =
typename matrix_type::graph_type::data_type;
624 const cusparseIndexType_t myCusparseOffsetType =
625 sizeof(offset_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
626 const cusparseIndexType_t myCusparseEntryType =
627 sizeof(entry_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
628 cusparseSpMatDescr_t A_cusparse;
630 &A_cusparse, n, n, nz,
631 (
void*)A.graph.row_map.data(), (
void*)A.graph.entries.data(),
632 (
void*)A.values.data(), myCusparseOffsetType, myCusparseEntryType,
633 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F);
634 cusparseDnMatDescr_t x_cusparse, y_cusparse;
636 &x_cusparse, n, ncol, x.stride(1),
637 (
void*)x.data(), CUDA_R_32F, CUSPARSE_ORDER_COL);
639 &y_cusparse, n, ncol, y.stride(1),
640 (
void*)y.data(), CUDA_R_32F, CUSPARSE_ORDER_COL);
641 size_t bufferSize = 0;
642 void* dBuffer = NULL;
643 cusparseSpMM_bufferSize(
644 s.handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
645 CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, A_cusparse,
646 x_cusparse, &beta, y_cusparse, CUDA_R_32F, alg_spmm,
648 cudaMalloc(&dBuffer, bufferSize);
649 cusparseStatus_t status =
650 cusparseSpMM(s.handle ,
651 CUSPARSE_OPERATION_NON_TRANSPOSE ,
652 CUSPARSE_OPERATION_NON_TRANSPOSE ,
662 cusparseDestroyDnMat(x_cusparse);
663 cusparseDestroyDnMat(y_cusparse);
664 cusparseDestroySpMat(A_cusparse);
666 cusparseStatus_t status =
667 cusparseScsrmm( s.handle ,
668 CUSPARSE_OPERATION_NON_TRANSPOSE ,
673 A.graph.row_map.data() ,
674 A.graph.entries.data() ,
682 if ( CUSPARSE_STATUS_SUCCESS != status ) {
683 throw std::runtime_error( std::string(
"ERROR - cusparseDcsrmv " ) );
690 CrsMatrix<
double , Kokkos::Cuda > ,
691 Kokkos::View< double** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
692 Kokkos::View< double** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
698 typedef execution_space::size_type size_type;
699 typedef Kokkos::View< double**, Kokkos::LayoutLeft, execution_space > multi_vector_type;
700 typedef CrsMatrix< double , execution_space > matrix_type;
704 static void apply(
const matrix_type & A ,
705 const multi_vector_type & x ,
706 const multi_vector_type & y )
708 CudaSparseSingleton & s = CudaSparseSingleton::singleton();
709 const double alpha = 1 , beta = 0 ;
710 const int n = A.graph.row_map.extent(0) - 1 ;
711 const int nz = A.graph.entries.extent(0);
712 const size_t ncol = x.extent(1);
716 NEW_SPMM_ALG_DEFAULTS
717 using offset_type =
typename matrix_type::graph_type::size_type;
718 using entry_type =
typename matrix_type::graph_type::data_type;
719 const cusparseIndexType_t myCusparseOffsetType =
720 sizeof(offset_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
721 const cusparseIndexType_t myCusparseEntryType =
722 sizeof(entry_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
723 cusparseSpMatDescr_t A_cusparse;
725 &A_cusparse, n, n, nz,
726 (
void*)A.graph.row_map.data(), (
void*)A.graph.entries.data(),
727 (
void*)A.values.data(), myCusparseOffsetType, myCusparseEntryType,
728 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F);
729 cusparseDnMatDescr_t x_cusparse, y_cusparse;
731 &x_cusparse, n, ncol, x.stride(1),
732 (
void*)x.data(), CUDA_R_64F, CUSPARSE_ORDER_COL);
734 &y_cusparse, n, ncol, y.stride(1),
735 (
void*)y.data(), CUDA_R_64F, CUSPARSE_ORDER_COL);
736 size_t bufferSize = 0;
737 void* dBuffer = NULL;
738 cusparseSpMM_bufferSize(
739 s.handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
740 CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, A_cusparse,
741 x_cusparse, &beta, y_cusparse, CUDA_R_64F, alg_spmm,
743 cudaMalloc(&dBuffer, bufferSize);
744 cusparseStatus_t status =
745 cusparseSpMM(s.handle ,
746 CUSPARSE_OPERATION_NON_TRANSPOSE ,
747 CUSPARSE_OPERATION_NON_TRANSPOSE ,
757 cusparseDestroyDnMat(x_cusparse);
758 cusparseDestroyDnMat(y_cusparse);
759 cusparseDestroySpMat(A_cusparse);
761 cusparseStatus_t status =
762 cusparseDcsrmm( s.handle ,
763 CUSPARSE_OPERATION_NON_TRANSPOSE ,
768 A.graph.row_map.data() ,
769 A.graph.entries.data() ,
777 if ( CUSPARSE_STATUS_SUCCESS != status ) {
778 throw std::runtime_error( std::string(
"ERROR - cusparseDcsrmv " ) );
786 template <
typename Ordinal>
788 CrsMatrix<
double , Kokkos::Cuda > ,
789 Kokkos::View< double** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
790 Kokkos::View< double** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
791 std::vector<Ordinal> ,
796 typedef execution_space::size_type size_type;
797 typedef Kokkos::View< double*, Kokkos::LayoutLeft, execution_space > vector_type;
798 typedef Kokkos::View< double**, Kokkos::LayoutLeft, execution_space > multi_vector_type;
799 typedef CrsMatrix< double , execution_space > matrix_type;
800 typedef Kokkos::View< size_type*, execution_space > column_indices_type;
802 const matrix_type m_A;
803 const multi_vector_type m_x;
804 multi_vector_type m_y;
805 const column_indices_type m_col;
806 const size_type m_num_col;
808 Multiply(
const matrix_type& A,
809 const multi_vector_type& x,
810 multi_vector_type& y,
811 const column_indices_type& col) :
816 m_num_col(col.extent(0)) {}
819 inline void operator() (
const size_type iRow )
const {
820 const size_type iEntryBegin = m_A.graph.row_map[iRow];
821 const size_type iEntryEnd = m_A.graph.row_map[iRow+1];
823 for (size_type
j=0;
j<m_num_col;
j++) {
824 size_type iCol = m_col_indices[
j];
828 for ( size_type iEntry = iEntryBegin ; iEntry < iEntryEnd ; ++iEntry ) {
829 sum += m_A.values(iEntry) * m_x( m_A.graph.entries(iEntry), iCol );
832 m_y( iRow, iCol ) =
sum;
839 static void apply(
const matrix_type & A ,
840 const multi_vector_type & x ,
841 const multi_vector_type & y ,
842 const std::vector<Ordinal> & col_indices )
845 Kokkos::View<Ordinal*,execution_space> col_indices_dev(
846 Kokkos::ViewAllocateWithoutInitializing(
"col_indices"), ncol);
847 typename Kokkos::View<Ordinal*,execution_space>::HostMirror col_indices_host =
849 for (
size_t i=0; i<ncol; ++i)
850 col_indices_host(i) = col_indices[i];
853 const size_t n = A.graph.row_map.extent(0) - 1 ;
854 Kokkos::parallel_for( n , Multiply(A,x,y,col_indices_dev) );
862 CrsMatrix< float , Kokkos::Cuda > ,
863 std::vector< Kokkos::View< float* , Kokkos::Cuda > >,
864 std::vector< Kokkos::View< float* , Kokkos::Cuda > >,
870 typedef execution_space::size_type size_type ;
871 typedef Kokkos::View< float* , execution_space > vector_type ;
872 typedef CrsMatrix< float , execution_space > matrix_type ;
876 static void apply(
const matrix_type & A ,
877 const std::vector<vector_type> & x ,
878 const std::vector<vector_type> & y )
880 CudaSparseSingleton & s = CudaSparseSingleton::singleton();
881 const float alpha = 1 , beta = 0 ;
882 const int n = A.graph.row_map.extent(0) - 1 ;
883 const int nz = A.graph.entries.extent(0);
884 const size_t ncol = x.size();
887 vector_type xx( Kokkos::ViewAllocateWithoutInitializing(
"xx"), n * ncol );
888 vector_type yy( Kokkos::ViewAllocateWithoutInitializing(
"yy"), n * ncol );
890 for (
size_t col=0; col<ncol; col++) {
891 const std::pair< size_t , size_t > span( n * col , n * ( col + 1 ) );
892 vector_type xx_view = Kokkos::subview( xx , span );
898 NEW_SPMM_ALG_DEFAULTS
899 using offset_type =
typename matrix_type::graph_type::size_type;
900 using entry_type =
typename matrix_type::graph_type::data_type;
901 const cusparseIndexType_t myCusparseOffsetType =
902 sizeof(offset_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
903 const cusparseIndexType_t myCusparseEntryType =
904 sizeof(entry_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
905 cusparseSpMatDescr_t A_cusparse;
907 &A_cusparse, n, n, nz,
908 (
void*)A.graph.row_map.data(), (
void*)A.graph.entries.data(),
909 (
void*)A.values.data(), myCusparseOffsetType, myCusparseEntryType,
910 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F);
911 cusparseDnMatDescr_t x_cusparse, y_cusparse;
913 &x_cusparse, n, ncol, n,
914 (
void*)xx.data(), CUDA_R_32F, CUSPARSE_ORDER_COL);
916 &y_cusparse, n, ncol, n,
917 (
void*)yy.data(), CUDA_R_32F, CUSPARSE_ORDER_COL);
918 size_t bufferSize = 0;
919 void* dBuffer = NULL;
920 cusparseSpMM_bufferSize(
921 s.handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
922 CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, A_cusparse,
923 x_cusparse, &beta, y_cusparse, CUDA_R_32F, alg_spmm,
925 cudaMalloc(&dBuffer, bufferSize);
926 cusparseStatus_t status =
927 cusparseSpMM(s.handle ,
928 CUSPARSE_OPERATION_NON_TRANSPOSE ,
929 CUSPARSE_OPERATION_NON_TRANSPOSE ,
939 cusparseDestroyDnMat(x_cusparse);
940 cusparseDestroyDnMat(y_cusparse);
941 cusparseDestroySpMat(A_cusparse);
943 cusparseStatus_t status =
944 cusparseScsrmm( s.handle ,
945 CUSPARSE_OPERATION_NON_TRANSPOSE ,
950 A.graph.row_map.data() ,
951 A.graph.entries.data() ,
959 if ( CUSPARSE_STATUS_SUCCESS != status ) {
960 throw std::runtime_error( std::string(
"ERROR - cusparseDcsrmv " ) );
964 for (
size_t col=0; col<ncol; col++) {
965 const std::pair< size_t , size_t > span( n * col , n * ( col + 1 ) );
966 vector_type yy_view = Kokkos::subview( yy , span );
974 CrsMatrix<
double , Kokkos::Cuda > ,
975 std::vector< Kokkos::View< double* , Kokkos::Cuda > >,
976 std::vector< Kokkos::View< double* , Kokkos::Cuda > >,
982 typedef execution_space::size_type size_type ;
983 typedef Kokkos::View< double* , execution_space > vector_type ;
984 typedef CrsMatrix< double , execution_space > matrix_type ;
988 static void apply(
const matrix_type & A ,
989 const std::vector<vector_type> & x ,
990 const std::vector<vector_type> & y )
992 CudaSparseSingleton & s = CudaSparseSingleton::singleton();
993 const double alpha = 1 , beta = 0 ;
994 const int n = A.graph.row_map.extent(0) - 1 ;
995 const int nz = A.graph.entries.extent(0);
996 const size_t ncol = x.size();
999 vector_type xx( Kokkos::ViewAllocateWithoutInitializing(
"xx"), n * ncol );
1000 vector_type yy( Kokkos::ViewAllocateWithoutInitializing(
"yy"), n * ncol );
1002 for (
size_t col=0; col<ncol; col++) {
1003 const std::pair< size_t , size_t > span( n * col , n * ( col + 1 ) );
1004 vector_type xx_view = Kokkos::subview( xx , span );
1010 NEW_SPMM_ALG_DEFAULTS
1011 using offset_type =
typename matrix_type::graph_type::size_type;
1012 using entry_type =
typename matrix_type::graph_type::data_type;
1013 const cusparseIndexType_t myCusparseOffsetType =
1014 sizeof(offset_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
1015 const cusparseIndexType_t myCusparseEntryType =
1016 sizeof(entry_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
1017 cusparseSpMatDescr_t A_cusparse;
1019 &A_cusparse, n, n, nz,
1020 (
void*)A.graph.row_map.data(), (
void*)A.graph.entries.data(),
1021 (
void*)A.values.data(), myCusparseOffsetType, myCusparseEntryType,
1022 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F);
1023 cusparseDnMatDescr_t x_cusparse, y_cusparse;
1024 cusparseCreateDnMat(
1025 &x_cusparse, n, ncol, n,
1026 (
void*)xx.data(), CUDA_R_64F, CUSPARSE_ORDER_COL);
1027 cusparseCreateDnMat(
1028 &y_cusparse, n, ncol, n,
1029 (
void*)yy.data(), CUDA_R_64F, CUSPARSE_ORDER_COL);
1030 size_t bufferSize = 0;
1031 void* dBuffer = NULL;
1032 cusparseSpMM_bufferSize(
1033 s.handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1034 CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, A_cusparse,
1035 x_cusparse, &beta, y_cusparse, CUDA_R_64F, alg_spmm,
1037 cudaMalloc(&dBuffer, bufferSize);
1038 cusparseStatus_t status =
1039 cusparseSpMM(s.handle ,
1040 CUSPARSE_OPERATION_NON_TRANSPOSE ,
1041 CUSPARSE_OPERATION_NON_TRANSPOSE ,
1051 cusparseDestroyDnMat(x_cusparse);
1052 cusparseDestroyDnMat(y_cusparse);
1053 cusparseDestroySpMat(A_cusparse);
1055 cusparseStatus_t status =
1056 cusparseDcsrmm( s.handle ,
1057 CUSPARSE_OPERATION_NON_TRANSPOSE ,
1062 A.graph.row_map.data() ,
1063 A.graph.entries.data() ,
1071 if ( CUSPARSE_STATUS_SUCCESS != status ) {
1072 throw std::runtime_error( std::string(
"ERROR - cusparseDcsrmv " ) );
1076 for (
size_t col=0; col<ncol; col++) {
1077 const std::pair< size_t , size_t > span( n * col , n * ( col + 1 ) );
1078 vector_type yy_view = Kokkos::subview( yy , span );
Kokkos::DefaultExecutionSpace execution_space
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< RD, RP...> >::value &&Kokkos::is_view_uq_pce< Kokkos::View< XD, XP...> >::value >::type sum(const Kokkos::View< RD, RP...> &r, const Kokkos::View< XD, XP...> &x)
Stokhos::CrsMatrix< ValueType, Device, Layout >::HostMirror create_mirror_view(const Stokhos::CrsMatrix< ValueType, Device, Layout > &A)