10 #ifndef STOKHOS_CRSMATRIX_HPP
11 #define STOKHOS_CRSMATRIX_HPP
16 #include "Kokkos_Core.hpp"
17 #include "KokkosSparse_StaticCrsGraph.hpp"
27 Dim3(
const size_t x_,
const size_t y_ = 1,
const size_t z_ = 1) :
28 x(x_),
y(y_),
z(z_) {}
36 const size_t threads_per_block_x_,
37 const size_t threads_per_block_y_ = 1,
38 const size_t threads_per_block_z_ = 1) :
39 block_dim(threads_per_block_x_,threads_per_block_y_,threads_per_block_z_),
46 template <
typename ValueType,
typename Device,
47 typename Layout = Kokkos::LayoutRight>
52 typedef Kokkos::View< value_type[], Layout, execution_space >
values_type;
53 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE // Don't remove this until Kokkos has removed the deprecated code path probably around September 2018
54 typedef KokkosSparse::StaticCrsGraph< int , Layout, execution_space , int >
graph_type;
56 typedef KokkosSparse::StaticCrsGraph< int , Layout, execution_space , void, int >
graph_type;
71 template <
typename MatrixValue,
74 typename InputVectorType,
75 typename OutputVectorType>
88 typedef typename execution_space::size_type
size_type;
105 KOKKOS_INLINE_FUNCTION
108 const size_type iEntryBegin = m_A.graph.row_map[iRow];
109 const size_type iEntryEnd = m_A.graph.row_map[iRow+1];
113 for (
size_type iEntry = iEntryBegin; iEntry < iEntryEnd; ++iEntry ) {
114 sum += m_A.values(iEntry) * m_x( m_A.graph.entries(iEntry) );
124 const size_t row_count = A.
graph.row_map.extent(0) - 1;
125 Kokkos::parallel_for( row_count,
Multiply(A,x,y) );
130 template <
typename MatrixValue,
133 typename InputMultiVectorType,
134 typename OutputMultiVectorType,
135 typename OrdinalType >
137 InputMultiVectorType,
138 OutputMultiVectorType,
139 std::vector<OrdinalType>,
165 , m_col_indices( col_indices )
166 , m_num_vecs( col_indices.size() )
171 KOKKOS_INLINE_FUNCTION
174 const size_type iEntryBegin = m_A.graph.row_map[iRow];
175 const size_type iEntryEnd = m_A.graph.row_map[iRow+1];
182 for (
size_type iEntry = iEntryBegin ; iEntry < iEntryEnd ; ++iEntry ) {
183 sum += m_A.values(iEntry) * m_x( m_A.graph.entries(iEntry), iCol );
186 m_y( iRow, iCol ) =
sum;
197 const size_t n = A.
graph.row_map.extent(0) - 1 ;
200 const size_t block_size = 20;
201 const size_t num_vecs = col.size();
202 std::vector<OrdinalType> block_col;
203 block_col.reserve(block_size);
204 for (
size_t block=0; block<num_vecs; block+=block_size) {
206 block+block_size <= num_vecs ? block_size : num_vecs-block;
207 block_col.resize(bs);
208 for (
size_t i=0; i<bs; ++i)
209 block_col[i] = col[block+i];
210 Kokkos::parallel_for( n ,
Multiply(A,x,y,block_col) );
221 template <
typename MatrixValue,
224 typename InputMultiVectorType,
225 typename OutputMultiVectorType >
227 InputMultiVectorType,
228 OutputMultiVectorType,
256 , m_num_row( A.graph.row_map.extent(0)-1 )
257 , m_num_col( m_y.extent(1) )
263 KOKKOS_INLINE_FUNCTION
268 iBlockRow+m_block_row_size <= m_num_row ?
269 m_block_row_size : m_num_row-iBlockRow;
272 for (
size_type iBlockCol=0; iBlockCol<m_num_col; iBlockCol+=m_block_col_size) {
275 iBlockCol+m_block_col_size <= m_num_col ?
276 m_block_col_size : m_num_col-iBlockCol;
279 const size_type iRowEnd = iBlockRow + num_row;
280 for (
size_type iRow=iBlockRow; iRow<iRowEnd; ++iRow) {
283 const size_type iEntryBegin = m_A.graph.row_map[iRow];
284 const size_type iEntryEnd = m_A.graph.row_map[iRow+1];
287 const size_type iColEnd = iBlockCol + num_col;
288 for (
size_type iCol=iBlockCol; iCol<iColEnd; iCol++) {
292 for (
size_type iEntry = iEntryBegin; iEntry<iEntryEnd; ++iEntry) {
293 sum += m_A.values(iEntry) * m_x( m_A.graph.entries(iEntry), iCol );
295 m_y( iRow, iCol ) =
sum;
311 const size_type n = (num_row+m_block_row_size-1) / m_block_row_size;
312 Kokkos::parallel_for( n ,
Multiply(A,x,y) );
317 template <
typename MatrixValue,
320 typename InputMultiVectorType,
321 typename OutputMultiVectorType >
322 class Multiply< CrsMatrix<MatrixValue,Device,Layout>,
323 InputMultiVectorType,
324 OutputMultiVectorType,
329 typedef CrsMatrix<MatrixValue,Device,Layout> matrix_type;
330 typedef InputMultiVectorType input_multi_vector_type;
331 typedef OutputMultiVectorType output_multi_vector_type;
334 typedef typename execution_space::size_type size_type;
337 const matrix_type m_A;
338 const input_multi_vector_type m_x;
339 output_multi_vector_type m_y;
340 const size_type m_num_vecs;
342 Multiply(
const matrix_type&
A,
343 const input_multi_vector_type& x,
344 output_multi_vector_type& y)
348 , m_num_vecs( m_y.extent(1) )
353 KOKKOS_INLINE_FUNCTION
354 void operator()(
const size_type iRow )
const
356 const size_type iEntryBegin = m_A.graph.row_map[iRow];
357 const size_type iEntryEnd = m_A.graph.row_map[iRow+1];
359 for (size_type iCol=0; iCol<m_num_vecs; iCol++) {
363 for ( size_type iEntry = iEntryBegin ; iEntry < iEntryEnd ; ++iEntry ) {
364 sum += m_A.values(iEntry) * m_x( m_A.graph.entries(iEntry), iCol );
367 m_y( iRow, iCol ) =
sum;
373 static void apply(
const matrix_type&
A,
374 const input_multi_vector_type& x,
375 output_multi_vector_type& y )
377 const size_t n = A.graph.row_map.extent(0) - 1 ;
378 Kokkos::parallel_for( n , Multiply(A,x,y) );
401 template <
typename MatrixValue,
404 typename InputViewType,
405 typename OutputViewType>
407 std::vector<InputViewType>,
408 std::vector<OutputViewType>,
436 , m_num_row( A.graph.row_map.extent(0)-1 )
437 , m_num_col( x.size() )
443 KOKKOS_INLINE_FUNCTION
448 iBlockRow+m_block_row_size <= m_num_row ?
449 m_block_row_size : m_num_row-iBlockRow;
452 for (
size_type iBlockCol=0; iBlockCol<m_num_col; iBlockCol+=m_block_col_size) {
455 iBlockCol+m_block_col_size <= m_num_col ?
456 m_block_col_size : m_num_col-iBlockCol;
459 const size_type iRowEnd = iBlockRow + num_row;
460 for (
size_type iRow=iBlockRow; iRow<iRowEnd; ++iRow) {
463 const size_type iEntryBegin = m_A.graph.row_map[iRow];
464 const size_type iEntryEnd = m_A.graph.row_map[iRow+1];
467 const size_type iColEnd = iBlockCol + num_col;
468 for (
size_type iCol=iBlockCol; iCol<iColEnd; iCol++) {
472 for (
size_type iEntry = iEntryBegin; iEntry<iEntryEnd; ++iEntry) {
473 sum += m_A.values(iEntry) * m_x[iCol](m_A.graph.entries(iEntry));
475 m_y[iCol](iRow) = sum;
491 const size_type n = (num_row+m_block_row_size-1) / m_block_row_size;
492 Kokkos::parallel_for( n ,
Multiply(A,x,y) );
497 template <
typename MatrixValue,
500 typename InputViewType,
501 typename OutputViewType>
502 class Multiply< CrsMatrix<MatrixValue,Device,Layout>,
503 std::vector<InputViewType>,
504 std::vector<OutputViewType>,
509 typedef CrsMatrix<MatrixValue,Device,Layout> matrix_type;
510 typedef std::vector<InputViewType> input_multi_vector_type;
511 typedef std::vector<OutputViewType> output_multi_vector_type;
514 typedef typename execution_space::size_type size_type;
517 const matrix_type m_A;
518 const input_multi_vector_type m_x;
519 output_multi_vector_type m_y;
520 const size_type m_num_vecs;
522 Multiply(
const matrix_type& A,
523 const input_multi_vector_type& x,
524 output_multi_vector_type& y )
528 , m_num_vecs( x.size() )
534 KOKKOS_INLINE_FUNCTION
535 void operator()(
const size_type iRow )
const
537 const size_type iEntryBegin = m_A.graph.row_map[iRow];
538 const size_type iEntryEnd = m_A.graph.row_map[iRow+1];
540 for (size_type iCol=0; iCol<m_num_vecs; iCol++) {
544 for ( size_type iEntry = iEntryBegin ; iEntry < iEntryEnd ; ++iEntry ) {
545 sum += m_A.values(iEntry) * m_x[iCol]( m_A.graph.entries(iEntry) );
548 m_y[iCol]( iRow) = sum;
554 static void apply(
const matrix_type & A,
555 const input_multi_vector_type& x,
556 output_multi_vector_type& y )
558 const size_t n = A.graph.row_map.extent(0) - 1 ;
559 Kokkos::parallel_for( n , Multiply(A,x,y) );
584 template <
typename MatrixValue,
587 typename InputMultiVectorType,
588 typename OutputMultiVectorType,
589 typename OrdinalType>
591 const InputMultiVectorType& x,
592 OutputMultiVectorType& y,
593 const std::vector<OrdinalType>& col_indices,
598 typedef Kokkos::View<typename InputMultiVectorType::value_type*, typename InputMultiVectorType::array_layout, Device, Kokkos::MemoryUnmanaged> InputVectorType;
599 typedef Kokkos::View<typename OutputMultiVectorType::value_type*, typename OutputMultiVectorType::array_layout, Device, Kokkos::MemoryUnmanaged> OutputVectorType;
601 for (
size_t i=0; i<col_indices.size(); ++i) {
602 InputVectorType x_view =
603 Kokkos::subview( x , Kokkos::ALL() , col_indices[i] );
604 OutputVectorType y_view =
605 Kokkos::subview( y , Kokkos::ALL() , col_indices[i] );
606 multiply_type::apply( A , x_view , y_view );
610 template <
typename MatrixValue,
613 typename InputVectorType,
614 typename OutputVectorType>
616 const std::vector<InputVectorType>& x,
617 std::vector<OutputVectorType>& y,
622 for (
size_t i=0; i<x.size(); ++i) {
623 multiply_type::apply( A , x[i] , y[i] );
634 template <
typename ValueType,
typename Layout,
typename Device>
644 template <
typename ValueType,
typename Layout,
typename Device>
654 template <
typename ValueType,
typename Layout,
typename DstDevice,
670 template <
typename MatrixValue,
typename Layout,
typename Device >
679 std::ofstream file(filename.c_str());
681 file.setf(std::ios::scientific);
689 file <<
"%%MatrixMarket matrix coordinate real general" << std::endl;
690 file << nRow <<
" " << nRow <<
" " << hA.
values.extent(0) << std::endl;
695 for (
size_type entry=entryBegin; entry<entryEnd; ++entry) {
696 file << row+1 <<
" " << hA.
graph.entries(entry)+1 <<
" "
697 << std::setw(22) << hA.
values(entry) << std::endl;
KOKKOS_INLINE_FUNCTION void operator()(const size_type iBlockRow) const
const size_type m_num_row
output_multi_vector_type m_y
const input_multi_vector_type m_x
const size_type m_num_col
Kokkos::DefaultExecutionSpace execution_space
execution_space::size_type size_type
Multiply(const matrix_type &A, const input_multi_vector_type &x, output_multi_vector_type &y)
Stokhos::CrsMatrix< ValueType, Device, Layout >::host_mirror_type create_mirror(const Stokhos::CrsMatrix< ValueType, Device, Layout > &A)
Dim3(const size_t x_, const size_t y_=1, const size_t z_=1)
CrsMatrix(Stokhos::DeviceConfig dev_config_)
Stokhos::CrsMatrix< ValueType, Device, Layout >::host_mirror_type create_mirror_view(const Stokhos::CrsMatrix< ValueType, Device, Layout > &A)
void multiply(const CrsMatrix< MatrixValue, Device, Layout > &A, const InputMultiVectorType &x, OutputMultiVectorType &y, const std::vector< OrdinalType > &col_indices, SingleColumnMultivectorMultiply)
std::vector< InputViewType > input_multi_vector_type
std::vector< OutputViewType > output_multi_vector_type
static void write(const matrix_type &A, const std::string &filename)
DeviceConfig(const size_t num_blocks_, const size_t threads_per_block_x_, const size_t threads_per_block_y_=1, const size_t threads_per_block_z_=1)
CrsMatrix< MatrixValue, Device, Layout > matrix_type
Kokkos::View< value_type[], Layout, execution_space > values_type
size_t num_threads_per_block
KokkosSparse::StaticCrsGraph< int, Layout, execution_space, void, int > graph_type
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
execution_space::size_type size_type
CrsMatrix< ValueType, typename values_type::host_mirror_space, Layout > host_mirror_type
Stokhos::DeviceConfig dev_config
OutputViewType::value_type scalar_type
static void apply(const matrix_type &A, const input_multi_vector_type &x, output_multi_vector_type &y)
CrsMatrix< MatrixValue, Device, Layout > matrix_type
host_mirror_type HostMirror
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< RD, RP...> >::value &&Kokkos::is_view_uq_pce< Kokkos::View< XD, XP...> >::value >::type sum(const Kokkos::View< RD, RP...> &r, const Kokkos::View< XD, XP...> &x)