42 #ifndef STOKHOS_CRSMATRIX_HPP 
   43 #define STOKHOS_CRSMATRIX_HPP 
   48 #include "Kokkos_Core.hpp" 
   49 #include "Kokkos_StaticCrsGraph.hpp" 
   59     Dim3(
const size_t x_, 
const size_t y_ = 1, 
const size_t z_ = 1) :
 
   60       x(x_), 
y(y_), 
z(z_) {}
 
   68                const size_t threads_per_block_x_,
 
   69                const size_t threads_per_block_y_ = 1,
 
   70                const size_t threads_per_block_z_ = 1) :
 
   71     block_dim(threads_per_block_x_,threads_per_block_y_,threads_per_block_z_),
 
   78 template <
typename ValueType, 
typename Device,
 
   79           typename Layout = Kokkos::LayoutRight>
 
   84   typedef Kokkos::View< value_type[], Layout, execution_space > 
values_type;
 
   85 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE // Don't remove this until Kokkos has removed the deprecated code path probably around September 2018 
   86   typedef Kokkos::StaticCrsGraph< int , Layout, execution_space , int > 
graph_type;
 
   88   typedef Kokkos::StaticCrsGraph< int , Layout, execution_space , void, int > 
graph_type;
 
  102 template <
typename MatrixValue,
 
  105           typename InputVectorType,
 
  106           typename OutputVectorType>
 
  136   KOKKOS_INLINE_FUNCTION
 
  139     const size_type iEntryBegin = m_A.graph.row_map[iRow];
 
  140     const size_type iEntryEnd   = m_A.graph.row_map[iRow+1];
 
  144     for ( 
size_type iEntry = iEntryBegin; iEntry < iEntryEnd; ++iEntry ) {
 
  145       sum += m_A.values(iEntry) * m_x( m_A.graph.entries(iEntry) );
 
  155     const size_t row_count = A.
graph.row_map.extent(0) - 1;
 
  156     Kokkos::parallel_for( row_count, 
Multiply(A,x,y) );
 
  161 template <
typename MatrixValue,
 
  164           typename InputMultiVectorType,
 
  165           typename OutputMultiVectorType,
 
  166           typename OrdinalType >
 
  168                 InputMultiVectorType,
 
  169                 OutputMultiVectorType,
 
  170                 std::vector<OrdinalType>,
 
  196   , m_col_indices( col_indices )
 
  197   , m_num_vecs( col_indices.size() )
 
  202   KOKKOS_INLINE_FUNCTION
 
  205     const size_type iEntryBegin = m_A.graph.row_map[iRow];
 
  206     const size_type iEntryEnd   = m_A.graph.row_map[iRow+1];
 
  213       for ( 
size_type iEntry = iEntryBegin ; iEntry < iEntryEnd ; ++iEntry ) {
 
  214         sum += m_A.values(iEntry) * m_x(  m_A.graph.entries(iEntry), iCol );
 
  217       m_y( iRow, iCol ) = 
sum;
 
  228     const size_t n = A.
graph.row_map.extent(0) - 1 ;
 
  231     const size_t block_size = 20;
 
  232     const size_t num_vecs = col.size();
 
  233     std::vector<OrdinalType> block_col;
 
  234     block_col.reserve(block_size);
 
  235     for (
size_t block=0; block<num_vecs; block+=block_size) {
 
  237         block+block_size <= num_vecs ? block_size : num_vecs-block;
 
  238       block_col.resize(bs);
 
  239       for (
size_t i=0; i<bs; ++i)
 
  240         block_col[i] = col[block+i];
 
  241       Kokkos::parallel_for( n , 
Multiply(A,x,y,block_col) );
 
  252 template <
typename MatrixValue,
 
  255           typename InputMultiVectorType,
 
  256           typename OutputMultiVectorType >
 
  258                 InputMultiVectorType,
 
  259                 OutputMultiVectorType,
 
  287   , m_num_row( A.graph.row_map.extent(0)-1 )
 
  288   , m_num_col( m_y.extent(1) )
 
  294   KOKKOS_INLINE_FUNCTION
 
  299       iBlockRow+m_block_row_size <= m_num_row ?
 
  300       m_block_row_size : m_num_row-iBlockRow;
 
  303     for (
size_type iBlockCol=0; iBlockCol<m_num_col; iBlockCol+=m_block_col_size) {
 
  306         iBlockCol+m_block_col_size <= m_num_col ?
 
  307         m_block_col_size : m_num_col-iBlockCol;
 
  310       const size_type iRowEnd = iBlockRow + num_row;
 
  311       for (
size_type iRow=iBlockRow; iRow<iRowEnd; ++iRow) {
 
  314         const size_type iEntryBegin = m_A.graph.row_map[iRow];
 
  315         const size_type iEntryEnd   = m_A.graph.row_map[iRow+1];
 
  318         const size_type iColEnd = iBlockCol + num_col;
 
  319         for (
size_type iCol=iBlockCol; iCol<iColEnd; iCol++) {
 
  323           for (
size_type iEntry = iEntryBegin; iEntry<iEntryEnd; ++iEntry) {
 
  324             sum += m_A.values(iEntry) * m_x(  m_A.graph.entries(iEntry), iCol );
 
  326           m_y( iRow, iCol ) = 
sum;
 
  342     const size_type n = (num_row+m_block_row_size-1) / m_block_row_size;
 
  343     Kokkos::parallel_for( n , 
Multiply(A,x,y) );
 
  348 template <
typename MatrixValue,
 
  351           typename InputMultiVectorType,
 
  352           typename OutputMultiVectorType >
 
  353 class Multiply< CrsMatrix<MatrixValue,Device,Layout>,
 
  354                 InputMultiVectorType,
 
  355                 OutputMultiVectorType,
 
  360   typedef CrsMatrix<MatrixValue,Device,Layout> matrix_type;
 
  361   typedef InputMultiVectorType input_multi_vector_type;
 
  362   typedef OutputMultiVectorType output_multi_vector_type;
 
  365   typedef typename execution_space::size_type size_type;
 
  368   const matrix_type m_A;
 
  369   const input_multi_vector_type m_x;
 
  370   output_multi_vector_type m_y;
 
  371   const size_type m_num_vecs;
 
  373   Multiply( 
const matrix_type& 
A,
 
  374             const input_multi_vector_type& x,
 
  375             output_multi_vector_type& y)
 
  379   , m_num_vecs( m_y.extent(1) )
 
  384   KOKKOS_INLINE_FUNCTION
 
  385   void operator()( 
const size_type iRow )
 const 
  387     const size_type iEntryBegin = m_A.graph.row_map[iRow];
 
  388     const size_type iEntryEnd   = m_A.graph.row_map[iRow+1];
 
  390     for (size_type iCol=0; iCol<m_num_vecs; iCol++) {
 
  394       for ( size_type iEntry = iEntryBegin ; iEntry < iEntryEnd ; ++iEntry ) {
 
  395         sum += m_A.values(iEntry) * m_x(  m_A.graph.entries(iEntry), iCol );
 
  398       m_y( iRow, iCol ) = 
sum;
 
  404   static void apply( 
const matrix_type& 
A,
 
  405                      const input_multi_vector_type& x,
 
  406                      output_multi_vector_type& y )
 
  408     const size_t n = A.graph.row_map.extent(0) - 1 ;
 
  409     Kokkos::parallel_for( n , Multiply(A,x,y) );
 
  432 template <
typename MatrixValue,
 
  435           typename InputViewType,
 
  436           typename OutputViewType>
 
  438                 std::vector<InputViewType>,
 
  439                 std::vector<OutputViewType>,
 
  467   , m_num_row( A.graph.row_map.extent(0)-1 )
 
  468   , m_num_col( x.size() )
 
  474   KOKKOS_INLINE_FUNCTION
 
  479       iBlockRow+m_block_row_size <= m_num_row ?
 
  480       m_block_row_size : m_num_row-iBlockRow;
 
  483     for (
size_type iBlockCol=0; iBlockCol<m_num_col; iBlockCol+=m_block_col_size) {
 
  486         iBlockCol+m_block_col_size <= m_num_col ?
 
  487         m_block_col_size : m_num_col-iBlockCol;
 
  490       const size_type iRowEnd = iBlockRow + num_row;
 
  491       for (
size_type iRow=iBlockRow; iRow<iRowEnd; ++iRow) {
 
  494         const size_type iEntryBegin = m_A.graph.row_map[iRow];
 
  495         const size_type iEntryEnd   = m_A.graph.row_map[iRow+1];
 
  498         const size_type iColEnd = iBlockCol + num_col;
 
  499         for (
size_type iCol=iBlockCol; iCol<iColEnd; iCol++) {
 
  503           for (
size_type iEntry = iEntryBegin; iEntry<iEntryEnd; ++iEntry) {
 
  504             sum += m_A.values(iEntry) * m_x[iCol](m_A.graph.entries(iEntry));
 
  506           m_y[iCol](iRow) = sum;
 
  522     const size_type n = (num_row+m_block_row_size-1) / m_block_row_size;
 
  523     Kokkos::parallel_for( n , 
Multiply(A,x,y) );
 
  528 template <
typename MatrixValue,
 
  531           typename InputViewType,
 
  532           typename OutputViewType>
 
  533 class Multiply< CrsMatrix<MatrixValue,Device,Layout>,
 
  534                 std::vector<InputViewType>,
 
  535                 std::vector<OutputViewType>,
 
  540   typedef CrsMatrix<MatrixValue,Device,Layout> matrix_type;
 
  541   typedef std::vector<InputViewType> input_multi_vector_type;
 
  542   typedef std::vector<OutputViewType> output_multi_vector_type;
 
  545   typedef typename execution_space::size_type size_type;
 
  548   const matrix_type m_A;
 
  549   const input_multi_vector_type m_x;
 
  550   output_multi_vector_type m_y;
 
  551   const size_type m_num_vecs;
 
  553   Multiply( 
const matrix_type& A,
 
  554             const input_multi_vector_type& x,
 
  555             output_multi_vector_type& y )
 
  559   , m_num_vecs( x.size() )
 
  565   KOKKOS_INLINE_FUNCTION
 
  566   void operator()( 
const size_type iRow )
 const 
  568     const size_type iEntryBegin = m_A.graph.row_map[iRow];
 
  569     const size_type iEntryEnd   = m_A.graph.row_map[iRow+1];
 
  571     for (size_type iCol=0; iCol<m_num_vecs; iCol++) {
 
  575       for ( size_type iEntry = iEntryBegin ; iEntry < iEntryEnd ; ++iEntry ) {
 
  576         sum += m_A.values(iEntry) * m_x[iCol](  m_A.graph.entries(iEntry) );
 
  579       m_y[iCol]( iRow) = sum;
 
  585   static void apply( 
const matrix_type & A,
 
  586                      const input_multi_vector_type& x,
 
  587                      output_multi_vector_type& y )
 
  589     const size_t n = A.graph.row_map.extent(0) - 1 ;
 
  590     Kokkos::parallel_for( n , Multiply(A,x,y) );
 
  615 template <
typename MatrixValue,
 
  618           typename InputMultiVectorType,
 
  619           typename OutputMultiVectorType,
 
  620           typename OrdinalType>
 
  622               const InputMultiVectorType& x,
 
  623               OutputMultiVectorType& y,
 
  624               const std::vector<OrdinalType>& col_indices,
 
  629   typedef Kokkos::View<typename InputMultiVectorType::value_type*, typename InputMultiVectorType::array_layout, Device, Kokkos::MemoryUnmanaged> InputVectorType;
 
  630   typedef Kokkos::View<typename OutputMultiVectorType::value_type*, typename OutputMultiVectorType::array_layout, Device, Kokkos::MemoryUnmanaged> OutputVectorType;
 
  632   for (
size_t i=0; i<col_indices.size(); ++i) {
 
  633     InputVectorType x_view =
 
  634       Kokkos::subview( x , Kokkos::ALL() , col_indices[i] );
 
  635     OutputVectorType y_view =
 
  636       Kokkos::subview( y , Kokkos::ALL() , col_indices[i] );
 
  637     multiply_type::apply( A , x_view , y_view );
 
  641 template <
typename MatrixValue,
 
  644           typename InputVectorType,
 
  645           typename OutputVectorType>
 
  647               const std::vector<InputVectorType>& x,
 
  648               std::vector<OutputVectorType>& y,
 
  653   for (
size_t i=0; i<x.size(); ++i) {
 
  654     multiply_type::apply( A , x[i] , y[i] );
 
  665 template <
typename ValueType, 
typename Layout, 
typename Device>
 
  675 template <
typename ValueType, 
typename Layout, 
typename Device>
 
  685 template <
typename ValueType, 
typename Layout, 
typename DstDevice,
 
  701 template < 
typename MatrixValue, 
typename Layout, 
typename Device >
 
  710     std::ofstream file(filename.c_str());
 
  712     file.setf(std::ios::scientific);
 
  720     file << 
"%%MatrixMarket matrix coordinate real general" << std::endl;
 
  721     file << nRow << 
" " << nRow << 
" " << hA.
values.extent(0) << std::endl;
 
  726       for (
size_type entry=entryBegin; entry<entryEnd; ++entry) {
 
  727         file << row+1 << 
" " << hA.
graph.entries(entry)+1 << 
" " 
  728              << std::setw(22) << hA.
values(entry) << std::endl;
 
KOKKOS_INLINE_FUNCTION void operator()(const size_type iBlockRow) const 
const size_type m_num_row
output_multi_vector_type m_y
const input_multi_vector_type m_x
const size_type m_num_col
Kokkos::DefaultExecutionSpace execution_space
execution_space::size_type size_type
Multiply(const matrix_type &A, const input_multi_vector_type &x, output_multi_vector_type &y)
Dim3(const size_t x_, const size_t y_=1, const size_t z_=1)
CrsMatrix(Stokhos::DeviceConfig dev_config_)
CrsMatrix< ValueType, typename values_type::host_mirror_space, Layout > HostMirror
void multiply(const CrsMatrix< MatrixValue, Device, Layout > &A, const InputMultiVectorType &x, OutputMultiVectorType &y, const std::vector< OrdinalType > &col_indices, SingleColumnMultivectorMultiply)
std::vector< InputViewType > input_multi_vector_type
std::vector< OutputViewType > output_multi_vector_type
static void write(const matrix_type &A, const std::string &filename)
DeviceConfig(const size_t num_blocks_, const size_t threads_per_block_x_, const size_t threads_per_block_y_=1, const size_t threads_per_block_z_=1)
CrsMatrix< MatrixValue, Device, Layout > matrix_type
Kokkos::View< value_type[], Layout, execution_space > values_type
size_t num_threads_per_block
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
execution_space::size_type size_type
Kokkos::StaticCrsGraph< int, Layout, execution_space, void, int > graph_type
Stokhos::DeviceConfig dev_config
OutputViewType::value_type scalar_type
static void apply(const matrix_type &A, const input_multi_vector_type &x, output_multi_vector_type &y)
CrsMatrix< MatrixValue, Device, Layout > matrix_type
Stokhos::CrsMatrix< ValueType, Device, Layout >::HostMirror create_mirror(const Stokhos::CrsMatrix< ValueType, Device, Layout > &A)
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< RD, RP...> >::value &&Kokkos::is_view_uq_pce< Kokkos::View< XD, XP...> >::value >::type sum(const Kokkos::View< RD, RP...> &r, const Kokkos::View< XD, XP...> &x)
Stokhos::CrsMatrix< ValueType, Device, Layout >::HostMirror create_mirror_view(const Stokhos::CrsMatrix< ValueType, Device, Layout > &A)