42 #ifndef STOKHOS_CUDA_BLOCKCRSMATRIX_HPP
43 #define STOKHOS_CUDA_BLOCKCRSMATRIX_HPP
49 #include "Kokkos_Core.hpp"
56 template<
class BlockSpec ,
typename MatrixValue ,
typename VectorValue >
59 Kokkos::View< VectorValue** , Kokkos::LayoutLeft , Kokkos::Cuda > ,
60 Kokkos::View< VectorValue** , Kokkos::LayoutLeft , Kokkos::Cuda > >
66 typedef Kokkos::View< VectorValue** ,Kokkos::LayoutLeft , Kokkos::Cuda >
block_vector_type ;
90 const size_type blockCount = m_A.graph.row_map.extent(0) - 1 ;
93 iBlock < blockCount ; iBlock += gridDim.x ) {
96 const size_type iEntryEnd = m_A.graph.row_map[iBlock+1];
97 size_type iEntry = m_A.graph.row_map[iBlock];
99 for ( ; iEntry < iEntryEnd ; ++iEntry ) {
100 const VectorValue *
const x = & m_x( 0 , m_A.graph.entries(iEntry) );
101 const MatrixValue *
const a = & m_A.values( 0 , iEntry );
106 if ( threadIdx.x + blockDim.x * threadIdx.y < m_A.block.dimension() ) {
107 m_y(threadIdx.x,iBlock) = y ;
117 Kokkos::Impl::cuda_internal_maximum_warp_count() * Kokkos::Impl::CudaTraits::WarpSize ;
122 std::min( row_count , Kokkos::Impl::cuda_internal_maximum_grid_count() ) , 1 , 1 );
128 if ( thread_max < block.x * block.y ) {
129 std::ostringstream msg ;
130 msg <<
"Kokkos::Impl::Multiply< BlockCrsMatrix< Block , Value , Cuda > , ... >"
131 <<
" ERROR: block dimension = " << block.x * block.y
132 <<
" > " << thread_max <<
"== maximum Cuda threads per block" ;
133 throw std::runtime_error(msg.str());
136 Kokkos::Impl::cuda_parallel_launch_local_memory<<< grid , block , shmem >>>(
Multiply(A,x,y) );
Multiply(const matrix_type &A, const block_vector_type &x, const block_vector_type &y)
const block_vector_type m_y
KOKKOS_INLINE_FUNCTION PCE< Storage > min(const typename PCE< Storage >::value_type &a, const PCE< Storage > &b)
Kokkos::Cuda execution_space
const block_vector_type m_x
execution_space::size_type size_type
__device__ void operator()(void) const
static void apply(const matrix_type &A, const block_vector_type &x, const block_vector_type &y)
BlockCrsMatrix< BlockSpec, MatrixValue, execution_space > matrix_type
CRS matrix of dense blocks.
Kokkos::View< VectorValue **,Kokkos::LayoutLeft, Kokkos::Cuda > block_vector_type