10 #ifndef STOKHOS_CUDA_BLOCKCRSMATRIX_HPP
11 #define STOKHOS_CUDA_BLOCKCRSMATRIX_HPP
17 #include "Kokkos_Core.hpp"
24 template<
class BlockSpec ,
typename MatrixValue ,
typename VectorValue >
27 Kokkos::View< VectorValue** , Kokkos::LayoutLeft , Kokkos::Cuda > ,
28 Kokkos::View< VectorValue** , Kokkos::LayoutLeft , Kokkos::Cuda > >
34 typedef Kokkos::View< VectorValue** ,Kokkos::LayoutLeft , Kokkos::Cuda >
block_vector_type ;
58 const size_type blockCount = m_A.graph.row_map.extent(0) - 1 ;
61 iBlock < blockCount ; iBlock += gridDim.x ) {
64 const size_type iEntryEnd = m_A.graph.row_map[iBlock+1];
65 size_type iEntry = m_A.graph.row_map[iBlock];
67 for ( ; iEntry < iEntryEnd ; ++iEntry ) {
68 const VectorValue *
const x = & m_x( 0 , m_A.graph.entries(iEntry) );
69 const MatrixValue *
const a = & m_A.values( 0 , iEntry );
74 if ( threadIdx.x + blockDim.x * threadIdx.y < m_A.block.dimension() ) {
75 m_y(threadIdx.x,iBlock) = y ;
84 const int warp_size = Kokkos::Impl::CudaTraits::WarpSize;
85 auto const maxWarpCount = std::min<unsigned>(
90 maxWarpCount * warp_size ;
96 std::min( row_count , maxGridSizeX ) , 1 , 1 );
102 if ( thread_max < block.x * block.y ) {
103 std::ostringstream msg ;
104 msg <<
"Kokkos::Impl::Multiply< BlockCrsMatrix< Block , Value , Cuda > , ... >"
105 <<
" ERROR: block dimension = " << block.x * block.y
106 <<
" > " << thread_max <<
"== maximum Cuda threads per block" ;
107 throw std::runtime_error(msg.str());
110 Kokkos::Impl::cuda_parallel_launch_local_memory<<< grid , block , shmem >>>(
Multiply(A,x,y) );
Multiply(const matrix_type &A, const block_vector_type &x, const block_vector_type &y)
Kokkos::DefaultExecutionSpace execution_space
const block_vector_type m_y
KOKKOS_INLINE_FUNCTION PCE< Storage > min(const typename PCE< Storage >::value_type &a, const PCE< Storage > &b)
Kokkos::Cuda execution_space
const block_vector_type m_x
execution_space::size_type size_type
__device__ void operator()(void) const
static void apply(const matrix_type &A, const block_vector_type &x, const block_vector_type &y)
BlockCrsMatrix< BlockSpec, MatrixValue, execution_space > matrix_type
CRS matrix of dense blocks.
Kokkos::View< VectorValue **,Kokkos::LayoutLeft, Kokkos::Cuda > block_vector_type