10 #ifndef STOKHOS_CUDA_BLOCKCRSMATRIX_HPP 
   11 #define STOKHOS_CUDA_BLOCKCRSMATRIX_HPP 
   17 #include "Kokkos_Core.hpp" 
   24 template< 
class BlockSpec , 
typename MatrixValue , 
typename VectorValue >
 
   27   Kokkos::View< VectorValue** , Kokkos::LayoutLeft , Kokkos::Cuda > ,
 
   28   Kokkos::View< VectorValue** , Kokkos::LayoutLeft , Kokkos::Cuda > >
 
   34   typedef Kokkos::View< VectorValue** ,Kokkos::LayoutLeft , Kokkos::Cuda > 
block_vector_type ;
 
   58     const size_type blockCount = m_A.graph.row_map.extent(0) - 1 ;
 
   61                     iBlock < blockCount ; iBlock += gridDim.x ) {
 
   64       const size_type iEntryEnd = m_A.graph.row_map[iBlock+1];
 
   65             size_type iEntry    = m_A.graph.row_map[iBlock];
 
   67       for ( ; iEntry < iEntryEnd ; ++iEntry ) {
 
   68         const VectorValue * 
const x = & m_x( 0 , m_A.graph.entries(iEntry) );
 
   69         const MatrixValue * 
const a = & m_A.values( 0 , iEntry );
 
   74       if ( threadIdx.x + blockDim.x * threadIdx.y < m_A.block.dimension() ) {
 
   75         m_y(threadIdx.x,iBlock) = y ;
 
   84     const int warp_size = Kokkos::Impl::CudaTraits::WarpSize;
 
   85     auto const maxWarpCount = std::min<unsigned>(
 
   90       maxWarpCount * warp_size ;
 
   96       std::min( row_count , maxGridSizeX ) , 1 , 1 );
 
  102     if ( thread_max < block.x * block.y ) {
 
  103       std::ostringstream msg ;
 
  104       msg << 
"Kokkos::Impl::Multiply< BlockCrsMatrix< Block , Value , Cuda > , ... >" 
  105           << 
" ERROR: block dimension = " << block.x * block.y
 
  106           << 
" > " << thread_max << 
"== maximum Cuda threads per block" ;
 
  107       throw std::runtime_error(msg.str());
 
  110     Kokkos::Impl::cuda_parallel_launch_local_memory<<< grid , block , shmem >>>( 
Multiply(A,x,y) );
 
Multiply(const matrix_type &A, const block_vector_type &x, const block_vector_type &y)
Kokkos::DefaultExecutionSpace execution_space
const block_vector_type m_y
KOKKOS_INLINE_FUNCTION PCE< Storage > min(const typename PCE< Storage >::value_type &a, const PCE< Storage > &b)
Kokkos::Cuda execution_space
const block_vector_type m_x
execution_space::size_type size_type
__device__ void operator()(void) const 
static void apply(const matrix_type &A, const block_vector_type &x, const block_vector_type &y)
BlockCrsMatrix< BlockSpec, MatrixValue, execution_space > matrix_type
CRS matrix of dense blocks. 
Kokkos::View< VectorValue **,Kokkos::LayoutLeft, Kokkos::Cuda > block_vector_type