42 #ifndef STOKHOS_CUDA_BLOCKCRSMATRIX_HPP 
   43 #define STOKHOS_CUDA_BLOCKCRSMATRIX_HPP 
   49 #include "Kokkos_Core.hpp" 
   56 template< 
class BlockSpec , 
typename MatrixValue , 
typename VectorValue >
 
   59   Kokkos::View< VectorValue** , Kokkos::LayoutLeft , Kokkos::Cuda > ,
 
   60   Kokkos::View< VectorValue** , Kokkos::LayoutLeft , Kokkos::Cuda > >
 
   66   typedef Kokkos::View< VectorValue** ,Kokkos::LayoutLeft , Kokkos::Cuda > 
block_vector_type ;
 
   90     const size_type blockCount = m_A.graph.row_map.extent(0) - 1 ;
 
   93                     iBlock < blockCount ; iBlock += gridDim.x ) {
 
   96       const size_type iEntryEnd = m_A.graph.row_map[iBlock+1];
 
   97             size_type iEntry    = m_A.graph.row_map[iBlock];
 
   99       for ( ; iEntry < iEntryEnd ; ++iEntry ) {
 
  100         const VectorValue * 
const x = & m_x( 0 , m_A.graph.entries(iEntry) );
 
  101         const MatrixValue * 
const a = & m_A.values( 0 , iEntry );
 
  106       if ( threadIdx.x + blockDim.x * threadIdx.y < m_A.block.dimension() ) {
 
  107         m_y(threadIdx.x,iBlock) = y ;
 
  117       Kokkos::Impl::cuda_internal_maximum_warp_count() * Kokkos::Impl::CudaTraits::WarpSize ;
 
  122       std::min( row_count , Kokkos::Impl::cuda_internal_maximum_grid_count() ) , 1 , 1 );
 
  128     if ( thread_max < block.x * block.y ) {
 
  129       std::ostringstream msg ;
 
  130       msg << 
"Kokkos::Impl::Multiply< BlockCrsMatrix< Block , Value , Cuda > , ... >" 
  131           << 
" ERROR: block dimension = " << block.x * block.y
 
  132           << 
" > " << thread_max << 
"== maximum Cuda threads per block" ;
 
  133       throw std::runtime_error(msg.str());
 
  136     Kokkos::Impl::cuda_parallel_launch_local_memory<<< grid , block , shmem >>>( 
Multiply(A,x,y) );
 
Multiply(const matrix_type &A, const block_vector_type &x, const block_vector_type &y)
const block_vector_type m_y
KOKKOS_INLINE_FUNCTION PCE< Storage > min(const typename PCE< Storage >::value_type &a, const PCE< Storage > &b)
Kokkos::Cuda execution_space
const block_vector_type m_x
execution_space::size_type size_type
__device__ void operator()(void) const 
static void apply(const matrix_type &A, const block_vector_type &x, const block_vector_type &y)
BlockCrsMatrix< BlockSpec, MatrixValue, execution_space > matrix_type
CRS matrix of dense blocks. 
Kokkos::View< VectorValue **,Kokkos::LayoutLeft, Kokkos::Cuda > block_vector_type