Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Stokhos_Cuda_BlockCrsMatrix.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Stokhos Package
4 //
5 // Copyright 2009 NTESS and the Stokhos contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef STOKHOS_CUDA_BLOCKCRSMATRIX_HPP
11 #define STOKHOS_CUDA_BLOCKCRSMATRIX_HPP
12 
13 #include <utility>
14 #include <sstream>
15 #include <stdexcept>
16 
17 #include "Kokkos_Core.hpp"
18 
19 #include "Stokhos_Multiply.hpp"
21 
22 namespace Stokhos {
23 
24 template< class BlockSpec , typename MatrixValue , typename VectorValue >
25 class Multiply<
26  BlockCrsMatrix< BlockSpec , MatrixValue , Kokkos::Cuda > ,
27  Kokkos::View< VectorValue** , Kokkos::LayoutLeft , Kokkos::Cuda > ,
28  Kokkos::View< VectorValue** , Kokkos::LayoutLeft , Kokkos::Cuda > >
29 {
30 public:
31 
32  typedef Kokkos::Cuda execution_space ;
33  typedef execution_space::size_type size_type ;
34  typedef Kokkos::View< VectorValue** ,Kokkos::LayoutLeft , Kokkos::Cuda > block_vector_type ;
36 
37  const matrix_type m_A ;
40 
41  Multiply( const matrix_type & A ,
42  const block_vector_type & x ,
43  const block_vector_type & y )
44  : m_A( A )
45  , m_x( x )
46  , m_y( y )
47  {}
48 
49  //--------------------------------------------------------------------------
50  // A( storage_size( m_A.block.size() ) , m_A.graph.row_map.size() );
51  // x( m_A.block.dimension() , m_A.graph.row_map.first_count() );
52  // y( m_A.block.dimension() , m_A.graph.row_map.first_count() );
53  //
54 
55  __device__
56  void operator()(void) const
57  {
58  const size_type blockCount = m_A.graph.row_map.extent(0) - 1 ;
59 
60  for ( size_type iBlock = blockIdx.x ;
61  iBlock < blockCount ; iBlock += gridDim.x ) {
62  VectorValue y = 0 ;
63 
64  const size_type iEntryEnd = m_A.graph.row_map[iBlock+1];
65  size_type iEntry = m_A.graph.row_map[iBlock];
66 
67  for ( ; iEntry < iEntryEnd ; ++iEntry ) {
68  const VectorValue * const x = & m_x( 0 , m_A.graph.entries(iEntry) );
69  const MatrixValue * const a = & m_A.values( 0 , iEntry );
70 
71  y += BlockMultiply< BlockSpec >::apply( m_A.block , a , x );
72  }
73 
74  if ( threadIdx.x + blockDim.x * threadIdx.y < m_A.block.dimension() ) {
75  m_y(threadIdx.x,iBlock) = y ;
76  }
77  }
78  }
79 
80  static void apply( const matrix_type & A ,
81  const block_vector_type & x ,
82  const block_vector_type & y )
83  {
84  const int warp_size = Kokkos::Impl::CudaTraits::WarpSize;
85  auto const maxWarpCount = std::min<unsigned>(
86  execution_space().cuda_device_prop().maxThreadsPerBlock / warp_size,
87  warp_size);
88 
89  const size_type thread_max =
90  maxWarpCount * warp_size ;
91 
92  const size_type row_count = A.graph.row_map.extent(0) - 1 ;
93 
94  const size_type maxGridSizeX = execution_space().cuda_device_prop().maxGridSize[0];
95  const dim3 grid(
96  std::min( row_count , maxGridSizeX ) , 1 , 1 );
97  const dim3 block = BlockMultiply<BlockSpec>::thread_block( A.block );
98 
99  const size_type shmem =
100  BlockMultiply<BlockSpec>::template shmem_size<block_vector_type>( A.block );
101 
102  if ( thread_max < block.x * block.y ) {
103  std::ostringstream msg ;
104  msg << "Kokkos::Impl::Multiply< BlockCrsMatrix< Block , Value , Cuda > , ... >"
105  << " ERROR: block dimension = " << block.x * block.y
106  << " > " << thread_max << "== maximum Cuda threads per block" ;
107  throw std::runtime_error(msg.str());
108  }
109 
110  Kokkos::Impl::cuda_parallel_launch_local_memory<<< grid , block , shmem >>>( Multiply(A,x,y) );
111  }
112 };
113 
114 //----------------------------------------------------------------------------
115 
116 } // namespace Stokhos
117 
118 #endif /* #ifndef STOKHOS_CUDA_BLOCKCRSMATRIX_HPP */
Kokkos::DefaultExecutionSpace execution_space
KOKKOS_INLINE_FUNCTION PCE< Storage > min(const typename PCE< Storage >::value_type &a, const PCE< Storage > &b)
CRS matrix of dense blocks.