Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Stokhos_Cuda_BlockCrsMatrix.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Stokhos Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38 //
39 // ***********************************************************************
40 // @HEADER
41 
42 #ifndef STOKHOS_CUDA_BLOCKCRSMATRIX_HPP
43 #define STOKHOS_CUDA_BLOCKCRSMATRIX_HPP
44 
45 #include <utility>
46 #include <sstream>
47 #include <stdexcept>
48 
49 #include "Kokkos_Core.hpp"
50 
51 #include "Stokhos_Multiply.hpp"
53 
54 namespace Stokhos {
55 
56 template< class BlockSpec , typename MatrixValue , typename VectorValue >
57 class Multiply<
58  BlockCrsMatrix< BlockSpec , MatrixValue , Kokkos::Cuda > ,
59  Kokkos::View< VectorValue** , Kokkos::LayoutLeft , Kokkos::Cuda > ,
60  Kokkos::View< VectorValue** , Kokkos::LayoutLeft , Kokkos::Cuda > >
61 {
62 public:
63 
64  typedef Kokkos::Cuda execution_space ;
65  typedef execution_space::size_type size_type ;
66  typedef Kokkos::View< VectorValue** ,Kokkos::LayoutLeft , Kokkos::Cuda > block_vector_type ;
68 
69  const matrix_type m_A ;
72 
73  Multiply( const matrix_type & A ,
74  const block_vector_type & x ,
75  const block_vector_type & y )
76  : m_A( A )
77  , m_x( x )
78  , m_y( y )
79  {}
80 
81  //--------------------------------------------------------------------------
82  // A( storage_size( m_A.block.size() ) , m_A.graph.row_map.size() );
83  // x( m_A.block.dimension() , m_A.graph.row_map.first_count() );
84  // y( m_A.block.dimension() , m_A.graph.row_map.first_count() );
85  //
86 
87  __device__
88  void operator()(void) const
89  {
90  const size_type blockCount = m_A.graph.row_map.extent(0) - 1 ;
91 
92  for ( size_type iBlock = blockIdx.x ;
93  iBlock < blockCount ; iBlock += gridDim.x ) {
94  VectorValue y = 0 ;
95 
96  const size_type iEntryEnd = m_A.graph.row_map[iBlock+1];
97  size_type iEntry = m_A.graph.row_map[iBlock];
98 
99  for ( ; iEntry < iEntryEnd ; ++iEntry ) {
100  const VectorValue * const x = & m_x( 0 , m_A.graph.entries(iEntry) );
101  const MatrixValue * const a = & m_A.values( 0 , iEntry );
102 
103  y += BlockMultiply< BlockSpec >::apply( m_A.block , a , x );
104  }
105 
106  if ( threadIdx.x + blockDim.x * threadIdx.y < m_A.block.dimension() ) {
107  m_y(threadIdx.x,iBlock) = y ;
108  }
109  }
110  }
111 
112  static void apply( const matrix_type & A ,
113  const block_vector_type & x ,
114  const block_vector_type & y )
115  {
116  const size_type thread_max =
117  Kokkos::Impl::cuda_internal_maximum_warp_count() * Kokkos::Impl::CudaTraits::WarpSize ;
118 
119  const size_type row_count = A.graph.row_map.extent(0) - 1 ;
120 
121  const dim3 grid(
122  std::min( row_count , Kokkos::Impl::cuda_internal_maximum_grid_count() ) , 1 , 1 );
123  const dim3 block = BlockMultiply<BlockSpec>::thread_block( A.block );
124 
125  const size_type shmem =
126  BlockMultiply<BlockSpec>::template shmem_size<block_vector_type>( A.block );
127 
128  if ( thread_max < block.x * block.y ) {
129  std::ostringstream msg ;
130  msg << "Kokkos::Impl::Multiply< BlockCrsMatrix< Block , Value , Cuda > , ... >"
131  << " ERROR: block dimension = " << block.x * block.y
132  << " > " << thread_max << "== maximum Cuda threads per block" ;
133  throw std::runtime_error(msg.str());
134  }
135 
136  Kokkos::Impl::cuda_parallel_launch_local_memory<<< grid , block , shmem >>>( Multiply(A,x,y) );
137  }
138 };
139 
140 //----------------------------------------------------------------------------
141 
142 } // namespace Stokhos
143 
144 #endif /* #ifndef STOKHOS_CUDA_BLOCKCRSMATRIX_HPP */
KOKKOS_INLINE_FUNCTION PCE< Storage > min(const typename PCE< Storage >::value_type &a, const PCE< Storage > &b)
CRS matrix of dense blocks.