42 #ifndef KOKKOS_CUDA_SYMMETRIC_DIAGONAL_SPEC_HPP 
   43 #define KOKKOS_CUDA_SYMMETRIC_DIAGONAL_SPEC_HPP 
   47 #include "Kokkos_Core.hpp" 
   69     const int warp_size = Kokkos::Impl::CudaTraits::WarpSize;
 
   70     const int y = ( Kokkos::Impl::cuda_internal_maximum_warp_count() * warp_size ) / d ;
 
   73       throw std::runtime_error( std::string(
"Stokhos::Multiply< SymmetricDiagonalSpec<Cuda> > ERROR: block too large") );
 
   77     return dim3( d , 
std::min( y , ( 1 + d ) / 2 ) , 1 );
 
   80   template< 
typename VectorValue >
 
   84     const dim3 d = thread_block( block );
 
   86     return sizeof(VectorValue) * d.x * d.y ;
 
   96   template< 
typename MatrixValue , 
typename VectorValue >
 
   99                             const MatrixValue * 
const a ,
 
  100                             const VectorValue * 
const x )
 
  103     const int  dim_half  = ( dimension + 1 ) >> 1 ;
 
  105     VectorValue * 
const shX = kokkos_impl_cuda_shared_memory<VectorValue>();
 
  112     if ( 0 == threadIdx.y ) {
 
  115       shX[ threadIdx.x ] = x[ threadIdx.x ]; 
 
  117       y = shX[ threadIdx.x ] * a[ threadIdx.x ];
 
  122     if ( 0 == threadIdx.y && ! ( dimension & 01 ) ) {
 
  128       ia = threadIdx.x + dim_half * dimension ;
 
  130       if ( threadIdx.x < dim_half ) {
 
  137       y += shX[ ix ] * a[ ia ];
 
  142     const int A_stride = blockDim.y * dimension ;
 
  144     int d = 1 + threadIdx.y ;
 
  146     const MatrixValue * 
A = a + d * dimension ;
 
  148     for ( ; d < dim_half ; d += blockDim.y , A += A_stride ) {
 
  150       ix = threadIdx.x + d ; 
if ( dimension <= ix ) ix -= dimension ;
 
  151       ia = threadIdx.x - d ; 
if ( ia < 0 ) ia += dimension ;
 
  158       y += shX[ ix ] * A[ threadIdx.x ] +
 
  162     if ( 0 < threadIdx.y ) {
 
  163       shX[ threadIdx.x + threadIdx.y * dimension ] = y ;
 
  168     for ( ix = 1 ; ix < blockDim.y ; ++ix ) {
 
  169       y += shX[ threadIdx.x + ix * dimension ];
 
Kokkos::Cuda execution_space
static __host__ size_type matrix_size(const block_type &block)
KOKKOS_INLINE_FUNCTION unsigned matrix_size() const 
Storage size for block coefficients. 
Symmetric diagonal storage for a dense matrix. 
KOKKOS_INLINE_FUNCTION PCE< Storage > min(const typename PCE< Storage >::value_type &a, const PCE< Storage > &b)
SymmetricDiagonalSpec< Kokkos::Cuda > block_type
static __device__ VectorValue apply(const block_type &block, const MatrixValue *const a, const VectorValue *const x)
static __host__ size_type shmem_size(const block_type &block)
KOKKOS_INLINE_FUNCTION unsigned dimension() const 
Dimension of vector block. 
static __host__ dim3 thread_block(const block_type &block)
execution_space::size_type size_type