10 #ifndef STOKHOS_TILED_CRS_PRODUCT_TENSOR_HPP 
   11 #define STOKHOS_TILED_CRS_PRODUCT_TENSOR_HPP 
   13 #include "Kokkos_Core.hpp" 
   28 template< 
typename ValueType, 
class ExecutionSpace >
 
   40 #elif defined(__MIC__) 
   49 #if defined( KOKKOS_ENABLE_CUDA ) 
   50     std::is_same<ExecutionSpace,Kokkos::Cuda>::value;
 
   62   typedef Kokkos::View< value_type[], execution_space >  
vec_type;
 
   64   typedef Kokkos::View< size_type[][2], Kokkos::LayoutLeft, execution_space > 
coord2_array_type;
 
  147   KOKKOS_INLINE_FUNCTION
 
  151   KOKKOS_INLINE_FUNCTION
 
  156   KOKKOS_INLINE_FUNCTION
 
  161   KOKKOS_INLINE_FUNCTION
 
  166   KOKKOS_INLINE_FUNCTION
 
  171   KOKKOS_INLINE_FUNCTION
 
  176   KOKKOS_INLINE_FUNCTION
 
  181   KOKKOS_INLINE_FUNCTION
 
  186   KOKKOS_INLINE_FUNCTION
 
  191   KOKKOS_INLINE_FUNCTION
 
  196   KOKKOS_INLINE_FUNCTION
 
  201   KOKKOS_INLINE_FUNCTION
 
  206   KOKKOS_INLINE_FUNCTION
 
  211   KOKKOS_INLINE_FUNCTION
 
  216   KOKKOS_INLINE_FUNCTION
 
  221   KOKKOS_INLINE_FUNCTION
 
  226   KOKKOS_INLINE_FUNCTION
 
  231   KOKKOS_INLINE_FUNCTION
 
  235   template <
typename OrdinalType>
 
  244     const size_type max_tiles = params.
get<
int>(
"Max Tiles");
 
  252     typedef typename rcb_type::Box box_type;
 
  253     rcb_type rcb(tile_size, max_tiles, coordinate_list());
 
  256     size_type num_parts = rcb.get_num_parts();
 
  261     for (
size_type part=0; part<num_parts; ++part) {
 
  266       coord_work[part].
resize(num_rows, 0);
 
  270         size_type i = box->coords[c](0) - box->xmin;
 
  271         ++(coord_work[part][i]);
 
  277     for (
size_type part=0; part<num_parts; ++part) {
 
  283           coord_work[part][i] += pad;
 
  312     typename coord_array_type::host_mirror_type host_coord =
 
  314     typename coord2_array_type::host_mirror_type host_coord2 =
 
  316     typename coord_offset_type::host_mirror_type host_coord_offset =
 
  318     typename coord_range_type::host_mirror_type host_coord_range =
 
  320     typename value_array_type::host_mirror_type host_value =
 
  322     typename entry_array_type::host_mirror_type host_num_entry =
 
  324     typename row_map_array_type::host_mirror_type host_row_map =
 
  326     typename num_row_array_type::host_mirror_type host_num_rows =
 
  331     for (
size_type part=0; part<num_parts; ++part) {
 
  333       host_row_map(part,0) = 
sum;
 
  335         sum += coord_work[part][t];
 
  336         host_row_map(part,t+1) = 
sum;
 
  341     for (
size_type part=0; part<num_parts; ++part) {
 
  344         coord_work[part][t] = host_row_map(part,t);
 
  349     for (
size_type part=0; part<num_parts; ++part) {
 
  352       host_coord_offset(part,0) = box->xmin;
 
  353       host_coord_offset(part,1) = box->ymin;
 
  354       host_coord_offset(part,2) = box->zmin;
 
  356       host_coord_range(part,0) = box->delta_x;
 
  357       host_coord_range(part,1) = box->delta_y;
 
  358       host_coord_range(part,2) = box->delta_z;
 
  360       host_num_rows(part) = coord_work[part].
size(); 
 
  371         ++coord_work[part][row];
 
  374         host_coord2(n,0) = j - box->ymin;
 
  375         host_coord2(n,1) = k - box->zmin;
 
  376         host_coord(n) = ( host_coord2(n,1) << 16 ) | host_coord2(n,0);
 
  378         ++host_num_entry(part,row);
 
  395     for (
size_type part=0; part<num_parts; ++part) {
 
  396       for ( 
size_type i = 0; i < host_num_rows(part); ++i ) {
 
  398                                        host_num_entry(part,i) );
 
  399         tensor.
m_flops += 5*host_num_entry(part,i) + 1;
 
  407 template< 
class Device, 
typename OrdinalType, 
typename ValueType >
 
  408 TiledCrsProductTensor<ValueType, Device>
 
  415     basis, Cijk, params );
 
  418 template < 
typename ValueType, 
typename Device >
 
  426   template< 
typename MatrixValue , 
typename VectorValue >
 
  427   KOKKOS_INLINE_FUNCTION
 
  429                      const MatrixValue * 
const a ,
 
  430                      const VectorValue * 
const x ,
 
  431                            VectorValue * 
const y )
 
  438     for ( 
size_type tile = 0 ; tile < n_tile ; ++tile ) {
 
  446       for ( 
size_type i = 0 ; i < n_row ; ++i ) {
 
  450         const size_type iEntryEnd = iEntryBeg + nEntry;
 
  453         VectorValue ytmp = 0 ;
 
  456         if (block_size > 1) {
 
  457           const size_type nBlock = nEntry / block_size;
 
  458           const size_type nEntryB = nBlock * block_size;
 
  459           const size_type iEnd = iEntryBeg + nEntryB;
 
  463           int j[block_size], k[block_size];
 
  465           for ( ; iEntry < iEnd ; iEntry += block_size ) {
 
  467             for (
size_type ii=0; ii<block_size; ++ii) {
 
  468               j[ii] = tensor.
coord(iEntry+ii,0) + j_offset;
 
  469               k[ii] = tensor.
coord(iEntry+ii,1) + k_offset;
 
  471             TV aj(a, j), ak(a, k), xj(x, j), xk(x, k),
 
  472               c(&(tensor.
value(iEntry)));
 
  487         for ( ; iEntry<iEntryEnd; ++iEntry) {
 
  491           ytmp += tensor.
value(iEntry) * ( a[
j] * x[k] + a[k] * x[
j] );
 
  494         y[i+i_offset] += ytmp ;
 
  500   KOKKOS_INLINE_FUNCTION
 
  504   KOKKOS_INLINE_FUNCTION
 
coord_range_type m_coord_range
 
static const size_type host_vectorsize
 
KOKKOS_INLINE_FUNCTION size_type num_tiles() const 
Number of tiles. 
 
TiledCrsProductTensor< ValueType, Device > tensor_type
 
static const bool use_intrinsics
 
KOKKOS_INLINE_FUNCTION const size_type & range(const size_type entry, const size_type c) const 
Coordinate range. 
 
Data structure storing a sparse 3-tensor C(i,j,k) in a compressed format. 
 
TiledCrsProductTensor & operator=(const TiledCrsProductTensor &rhs)
 
T & get(ParameterList &l, const std::string &name)
 
KOKKOS_INLINE_FUNCTION size_type dimension() const 
Dimension of the tensor. 
 
KOKKOS_INLINE_FUNCTION const size_type & num_entry(size_type tile, size_type i) const 
Number of entries with a coordinate 'i'. 
 
KOKKOS_INLINE_FUNCTION size_type tile_size() const 
Tile size. 
 
KOKKOS_INLINE_FUNCTION size_type entry_count() const 
Number of sparse entries. 
 
Kokkos::View< size_type[][3], execution_space > coord_range_type
 
static KOKKOS_INLINE_FUNCTION size_type vector_size(const tensor_type &tensor)
 
Stokhos::CrsMatrix< ValueType, Device, Layout >::host_mirror_type create_mirror_view(const Stokhos::CrsMatrix< ValueType, Device, Layout > &A)
 
KOKKOS_INLINE_FUNCTION size_type entry_maximum() const 
Maximum sparse entries for any coordinate. 
 
TiledCrsProductTensor< ValueType, Device > create_tiled_product_tensor(const Stokhos::ProductBasis< OrdinalType, ValueType > &basis, const Stokhos::Sparse3Tensor< OrdinalType, ValueType > &Cijk, const Teuchos::ParameterList &params)
 
Kokkos::View< value_type[], execution_space > vec_type
 
Kokkos::View< size_type[][2], Kokkos::LayoutLeft, execution_space > coord2_array_type
 
static TiledCrsProductTensor create(const Stokhos::ProductBasis< OrdinalType, ValueType > &basis, const Stokhos::Sparse3Tensor< OrdinalType, ValueType > &Cijk, const Teuchos::ParameterList &params)
 
coord2_array_type m_coord2
 
KOKKOS_INLINE_FUNCTION PCE< Storage > max(const typename PCE< Storage >::value_type &a, const PCE< Storage > &b)
 
static const size_type vectorsize
 
Kokkos::View< size_type[][3], execution_space > coord_offset_type
 
KOKKOS_INLINE_FUNCTION const size_type & coord(const size_type entry) const 
Coordinates of an entry. 
 
Kokkos::View< size_type[], execution_space > coord_array_type
 
KOKKOS_INLINE_FUNCTION const size_type * row_map_ptr() const 
Return row_map ptr. 
 
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
 
TiledCrsProductTensor(const TiledCrsProductTensor &rhs)
 
void resize(size_type new_size, const value_type &x=value_type())
 
Kokkos::View< value_type[], execution_space > value_array_type
 
KOKKOS_INLINE_FUNCTION const size_type & offset(const size_type entry, const size_type c) const 
Coordinate offset. 
 
coord_offset_type m_coord_offset
 
Kokkos::View< size_type **, layout_type, execution_space > row_map_array_type
 
KOKKOS_INLINE_FUNCTION size_type num_non_zeros() const 
Number of non-zeros. 
 
Device::size_type size_type
 
static KOKKOS_INLINE_FUNCTION size_type matrix_size(const tensor_type &tensor)
 
entry_array_type m_num_entry
 
Kokkos::LayoutRight layout_type
 
static const bool is_cuda
 
KOKKOS_INLINE_FUNCTION size_type entry_end(size_type tile, size_type i) const 
End entries with a coordinate 'i'. 
 
KOKKOS_INLINE_FUNCTION size_type num_flops() const 
Number of flops per multiply-add. 
 
KOKKOS_INLINE_FUNCTION const value_type & value(const size_type entry) const 
Value of an entry. 
 
KOKKOS_INLINE_FUNCTION size_type num_rows(size_type tile) const 
Number of rows in given tile. 
 
KOKKOS_INLINE_FUNCTION const size_type & entry_begin(size_type tile, size_type i) const 
Begin entries with a coordinate 'i'. 
 
Kokkos::View< size_type[], execution_space > num_row_array_type
 
Kokkos::View< size_type **, layout_type, execution_space > entry_array_type
 
Teuchos::ArrayRCP< CijkData< ordinal_type, scalar_type > > build_cijk_coordinate_list(const Sparse3Tensor< ordinal_type, scalar_type > &Cijk, CijkSymmetryType symmetry_type)
 
num_row_array_type m_num_rows
 
static const size_type cuda_vectorsize
 
KOKKOS_INLINE_FUNCTION size_type max_num_rows() const 
Maximum number of rows in any tile. 
 
static KOKKOS_INLINE_FUNCTION void apply(const tensor_type &tensor, const MatrixValue *const a, const VectorValue *const x, VectorValue *const y)
 
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< RD, RP...> >::value &&Kokkos::is_view_uq_pce< Kokkos::View< XD, XP...> >::value >::type sum(const Kokkos::View< RD, RP...> &r, const Kokkos::View< XD, XP...> &x)
 
virtual ordinal_type size() const =0
Return total size of basis. 
 
KOKKOS_INLINE_FUNCTION const size_type & coord(const size_type entry, const size_type c) const 
Coordinates of an entry. 
 
static const size_type tensor_align
 
ExecutionSpace execution_space
 
row_map_array_type m_row_map