10 #ifndef STOKHOS_TILED_CRS_PRODUCT_TENSOR_HPP
11 #define STOKHOS_TILED_CRS_PRODUCT_TENSOR_HPP
13 #include "Kokkos_Core.hpp"
28 template<
typename ValueType,
class ExecutionSpace >
40 #elif defined(__MIC__)
49 #if defined( KOKKOS_ENABLE_CUDA )
50 std::is_same<ExecutionSpace,Kokkos::Cuda>::value;
62 typedef Kokkos::View< value_type[], execution_space >
vec_type;
64 typedef Kokkos::View< size_type[][2], Kokkos::LayoutLeft, execution_space >
coord2_array_type;
147 KOKKOS_INLINE_FUNCTION
151 KOKKOS_INLINE_FUNCTION
156 KOKKOS_INLINE_FUNCTION
161 KOKKOS_INLINE_FUNCTION
166 KOKKOS_INLINE_FUNCTION
171 KOKKOS_INLINE_FUNCTION
176 KOKKOS_INLINE_FUNCTION
181 KOKKOS_INLINE_FUNCTION
186 KOKKOS_INLINE_FUNCTION
191 KOKKOS_INLINE_FUNCTION
196 KOKKOS_INLINE_FUNCTION
201 KOKKOS_INLINE_FUNCTION
206 KOKKOS_INLINE_FUNCTION
211 KOKKOS_INLINE_FUNCTION
216 KOKKOS_INLINE_FUNCTION
221 KOKKOS_INLINE_FUNCTION
226 KOKKOS_INLINE_FUNCTION
231 KOKKOS_INLINE_FUNCTION
235 template <
typename OrdinalType>
244 const size_type max_tiles = params.
get<
int>(
"Max Tiles");
252 typedef typename rcb_type::Box box_type;
253 rcb_type rcb(tile_size, max_tiles, coordinate_list());
256 size_type num_parts = rcb.get_num_parts();
261 for (
size_type part=0; part<num_parts; ++part) {
266 coord_work[part].
resize(num_rows, 0);
270 size_type i = box->coords[c](0) - box->xmin;
271 ++(coord_work[part][i]);
277 for (
size_type part=0; part<num_parts; ++part) {
283 coord_work[part][i] += pad;
312 typename coord_array_type::HostMirror host_coord =
314 typename coord2_array_type::HostMirror host_coord2 =
316 typename coord_offset_type::HostMirror host_coord_offset =
318 typename coord_range_type::HostMirror host_coord_range =
320 typename value_array_type::HostMirror host_value =
322 typename entry_array_type::HostMirror host_num_entry =
324 typename row_map_array_type::HostMirror host_row_map =
326 typename num_row_array_type::HostMirror host_num_rows =
331 for (
size_type part=0; part<num_parts; ++part) {
333 host_row_map(part,0) =
sum;
335 sum += coord_work[part][t];
336 host_row_map(part,t+1) =
sum;
341 for (
size_type part=0; part<num_parts; ++part) {
344 coord_work[part][t] = host_row_map(part,t);
349 for (
size_type part=0; part<num_parts; ++part) {
352 host_coord_offset(part,0) = box->xmin;
353 host_coord_offset(part,1) = box->ymin;
354 host_coord_offset(part,2) = box->zmin;
356 host_coord_range(part,0) = box->delta_x;
357 host_coord_range(part,1) = box->delta_y;
358 host_coord_range(part,2) = box->delta_z;
360 host_num_rows(part) = coord_work[part].
size();
371 ++coord_work[part][row];
374 host_coord2(n,0) = j - box->ymin;
375 host_coord2(n,1) = k - box->zmin;
376 host_coord(n) = ( host_coord2(n,1) << 16 ) | host_coord2(n,0);
378 ++host_num_entry(part,row);
395 for (
size_type part=0; part<num_parts; ++part) {
396 for (
size_type i = 0; i < host_num_rows(part); ++i ) {
398 host_num_entry(part,i) );
399 tensor.
m_flops += 5*host_num_entry(part,i) + 1;
407 template<
class Device,
typename OrdinalType,
typename ValueType >
408 TiledCrsProductTensor<ValueType, Device>
415 basis, Cijk, params );
418 template <
typename ValueType,
typename Device >
426 template<
typename MatrixValue ,
typename VectorValue >
427 KOKKOS_INLINE_FUNCTION
429 const MatrixValue *
const a ,
430 const VectorValue *
const x ,
431 VectorValue *
const y )
438 for (
size_type tile = 0 ; tile < n_tile ; ++tile ) {
446 for (
size_type i = 0 ; i < n_row ; ++i ) {
450 const size_type iEntryEnd = iEntryBeg + nEntry;
453 VectorValue ytmp = 0 ;
456 if (block_size > 1) {
457 const size_type nBlock = nEntry / block_size;
458 const size_type nEntryB = nBlock * block_size;
459 const size_type iEnd = iEntryBeg + nEntryB;
463 int j[block_size], k[block_size];
465 for ( ; iEntry < iEnd ; iEntry += block_size ) {
467 for (
size_type ii=0; ii<block_size; ++ii) {
468 j[ii] = tensor.
coord(iEntry+ii,0) + j_offset;
469 k[ii] = tensor.
coord(iEntry+ii,1) + k_offset;
471 TV aj(a, j), ak(a, k), xj(x, j), xk(x, k),
472 c(&(tensor.
value(iEntry)));
487 for ( ; iEntry<iEntryEnd; ++iEntry) {
491 ytmp += tensor.
value(iEntry) * ( a[
j] * x[k] + a[k] * x[
j] );
494 y[i+i_offset] += ytmp ;
500 KOKKOS_INLINE_FUNCTION
504 KOKKOS_INLINE_FUNCTION
coord_range_type m_coord_range
static const size_type host_vectorsize
KOKKOS_INLINE_FUNCTION size_type num_tiles() const
Number of tiles.
TiledCrsProductTensor< ValueType, Device > tensor_type
static const bool use_intrinsics
KOKKOS_INLINE_FUNCTION const size_type & range(const size_type entry, const size_type c) const
Coordinate range.
Data structure storing a sparse 3-tensor C(i,j,k) in a compressed format.
TiledCrsProductTensor & operator=(const TiledCrsProductTensor &rhs)
T & get(ParameterList &l, const std::string &name)
KOKKOS_INLINE_FUNCTION size_type dimension() const
Dimension of the tensor.
KOKKOS_INLINE_FUNCTION const size_type & num_entry(size_type tile, size_type i) const
Number of entries with a coordinate 'i'.
KOKKOS_INLINE_FUNCTION size_type tile_size() const
Tile size.
KOKKOS_INLINE_FUNCTION size_type entry_count() const
Number of sparse entries.
Kokkos::View< size_type[][3], execution_space > coord_range_type
static KOKKOS_INLINE_FUNCTION size_type vector_size(const tensor_type &tensor)
KOKKOS_INLINE_FUNCTION size_type entry_maximum() const
Maximum sparse entries for any coordinate.
TiledCrsProductTensor< ValueType, Device > create_tiled_product_tensor(const Stokhos::ProductBasis< OrdinalType, ValueType > &basis, const Stokhos::Sparse3Tensor< OrdinalType, ValueType > &Cijk, const Teuchos::ParameterList &params)
Kokkos::View< value_type[], execution_space > vec_type
Kokkos::View< size_type[][2], Kokkos::LayoutLeft, execution_space > coord2_array_type
static TiledCrsProductTensor create(const Stokhos::ProductBasis< OrdinalType, ValueType > &basis, const Stokhos::Sparse3Tensor< OrdinalType, ValueType > &Cijk, const Teuchos::ParameterList &params)
coord2_array_type m_coord2
KOKKOS_INLINE_FUNCTION PCE< Storage > max(const typename PCE< Storage >::value_type &a, const PCE< Storage > &b)
static const size_type vectorsize
Kokkos::View< size_type[][3], execution_space > coord_offset_type
KOKKOS_INLINE_FUNCTION const size_type & coord(const size_type entry) const
Coordinates of an entry.
Kokkos::View< size_type[], execution_space > coord_array_type
KOKKOS_INLINE_FUNCTION const size_type * row_map_ptr() const
Return row_map ptr.
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
TiledCrsProductTensor(const TiledCrsProductTensor &rhs)
void resize(size_type new_size, const value_type &x=value_type())
Kokkos::View< value_type[], execution_space > value_array_type
KOKKOS_INLINE_FUNCTION const size_type & offset(const size_type entry, const size_type c) const
Coordinate offset.
coord_offset_type m_coord_offset
Kokkos::View< size_type **, layout_type, execution_space > row_map_array_type
KOKKOS_INLINE_FUNCTION size_type num_non_zeros() const
Number of non-zeros.
Device::size_type size_type
static KOKKOS_INLINE_FUNCTION size_type matrix_size(const tensor_type &tensor)
entry_array_type m_num_entry
Kokkos::LayoutRight layout_type
static const bool is_cuda
KOKKOS_INLINE_FUNCTION size_type entry_end(size_type tile, size_type i) const
End entries with a coordinate 'i'.
KOKKOS_INLINE_FUNCTION size_type num_flops() const
Number of flops per multiply-add.
KOKKOS_INLINE_FUNCTION const value_type & value(const size_type entry) const
Value of an entry.
KOKKOS_INLINE_FUNCTION size_type num_rows(size_type tile) const
Number of rows in given tile.
KOKKOS_INLINE_FUNCTION const size_type & entry_begin(size_type tile, size_type i) const
Begin entries with a coordinate 'i'.
Kokkos::View< size_type[], execution_space > num_row_array_type
Kokkos::View< size_type **, layout_type, execution_space > entry_array_type
Teuchos::ArrayRCP< CijkData< ordinal_type, scalar_type > > build_cijk_coordinate_list(const Sparse3Tensor< ordinal_type, scalar_type > &Cijk, CijkSymmetryType symmetry_type)
num_row_array_type m_num_rows
static const size_type cuda_vectorsize
KOKKOS_INLINE_FUNCTION size_type max_num_rows() const
Maximum number of rows in any tile.
static KOKKOS_INLINE_FUNCTION void apply(const tensor_type &tensor, const MatrixValue *const a, const VectorValue *const x, VectorValue *const y)
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< RD, RP...> >::value &&Kokkos::is_view_uq_pce< Kokkos::View< XD, XP...> >::value >::type sum(const Kokkos::View< RD, RP...> &r, const Kokkos::View< XD, XP...> &x)
virtual ordinal_type size() const =0
Return total size of basis.
KOKKOS_INLINE_FUNCTION const size_type & coord(const size_type entry, const size_type c) const
Coordinates of an entry.
static const size_type tensor_align
ExecutionSpace execution_space
Stokhos::CrsMatrix< ValueType, Device, Layout >::HostMirror create_mirror_view(const Stokhos::CrsMatrix< ValueType, Device, Layout > &A)
row_map_array_type m_row_map