Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Stokhos_TiledCrsProductTensor.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Stokhos Package
4 //
5 // Copyright 2009 NTESS and the Stokhos contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef STOKHOS_TILED_CRS_PRODUCT_TENSOR_HPP
11 #define STOKHOS_TILED_CRS_PRODUCT_TENSOR_HPP
12 
13 #include "Kokkos_Core.hpp"
14 
15 #include "Stokhos_Multiply.hpp"
16 #include "Stokhos_ProductBasis.hpp"
20 #include "Stokhos_TinyVec.hpp"
21 
22 
23 //----------------------------------------------------------------------------
24 //----------------------------------------------------------------------------
25 
26 namespace Stokhos {
27 
28 template< typename ValueType, class ExecutionSpace >
30 public:
31 
32  typedef ExecutionSpace execution_space;
33  typedef int size_type;
34  typedef ValueType value_type;
35 
36 // Vectorsize used in multiply algorithm
37 #if defined(__AVX__)
38  static const size_type host_vectorsize = 32/sizeof(value_type);
39  static const bool use_intrinsics = true;
40 #elif defined(__MIC__)
41  static const size_type host_vectorsize = 16;
42  static const bool use_intrinsics = true;
43 #else
44  static const size_type host_vectorsize = 2;
45  static const bool use_intrinsics = false;
46 #endif
47  static const size_type cuda_vectorsize = 32;
48  static const bool is_cuda =
49 #if defined( KOKKOS_ENABLE_CUDA )
50  std::is_same<ExecutionSpace,Kokkos::Cuda>::value;
51 #else
52  false ;
53 #endif
55 
56  // Alignment in terms of number of entries of CRS rows
58 
59 private:
60 
61  typedef Kokkos::LayoutRight layout_type;
62  typedef Kokkos::View< value_type[], execution_space > vec_type;
63  typedef Kokkos::View< size_type[], execution_space > coord_array_type;
64  typedef Kokkos::View< size_type[][2], Kokkos::LayoutLeft, execution_space > coord2_array_type;
65  typedef Kokkos::View< size_type[][3], execution_space > coord_offset_type;
66  typedef Kokkos::View< size_type[][3], execution_space > coord_range_type;
67  typedef Kokkos::View< value_type[], execution_space > value_array_type;
68  typedef Kokkos::View< size_type**, layout_type, execution_space > entry_array_type;
69  typedef Kokkos::View< size_type**, layout_type, execution_space > row_map_array_type;
70  typedef Kokkos::View< size_type[], execution_space > num_row_array_type;
71 
86 
87 public:
88 
89  inline
91 
92  inline
94  m_coord(),
95  m_coord2(),
97  m_coord_range(),
98  m_value(),
99  m_num_entry(),
100  m_row_map(),
101  m_num_rows(),
102  m_dimension(0),
103  m_tile_size(0),
104  m_entry_max(0),
105  m_max_num_rows(0),
106  m_nnz(0),
107  m_flops(0) {}
108 
109  inline
111  m_coord( rhs.m_coord ),
112  m_coord2( rhs.m_coord2 ),
115  m_value( rhs.m_value ),
116  m_num_entry( rhs.m_num_entry ),
117  m_row_map( rhs.m_row_map ),
118  m_num_rows( rhs.m_num_rows ),
119  m_dimension( rhs.m_dimension ),
120  m_tile_size( rhs.m_tile_size ),
121  m_entry_max( rhs.m_entry_max ),
123  m_nnz( rhs.m_nnz ),
124  m_flops( rhs.m_flops ) {}
125 
126  inline
128  {
129  m_coord = rhs.m_coord;
130  m_coord2 = rhs.m_coord2;
133  m_value = rhs.m_value;
134  m_num_entry = rhs.m_num_entry;
135  m_row_map = rhs.m_row_map;
136  m_num_rows = rhs.m_num_rows;
137  m_dimension = rhs.m_dimension;
138  m_tile_size = rhs.m_tile_size;
139  m_entry_max = rhs.m_entry_max;
141  m_nnz = rhs.m_nnz;
142  m_flops = rhs.m_flops;
143  return *this;
144  }
145 
147  KOKKOS_INLINE_FUNCTION
148  size_type dimension() const { return m_dimension; }
149 
151  KOKKOS_INLINE_FUNCTION
153  { return m_coord.extent(0); }
154 
156  KOKKOS_INLINE_FUNCTION
158  { return m_entry_max; }
159 
161  KOKKOS_INLINE_FUNCTION
163  { return m_max_num_rows; }
164 
166  KOKKOS_INLINE_FUNCTION
168  { return m_num_rows(tile); }
169 
171  KOKKOS_INLINE_FUNCTION
172  const size_type& entry_begin( size_type tile, size_type i ) const
173  { return m_row_map(tile,i); }
174 
176  KOKKOS_INLINE_FUNCTION
178  { return m_row_map(tile,i) + m_num_entry(tile,i); }
179 
181  KOKKOS_INLINE_FUNCTION
182  const size_type* row_map_ptr() const
183  { return m_row_map.data(); }
184 
186  KOKKOS_INLINE_FUNCTION
187  const size_type& num_entry( size_type tile, size_type i ) const
188  { return m_num_entry(tile,i); }
189 
191  KOKKOS_INLINE_FUNCTION
192  const size_type& coord( const size_type entry, const size_type c ) const
193  { return m_coord2( entry, c ); }
194 
196  KOKKOS_INLINE_FUNCTION
197  const size_type& coord( const size_type entry ) const
198  { return m_coord( entry ); }
199 
201  KOKKOS_INLINE_FUNCTION
202  const value_type & value( const size_type entry ) const
203  { return m_value( entry ); }
204 
206  KOKKOS_INLINE_FUNCTION
208  { return m_nnz; }
209 
211  KOKKOS_INLINE_FUNCTION
213  { return m_flops; }
214 
216  KOKKOS_INLINE_FUNCTION
218  { return m_tile_size; }
219 
221  KOKKOS_INLINE_FUNCTION
223  { return m_coord_offset.extent(0); }
224 
226  KOKKOS_INLINE_FUNCTION
227  const size_type& offset( const size_type entry, const size_type c ) const
228  { return m_coord_offset( entry, c ); }
229 
231  KOKKOS_INLINE_FUNCTION
232  const size_type& range( const size_type entry, const size_type c ) const
233  { return m_coord_range( entry, c ); }
234 
235  template <typename OrdinalType>
236  static TiledCrsProductTensor
239  const Teuchos::ParameterList& params)
240  {
241  typedef Stokhos::CijkData<OrdinalType,ValueType> Cijk_Data_type;
242 
243  const size_type tile_size = params.get<int>("Tile Size");
244  const size_type max_tiles = params.get<int>("Max Tiles");
245 
246  // Build tensor data list
247  Teuchos::ArrayRCP<Cijk_Data_type> coordinate_list =
249 
250  // Partition via RCB
251  typedef Stokhos::RCB<Cijk_Data_type> rcb_type;
252  typedef typename rcb_type::Box box_type;
253  rcb_type rcb(tile_size, max_tiles, coordinate_list());
255  rcb.get_parts();
256  size_type num_parts = rcb.get_num_parts();
257 
258  // Compute number of non-zeros for each row in each part
259  size_type total_num_rows = 0, max_num_rows = 0, entry_count = 0;
260  Teuchos::Array< Teuchos::Array<size_type> > coord_work( num_parts );
261  for (size_type part=0; part<num_parts; ++part) {
262  Teuchos::RCP<box_type> box = (*parts)[part];
263  size_type num_rows = box->delta_x;
264  total_num_rows += num_rows;
265  max_num_rows = std::max(max_num_rows, num_rows);
266  coord_work[part].resize(num_rows, 0);
267 
268  size_type nc = box->coords.size();
269  for (size_type c=0; c<nc; ++c) {
270  size_type i = box->coords[c](0) - box->xmin;
271  ++(coord_work[part][i]);
272  ++entry_count;
273  }
274  }
275 
276  // Pad each row to have size divisible by alignment size
277  for (size_type part=0; part<num_parts; ++part) {
278  size_type sz = coord_work[part].size();
279  for ( size_type i = 0; i < sz; ++i ) {
280  const size_t rem = coord_work[part][i] % tensor_align;
281  if (rem > 0) {
282  const size_t pad = tensor_align - rem;
283  coord_work[part][i] += pad;
284  entry_count += pad;
285  }
286  }
287  }
288 
289  // Allocate tensor data
290  TiledCrsProductTensor tensor;
291  tensor.m_coord =
292  coord_array_type( "tensor_coord", entry_count );
293  tensor.m_coord2 =
294  coord2_array_type( "tensor_coord2", entry_count );
295  tensor.m_coord_offset =
296  coord_offset_type( "tensor_coord_offset", num_parts );
297  tensor.m_coord_range =
298  coord_range_type( "tensor_coord_range", num_parts );
299  tensor.m_value =
300  value_array_type( "tensor_value", entry_count );
301  tensor.m_num_entry =
302  entry_array_type( "tensor_num_entry", num_parts, max_num_rows );
303  tensor.m_row_map =
304  row_map_array_type( "tensor_row_map", num_parts, max_num_rows+1 );
305  tensor.m_num_rows =
306  num_row_array_type( "tensor_num_rows", num_parts );
307  tensor.m_dimension = basis.size();
308  tensor.m_tile_size = tile_size;
309  tensor.m_max_num_rows = max_num_rows;
310 
311  // Create mirror, is a view if is host memory
312  typename coord_array_type::HostMirror host_coord =
314  typename coord2_array_type::HostMirror host_coord2 =
316  typename coord_offset_type::HostMirror host_coord_offset =
318  typename coord_range_type::HostMirror host_coord_range =
320  typename value_array_type::HostMirror host_value =
322  typename entry_array_type::HostMirror host_num_entry =
324  typename row_map_array_type::HostMirror host_row_map =
326  typename num_row_array_type::HostMirror host_num_rows =
328 
329  // Compute row map
330  size_type sum = 0;
331  for (size_type part=0; part<num_parts; ++part) {
332  size_type nc = coord_work[part].size();
333  host_row_map(part,0) = sum;
334  for (size_type t=0; t<nc; ++t) {
335  sum += coord_work[part][t];
336  host_row_map(part,t+1) = sum;
337  }
338  }
339 
340  // Copy per part row offsets back into coord_work
341  for (size_type part=0; part<num_parts; ++part) {
342  size_type nc = coord_work[part].size();
343  for (size_type t=0; t<nc; ++t) {
344  coord_work[part][t] = host_row_map(part,t);
345  }
346  }
347 
348  // Fill in coordinate and value arrays
349  for (size_type part=0; part<num_parts; ++part) {
350  Teuchos::RCP<box_type> box = (*parts)[part];
351 
352  host_coord_offset(part,0) = box->xmin;
353  host_coord_offset(part,1) = box->ymin;
354  host_coord_offset(part,2) = box->zmin;
355 
356  host_coord_range(part,0) = box->delta_x;
357  host_coord_range(part,1) = box->delta_y;
358  host_coord_range(part,2) = box->delta_z;
359 
360  host_num_rows(part) = coord_work[part].size(); // also == box->delta_x
361 
362  size_type nc = box->coords.size();
363  for (size_type t=0; t<nc; ++t) {
364  const size_type i = box->coords[t].i;
365  const size_type j = box->coords[t].j;
366  const size_type k = box->coords[t].k;
367  const value_type c = box->coords[t].c;
368 
369  const size_type row = i - box->xmin;
370  const size_type n = coord_work[part][row];
371  ++coord_work[part][row];
372 
373  host_value(n) = c;
374  host_coord2(n,0) = j - box->ymin;
375  host_coord2(n,1) = k - box->zmin;
376  host_coord(n) = ( host_coord2(n,1) << 16 ) | host_coord2(n,0);
377 
378  ++host_num_entry(part,row);
379  ++tensor.m_nnz;
380  }
381  }
382 
383  // Copy data to device if necessary
384  Kokkos::deep_copy( tensor.m_coord, host_coord );
385  Kokkos::deep_copy( tensor.m_coord2, host_coord2 );
386  Kokkos::deep_copy( tensor.m_coord_offset, host_coord_offset );
387  Kokkos::deep_copy( tensor.m_coord_range, host_coord_range );
388  Kokkos::deep_copy( tensor.m_value, host_value );
389  Kokkos::deep_copy( tensor.m_num_entry, host_num_entry );
390  Kokkos::deep_copy( tensor.m_row_map, host_row_map );
391  Kokkos::deep_copy( tensor.m_num_rows, host_num_rows );
392 
393  tensor.m_entry_max = 0;
394  tensor.m_flops = 0;
395  for (size_type part=0; part<num_parts; ++part) {
396  for ( size_type i = 0; i < host_num_rows(part); ++i ) {
397  tensor.m_entry_max = std::max( tensor.m_entry_max,
398  host_num_entry(part,i) );
399  tensor.m_flops += 5*host_num_entry(part,i) + 1;
400  }
401  }
402 
403  return tensor;
404  }
405 };
406 
407 template< class Device, typename OrdinalType, typename ValueType >
408 TiledCrsProductTensor<ValueType, Device>
412  const Teuchos::ParameterList& params)
413 {
415  basis, Cijk, params );
416 }
417 
418 template < typename ValueType, typename Device >
419 class BlockMultiply< TiledCrsProductTensor< ValueType , Device > >
420 {
421 public:
422 
423  typedef typename Device::size_type size_type ;
425 
426  template< typename MatrixValue , typename VectorValue >
427  KOKKOS_INLINE_FUNCTION
428  static void apply( const tensor_type & tensor ,
429  const MatrixValue * const a ,
430  const VectorValue * const x ,
431  VectorValue * const y )
432  {
433  const size_type block_size = 2;
435 
436  const size_type n_tile = tensor.num_tiles();
437 
438  for ( size_type tile = 0 ; tile < n_tile ; ++tile ) {
439 
440  const size_type i_offset = tensor.offset(tile, 0);
441  const size_type j_offset = tensor.offset(tile, 1);
442  const size_type k_offset = tensor.offset(tile, 2);
443 
444  const size_type n_row = tensor.num_rows(tile);
445 
446  for ( size_type i = 0 ; i < n_row ; ++i ) {
447 
448  const size_type nEntry = tensor.num_entry(tile,i);
449  const size_type iEntryBeg = tensor.entry_begin(tile,i);
450  const size_type iEntryEnd = iEntryBeg + nEntry;
451  size_type iEntry = iEntryBeg;
452 
453  VectorValue ytmp = 0 ;
454 
455  // Do entries with a blocked loop of size block_size
456  if (block_size > 1) {
457  const size_type nBlock = nEntry / block_size;
458  const size_type nEntryB = nBlock * block_size;
459  const size_type iEnd = iEntryBeg + nEntryB;
460 
461  TV vy;
462  vy.zero();
463  int j[block_size], k[block_size];
464 
465  for ( ; iEntry < iEnd ; iEntry += block_size ) {
466 
467  for (size_type ii=0; ii<block_size; ++ii) {
468  j[ii] = tensor.coord(iEntry+ii,0) + j_offset;
469  k[ii] = tensor.coord(iEntry+ii,1) + k_offset;
470  }
471  TV aj(a, j), ak(a, k), xj(x, j), xk(x, k),
472  c(&(tensor.value(iEntry)));
473 
474  // vy += c * ( aj * xk + ak * xj)
475  aj.times_equal(xk);
476  ak.times_equal(xj);
477  aj.plus_equal(ak);
478  c.times_equal(aj);
479  vy.plus_equal(c);
480 
481  }
482 
483  ytmp += vy.sum();
484  }
485 
486  // Do remaining entries with a scalar loop
487  for ( ; iEntry<iEntryEnd; ++iEntry) {
488  const size_type j = tensor.coord(iEntry,0) + j_offset;
489  const size_type k = tensor.coord(iEntry,1) + k_offset;
490 
491  ytmp += tensor.value(iEntry) * ( a[j] * x[k] + a[k] * x[j] );
492  }
493 
494  y[i+i_offset] += ytmp ;
495  //y[i] += ytmp ;
496  }
497  }
498  }
499 
500  KOKKOS_INLINE_FUNCTION
501  static size_type matrix_size( const tensor_type & tensor )
502  { return tensor.dimension(); }
503 
504  KOKKOS_INLINE_FUNCTION
505  static size_type vector_size( const tensor_type & tensor )
506  { return tensor.dimension(); }
507 };
508 
509 } /* namespace Stokhos */
510 
511 //----------------------------------------------------------------------------
512 //----------------------------------------------------------------------------
513 
514 #endif /* #ifndef STOKHOS_TILED_CRS_PRODUCT_TENSOR_HPP */
KOKKOS_INLINE_FUNCTION size_type num_tiles() const
Number tiles.
KOKKOS_INLINE_FUNCTION const size_type & range(const size_type entry, const size_type c) const
Coordinate range.
Data structure storing a sparse 3-tensor C(i,j,k) in a a compressed format.
TiledCrsProductTensor & operator=(const TiledCrsProductTensor &rhs)
T & get(ParameterList &l, const std::string &name)
KOKKOS_INLINE_FUNCTION size_type dimension() const
Dimension of the tensor.
KOKKOS_INLINE_FUNCTION const size_type & num_entry(size_type tile, size_type i) const
Number of entries with a coordinate &#39;i&#39;.
KOKKOS_INLINE_FUNCTION size_type tile_size() const
Number tiles.
KOKKOS_INLINE_FUNCTION size_type entry_count() const
Number of sparse entries.
Kokkos::View< size_type[][3], execution_space > coord_range_type
static KOKKOS_INLINE_FUNCTION size_type vector_size(const tensor_type &tensor)
KOKKOS_INLINE_FUNCTION size_type entry_maximum() const
Maximum sparse entries for any coordinate.
TiledCrsProductTensor< ValueType, Device > create_tiled_product_tensor(const Stokhos::ProductBasis< OrdinalType, ValueType > &basis, const Stokhos::Sparse3Tensor< OrdinalType, ValueType > &Cijk, const Teuchos::ParameterList &params)
Kokkos::View< value_type[], execution_space > vec_type
Kokkos::View< size_type[][2], Kokkos::LayoutLeft, execution_space > coord2_array_type
static TiledCrsProductTensor create(const Stokhos::ProductBasis< OrdinalType, ValueType > &basis, const Stokhos::Sparse3Tensor< OrdinalType, ValueType > &Cijk, const Teuchos::ParameterList &params)
KOKKOS_INLINE_FUNCTION PCE< Storage > max(const typename PCE< Storage >::value_type &a, const PCE< Storage > &b)
Kokkos::View< size_type[][3], execution_space > coord_offset_type
KOKKOS_INLINE_FUNCTION const size_type & coord(const size_type entry) const
Coordinates of an entry.
Kokkos::View< size_type[], execution_space > coord_array_type
KOKKOS_INLINE_FUNCTION const size_type * row_map_ptr() const
Return row_map ptr.
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
TiledCrsProductTensor(const TiledCrsProductTensor &rhs)
void resize(size_type new_size, const value_type &x=value_type())
Kokkos::View< value_type[], execution_space > value_array_type
KOKKOS_INLINE_FUNCTION const size_type & offset(const size_type entry, const size_type c) const
Coordinate offset.
Kokkos::View< size_type **, layout_type, execution_space > row_map_array_type
KOKKOS_INLINE_FUNCTION size_type num_non_zeros() const
Number of non-zero&#39;s.
static KOKKOS_INLINE_FUNCTION size_type matrix_size(const tensor_type &tensor)
KOKKOS_INLINE_FUNCTION size_type entry_end(size_type tile, size_type i) const
End entries with a coordinate &#39;i&#39;.
KOKKOS_INLINE_FUNCTION size_type num_flops() const
Number flop&#39;s per multiply-add.
KOKKOS_INLINE_FUNCTION const value_type & value(const size_type entry) const
Value of an entry.
size_type size() const
KOKKOS_INLINE_FUNCTION size_type num_rows(size_type tile) const
Number of rows in given tile.
KOKKOS_INLINE_FUNCTION const size_type & entry_begin(size_type tile, size_type i) const
Begin entries with a coordinate &#39;i&#39;.
Kokkos::View< size_type[], execution_space > num_row_array_type
Kokkos::View< size_type **, layout_type, execution_space > entry_array_type
Teuchos::ArrayRCP< CijkData< ordinal_type, scalar_type > > build_cijk_coordinate_list(const Sparse3Tensor< ordinal_type, scalar_type > &Cijk, CijkSymmetryType symmetry_type)
KOKKOS_INLINE_FUNCTION size_type max_num_rows() const
Maximum number of rows in any tile.
static KOKKOS_INLINE_FUNCTION void apply(const tensor_type &tensor, const MatrixValue *const a, const VectorValue *const x, VectorValue *const y)
int n
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< RD, RP...> >::value &&Kokkos::is_view_uq_pce< Kokkos::View< XD, XP...> >::value >::type sum(const Kokkos::View< RD, RP...> &r, const Kokkos::View< XD, XP...> &x)
virtual ordinal_type size() const =0
Return total size of basis.
KOKKOS_INLINE_FUNCTION const size_type & coord(const size_type entry, const size_type c) const
Coordinates of an entry.
Stokhos::CrsMatrix< ValueType, Device, Layout >::HostMirror create_mirror_view(const Stokhos::CrsMatrix< ValueType, Device, Layout > &A)