Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Stokhos_TiledCrsProductTensor.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Stokhos Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38 //
39 // ***********************************************************************
40 // @HEADER
41 
42 #ifndef STOKHOS_TILED_CRS_PRODUCT_TENSOR_HPP
43 #define STOKHOS_TILED_CRS_PRODUCT_TENSOR_HPP
44 
45 #include "Kokkos_Core.hpp"
46 
47 #include "Stokhos_Multiply.hpp"
48 #include "Stokhos_ProductBasis.hpp"
52 #include "Stokhos_TinyVec.hpp"
53 
54 
55 //----------------------------------------------------------------------------
56 //----------------------------------------------------------------------------
57 
58 namespace Stokhos {
59 
60 template< typename ValueType, class ExecutionSpace >
62 public:
63 
64  typedef ExecutionSpace execution_space;
65  typedef int size_type;
66  typedef ValueType value_type;
67 
68 // Vectorsize used in multiply algorithm
69 #if defined(__AVX__)
70  static const size_type host_vectorsize = 32/sizeof(value_type);
71  static const bool use_intrinsics = true;
72 #elif defined(__MIC__)
73  static const size_type host_vectorsize = 16;
74  static const bool use_intrinsics = true;
75 #else
76  static const size_type host_vectorsize = 2;
77  static const bool use_intrinsics = false;
78 #endif
79  static const size_type cuda_vectorsize = 32;
80  static const bool is_cuda =
81 #if defined( KOKKOS_ENABLE_CUDA )
82  Kokkos::Impl::is_same<ExecutionSpace,Kokkos::Cuda>::value;
83 #else
84  false ;
85 #endif
87 
88  // Alignment in terms of number of entries of CRS rows
90 
91 private:
92 
93  typedef Kokkos::LayoutRight layout_type;
94  typedef Kokkos::View< value_type[], execution_space > vec_type;
95  typedef Kokkos::View< size_type[], execution_space > coord_array_type;
96  typedef Kokkos::View< size_type[][2], Kokkos::LayoutLeft, execution_space > coord2_array_type;
97  typedef Kokkos::View< size_type[][3], execution_space > coord_offset_type;
98  typedef Kokkos::View< size_type[][3], execution_space > coord_range_type;
99  typedef Kokkos::View< value_type[], execution_space > value_array_type;
100  typedef Kokkos::View< size_type**, layout_type, execution_space > entry_array_type;
101  typedef Kokkos::View< size_type**, layout_type, execution_space > row_map_array_type;
102  typedef Kokkos::View< size_type[], execution_space > num_row_array_type;
103 
118 
119 public:
120 
121  inline
123 
124  inline
126  m_coord(),
127  m_coord2(),
128  m_coord_offset(),
129  m_coord_range(),
130  m_value(),
131  m_num_entry(),
132  m_row_map(),
133  m_num_rows(),
134  m_dimension(0),
135  m_tile_size(0),
136  m_entry_max(0),
137  m_max_num_rows(0),
138  m_nnz(0),
139  m_flops(0) {}
140 
141  inline
143  m_coord( rhs.m_coord ),
144  m_coord2( rhs.m_coord2 ),
147  m_value( rhs.m_value ),
148  m_num_entry( rhs.m_num_entry ),
149  m_row_map( rhs.m_row_map ),
150  m_num_rows( rhs.m_num_rows ),
151  m_dimension( rhs.m_dimension ),
152  m_tile_size( rhs.m_tile_size ),
153  m_entry_max( rhs.m_entry_max ),
155  m_nnz( rhs.m_nnz ),
156  m_flops( rhs.m_flops ) {}
157 
158  inline
160  {
161  m_coord = rhs.m_coord;
162  m_coord2 = rhs.m_coord2;
165  m_value = rhs.m_value;
166  m_num_entry = rhs.m_num_entry;
167  m_row_map = rhs.m_row_map;
168  m_num_rows = rhs.m_num_rows;
169  m_dimension = rhs.m_dimension;
170  m_tile_size = rhs.m_tile_size;
171  m_entry_max = rhs.m_entry_max;
173  m_nnz = rhs.m_nnz;
174  m_flops = rhs.m_flops;
175  return *this;
176  }
177 
179  KOKKOS_INLINE_FUNCTION
180  size_type dimension() const { return m_dimension; }
181 
183  KOKKOS_INLINE_FUNCTION
185  { return m_coord.extent(0); }
186 
188  KOKKOS_INLINE_FUNCTION
190  { return m_entry_max; }
191 
193  KOKKOS_INLINE_FUNCTION
195  { return m_max_num_rows; }
196 
198  KOKKOS_INLINE_FUNCTION
200  { return m_num_rows(tile); }
201 
203  KOKKOS_INLINE_FUNCTION
204  const size_type& entry_begin( size_type tile, size_type i ) const
205  { return m_row_map(tile,i); }
206 
208  KOKKOS_INLINE_FUNCTION
210  { return m_row_map(tile,i) + m_num_entry(tile,i); }
211 
213  KOKKOS_INLINE_FUNCTION
214  const size_type* row_map_ptr() const
215  { return m_row_map.data(); }
216 
218  KOKKOS_INLINE_FUNCTION
219  const size_type& num_entry( size_type tile, size_type i ) const
220  { return m_num_entry(tile,i); }
221 
223  KOKKOS_INLINE_FUNCTION
224  const size_type& coord( const size_type entry, const size_type c ) const
225  { return m_coord2( entry, c ); }
226 
228  KOKKOS_INLINE_FUNCTION
229  const size_type& coord( const size_type entry ) const
230  { return m_coord( entry ); }
231 
233  KOKKOS_INLINE_FUNCTION
234  const value_type & value( const size_type entry ) const
235  { return m_value( entry ); }
236 
238  KOKKOS_INLINE_FUNCTION
240  { return m_nnz; }
241 
243  KOKKOS_INLINE_FUNCTION
245  { return m_flops; }
246 
248  KOKKOS_INLINE_FUNCTION
250  { return m_tile_size; }
251 
253  KOKKOS_INLINE_FUNCTION
255  { return m_coord_offset.extent(0); }
256 
258  KOKKOS_INLINE_FUNCTION
259  const size_type& offset( const size_type entry, const size_type c ) const
260  { return m_coord_offset( entry, c ); }
261 
263  KOKKOS_INLINE_FUNCTION
264  const size_type& range( const size_type entry, const size_type c ) const
265  { return m_coord_range( entry, c ); }
266 
267  template <typename OrdinalType>
268  static TiledCrsProductTensor
271  const Teuchos::ParameterList& params)
272  {
273  typedef Stokhos::CijkData<OrdinalType,ValueType> Cijk_Data_type;
274 
275  const size_type tile_size = params.get<int>("Tile Size");
276  const size_type max_tiles = params.get<int>("Max Tiles");
277 
278  // Build tensor data list
279  Teuchos::ArrayRCP<Cijk_Data_type> coordinate_list =
281 
282  // Partition via RCB
283  typedef Stokhos::RCB<Cijk_Data_type> rcb_type;
284  typedef typename rcb_type::Box box_type;
285  rcb_type rcb(tile_size, max_tiles, coordinate_list());
287  rcb.get_parts();
288  size_type num_parts = rcb.get_num_parts();
289 
290  // Compute number of non-zeros for each row in each part
291  size_type total_num_rows = 0, max_num_rows = 0, entry_count = 0;
292  Teuchos::Array< Teuchos::Array<size_type> > coord_work( num_parts );
293  for (size_type part=0; part<num_parts; ++part) {
294  Teuchos::RCP<box_type> box = (*parts)[part];
295  size_type num_rows = box->delta_x;
296  total_num_rows += num_rows;
297  max_num_rows = std::max(max_num_rows, num_rows);
298  coord_work[part].resize(num_rows, 0);
299 
300  size_type nc = box->coords.size();
301  for (size_type c=0; c<nc; ++c) {
302  size_type i = box->coords[c](0) - box->xmin;
303  ++(coord_work[part][i]);
304  ++entry_count;
305  }
306  }
307 
308  // Pad each row to have size divisible by alignment size
309  for (size_type part=0; part<num_parts; ++part) {
310  size_type sz = coord_work[part].size();
311  for ( size_type i = 0; i < sz; ++i ) {
312  const size_t rem = coord_work[part][i] % tensor_align;
313  if (rem > 0) {
314  const size_t pad = tensor_align - rem;
315  coord_work[part][i] += pad;
316  entry_count += pad;
317  }
318  }
319  }
320 
321  // Allocate tensor data
322  TiledCrsProductTensor tensor;
323  tensor.m_coord =
324  coord_array_type( "tensor_coord", entry_count );
325  tensor.m_coord2 =
326  coord2_array_type( "tensor_coord2", entry_count );
327  tensor.m_coord_offset =
328  coord_offset_type( "tensor_coord_offset", num_parts );
329  tensor.m_coord_range =
330  coord_range_type( "tensor_coord_range", num_parts );
331  tensor.m_value =
332  value_array_type( "tensor_value", entry_count );
333  tensor.m_num_entry =
334  entry_array_type( "tensor_num_entry", num_parts, max_num_rows );
335  tensor.m_row_map =
336  row_map_array_type( "tensor_row_map", num_parts, max_num_rows+1 );
337  tensor.m_num_rows =
338  num_row_array_type( "tensor_num_rows", num_parts );
339  tensor.m_dimension = basis.size();
340  tensor.m_tile_size = tile_size;
341  tensor.m_max_num_rows = max_num_rows;
342 
343  // Create mirror, is a view if is host memory
344  typename coord_array_type::HostMirror host_coord =
346  typename coord2_array_type::HostMirror host_coord2 =
348  typename coord_offset_type::HostMirror host_coord_offset =
350  typename coord_range_type::HostMirror host_coord_range =
352  typename value_array_type::HostMirror host_value =
354  typename entry_array_type::HostMirror host_num_entry =
356  typename row_map_array_type::HostMirror host_row_map =
358  typename num_row_array_type::HostMirror host_num_rows =
360 
361  // Compute row map
362  size_type sum = 0;
363  for (size_type part=0; part<num_parts; ++part) {
364  size_type nc = coord_work[part].size();
365  host_row_map(part,0) = sum;
366  for (size_type t=0; t<nc; ++t) {
367  sum += coord_work[part][t];
368  host_row_map(part,t+1) = sum;
369  }
370  }
371 
372  // Copy per part row offsets back into coord_work
373  for (size_type part=0; part<num_parts; ++part) {
374  size_type nc = coord_work[part].size();
375  for (size_type t=0; t<nc; ++t) {
376  coord_work[part][t] = host_row_map(part,t);
377  }
378  }
379 
380  // Fill in coordinate and value arrays
381  for (size_type part=0; part<num_parts; ++part) {
382  Teuchos::RCP<box_type> box = (*parts)[part];
383 
384  host_coord_offset(part,0) = box->xmin;
385  host_coord_offset(part,1) = box->ymin;
386  host_coord_offset(part,2) = box->zmin;
387 
388  host_coord_range(part,0) = box->delta_x;
389  host_coord_range(part,1) = box->delta_y;
390  host_coord_range(part,2) = box->delta_z;
391 
392  host_num_rows(part) = coord_work[part].size(); // also == box->delta_x
393 
394  size_type nc = box->coords.size();
395  for (size_type t=0; t<nc; ++t) {
396  const size_type i = box->coords[t].i;
397  const size_type j = box->coords[t].j;
398  const size_type k = box->coords[t].k;
399  const value_type c = box->coords[t].c;
400 
401  const size_type row = i - box->xmin;
402  const size_type n = coord_work[part][row];
403  ++coord_work[part][row];
404 
405  host_value(n) = c;
406  host_coord2(n,0) = j - box->ymin;
407  host_coord2(n,1) = k - box->zmin;
408  host_coord(n) = ( host_coord2(n,1) << 16 ) | host_coord2(n,0);
409 
410  ++host_num_entry(part,row);
411  ++tensor.m_nnz;
412  }
413  }
414 
415  // Copy data to device if necessary
416  Kokkos::deep_copy( tensor.m_coord, host_coord );
417  Kokkos::deep_copy( tensor.m_coord2, host_coord2 );
418  Kokkos::deep_copy( tensor.m_coord_offset, host_coord_offset );
419  Kokkos::deep_copy( tensor.m_coord_range, host_coord_range );
420  Kokkos::deep_copy( tensor.m_value, host_value );
421  Kokkos::deep_copy( tensor.m_num_entry, host_num_entry );
422  Kokkos::deep_copy( tensor.m_row_map, host_row_map );
423  Kokkos::deep_copy( tensor.m_num_rows, host_num_rows );
424 
425  tensor.m_entry_max = 0;
426  tensor.m_flops = 0;
427  for (size_type part=0; part<num_parts; ++part) {
428  for ( size_type i = 0; i < host_num_rows(part); ++i ) {
429  tensor.m_entry_max = std::max( tensor.m_entry_max,
430  host_num_entry(part,i) );
431  tensor.m_flops += 5*host_num_entry(part,i) + 1;
432  }
433  }
434 
435  return tensor;
436  }
437 };
438 
439 template< class Device, typename OrdinalType, typename ValueType >
440 TiledCrsProductTensor<ValueType, Device>
444  const Teuchos::ParameterList& params)
445 {
447  basis, Cijk, params );
448 }
449 
450 template < typename ValueType, typename Device >
451 class BlockMultiply< TiledCrsProductTensor< ValueType , Device > >
452 {
453 public:
454 
455  typedef typename Device::size_type size_type ;
457 
458  template< typename MatrixValue , typename VectorValue >
459  KOKKOS_INLINE_FUNCTION
460  static void apply( const tensor_type & tensor ,
461  const MatrixValue * const a ,
462  const VectorValue * const x ,
463  VectorValue * const y )
464  {
465  const size_type block_size = 2;
467 
468  const size_type n_tile = tensor.num_tiles();
469 
470  for ( size_type tile = 0 ; tile < n_tile ; ++tile ) {
471 
472  const size_type i_offset = tensor.offset(tile, 0);
473  const size_type j_offset = tensor.offset(tile, 1);
474  const size_type k_offset = tensor.offset(tile, 2);
475 
476  const size_type n_row = tensor.num_rows(tile);
477 
478  for ( size_type i = 0 ; i < n_row ; ++i ) {
479 
480  const size_type nEntry = tensor.num_entry(tile,i);
481  const size_type iEntryBeg = tensor.entry_begin(tile,i);
482  const size_type iEntryEnd = iEntryBeg + nEntry;
483  size_type iEntry = iEntryBeg;
484 
485  VectorValue ytmp = 0 ;
486 
487  // Do entries with a blocked loop of size block_size
488  if (block_size > 1) {
489  const size_type nBlock = nEntry / block_size;
490  const size_type nEntryB = nBlock * block_size;
491  const size_type iEnd = iEntryBeg + nEntryB;
492 
493  TV vy;
494  vy.zero();
495  int j[block_size], k[block_size];
496 
497  for ( ; iEntry < iEnd ; iEntry += block_size ) {
498 
499  for (size_type ii=0; ii<block_size; ++ii) {
500  j[ii] = tensor.coord(iEntry+ii,0) + j_offset;
501  k[ii] = tensor.coord(iEntry+ii,1) + k_offset;
502  }
503  TV aj(a, j), ak(a, k), xj(x, j), xk(x, k),
504  c(&(tensor.value(iEntry)));
505 
506  // vy += c * ( aj * xk + ak * xj)
507  aj.times_equal(xk);
508  ak.times_equal(xj);
509  aj.plus_equal(ak);
510  c.times_equal(aj);
511  vy.plus_equal(c);
512 
513  }
514 
515  ytmp += vy.sum();
516  }
517 
518  // Do remaining entries with a scalar loop
519  for ( ; iEntry<iEntryEnd; ++iEntry) {
520  const size_type j = tensor.coord(iEntry,0) + j_offset;
521  const size_type k = tensor.coord(iEntry,1) + k_offset;
522 
523  ytmp += tensor.value(iEntry) * ( a[j] * x[k] + a[k] * x[j] );
524  }
525 
526  y[i+i_offset] += ytmp ;
527  //y[i] += ytmp ;
528  }
529  }
530  }
531 
532  KOKKOS_INLINE_FUNCTION
533  static size_type matrix_size( const tensor_type & tensor )
534  { return tensor.dimension(); }
535 
536  KOKKOS_INLINE_FUNCTION
537  static size_type vector_size( const tensor_type & tensor )
538  { return tensor.dimension(); }
539 };
540 
541 } /* namespace Stokhos */
542 
543 //----------------------------------------------------------------------------
544 //----------------------------------------------------------------------------
545 
546 #endif /* #ifndef STOKHOS_TILED_CRS_PRODUCT_TENSOR_HPP */
KOKKOS_INLINE_FUNCTION size_type num_tiles() const
Number tiles.
KOKKOS_INLINE_FUNCTION const size_type & range(const size_type entry, const size_type c) const
Coordinate range.
Data structure storing a sparse 3-tensor C(i,j,k) in a a compressed format.
TiledCrsProductTensor & operator=(const TiledCrsProductTensor &rhs)
T & get(ParameterList &l, const std::string &name)
KOKKOS_INLINE_FUNCTION size_type dimension() const
Dimension of the tensor.
KOKKOS_INLINE_FUNCTION const size_type & num_entry(size_type tile, size_type i) const
Number of entries with a coordinate &#39;i&#39;.
KOKKOS_INLINE_FUNCTION size_type tile_size() const
Number tiles.
KOKKOS_INLINE_FUNCTION size_type entry_count() const
Number of sparse entries.
Kokkos::View< size_type[][3], execution_space > coord_range_type
static KOKKOS_INLINE_FUNCTION size_type vector_size(const tensor_type &tensor)
KOKKOS_INLINE_FUNCTION size_type entry_maximum() const
Maximum sparse entries for any coordinate.
TiledCrsProductTensor< ValueType, Device > create_tiled_product_tensor(const Stokhos::ProductBasis< OrdinalType, ValueType > &basis, const Stokhos::Sparse3Tensor< OrdinalType, ValueType > &Cijk, const Teuchos::ParameterList &params)
Kokkos::View< value_type[], execution_space > vec_type
Kokkos::View< size_type[][2], Kokkos::LayoutLeft, execution_space > coord2_array_type
static TiledCrsProductTensor create(const Stokhos::ProductBasis< OrdinalType, ValueType > &basis, const Stokhos::Sparse3Tensor< OrdinalType, ValueType > &Cijk, const Teuchos::ParameterList &params)
KOKKOS_INLINE_FUNCTION PCE< Storage > max(const typename PCE< Storage >::value_type &a, const PCE< Storage > &b)
Kokkos::View< size_type[][3], execution_space > coord_offset_type
KOKKOS_INLINE_FUNCTION const size_type & coord(const size_type entry) const
Coordinates of an entry.
Kokkos::View< size_type[], execution_space > coord_array_type
KOKKOS_INLINE_FUNCTION const size_type * row_map_ptr() const
Return row_map ptr.
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
TiledCrsProductTensor(const TiledCrsProductTensor &rhs)
void resize(size_type new_size, const value_type &x=value_type())
Kokkos::View< value_type[], execution_space > value_array_type
KOKKOS_INLINE_FUNCTION const size_type & offset(const size_type entry, const size_type c) const
Coordinate offset.
Kokkos::View< size_type **, layout_type, execution_space > row_map_array_type
KOKKOS_INLINE_FUNCTION size_type num_non_zeros() const
Number of non-zero&#39;s.
static KOKKOS_INLINE_FUNCTION size_type matrix_size(const tensor_type &tensor)
KOKKOS_INLINE_FUNCTION size_type entry_end(size_type tile, size_type i) const
End entries with a coordinate &#39;i&#39;.
KOKKOS_INLINE_FUNCTION size_type num_flops() const
Number flop&#39;s per multiply-add.
KOKKOS_INLINE_FUNCTION const value_type & value(const size_type entry) const
Value of an entry.
size_type size() const
KOKKOS_INLINE_FUNCTION size_type num_rows(size_type tile) const
Number of rows in given tile.
KOKKOS_INLINE_FUNCTION const size_type & entry_begin(size_type tile, size_type i) const
Begin entries with a coordinate &#39;i&#39;.
Kokkos::View< size_type[], execution_space > num_row_array_type
Kokkos::View< size_type **, layout_type, execution_space > entry_array_type
Teuchos::ArrayRCP< CijkData< ordinal_type, scalar_type > > build_cijk_coordinate_list(const Sparse3Tensor< ordinal_type, scalar_type > &Cijk, CijkSymmetryType symmetry_type)
KOKKOS_INLINE_FUNCTION size_type max_num_rows() const
Maximum number of rows in any tile.
static KOKKOS_INLINE_FUNCTION void apply(const tensor_type &tensor, const MatrixValue *const a, const VectorValue *const x, VectorValue *const y)
int n
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< RD, RP...> >::value &&Kokkos::is_view_uq_pce< Kokkos::View< XD, XP...> >::value >::type sum(const Kokkos::View< RD, RP...> &r, const Kokkos::View< XD, XP...> &x)
virtual ordinal_type size() const =0
Return total size of basis.
KOKKOS_INLINE_FUNCTION const size_type & coord(const size_type entry, const size_type c) const
Coordinates of an entry.
Stokhos::CrsMatrix< ValueType, Device, Layout >::HostMirror create_mirror_view(const Stokhos::CrsMatrix< ValueType, Device, Layout > &A)