Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Stokhos_LexicographicBlockSparse3Tensor.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Stokhos Package
4 //
5 // Copyright 2009 NTESS and the Stokhos contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef STOKHOS_LEXICOGRAPHIC_BLOCK_SPARSE_3_TENSOR_HPP
11 #define STOKHOS_LEXICOGRAPHIC_BLOCK_SPARSE_3_TENSOR_HPP
12 
13 #include "Kokkos_Core.hpp"
14 
15 #include "Stokhos_Multiply.hpp"
16 #include "Stokhos_ProductBasis.hpp"
19 
20 #include <sstream>
21 #include <fstream>
22 
23 //----------------------------------------------------------------------------
24 //----------------------------------------------------------------------------
25 
26 namespace Stokhos {
27 
31 template< typename ValueType , class ExecutionSpace >
33 public:
34 
35  typedef ExecutionSpace execution_space;
36  typedef typename execution_space::size_type size_type;
37  typedef ValueType value_type;
38 
39 private:
40 
41  typedef Kokkos::View< int[][7] , Kokkos::LayoutRight, execution_space > coord_array_type;
42  typedef Kokkos::View< value_type[], execution_space > value_array_type;
43 
49 
50 public:
51 
52  inline
54 
55  inline
57  m_coord(),
58  m_value(),
59  m_dimension(),
60  m_flops(0),
61  m_symmetric(false) {}
62 
63  inline
65  const LexicographicBlockSparse3Tensor & rhs) :
66  m_coord(rhs.m_coord),
67  m_value(rhs.m_value),
69  m_flops(rhs.m_flops),
70  m_symmetric(rhs.m_symmetric) {}
71 
72  inline
75  {
76  m_coord = rhs.m_coord;
77  m_value = rhs.m_value;
79  m_flops = rhs.m_flops;
81  return *this ;
82  }
83 
85  KOKKOS_INLINE_FUNCTION
86  size_type dimension() const { return m_dimension; }
87 
89  KOKKOS_INLINE_FUNCTION
90  size_type num_coord() const { return m_coord.extent(0); }
91 
93  KOKKOS_INLINE_FUNCTION
94  size_type num_value() const { return m_value.extent(0); }
95 
97  KOKKOS_INLINE_FUNCTION
98  int get_i_begin(const size_type entry) const {
99  return m_coord(entry,0);
100  }
101 
103  KOKKOS_INLINE_FUNCTION
104  int get_j_begin(const size_type entry) const {
105  return m_coord(entry,1);
106  }
107 
109  KOKKOS_INLINE_FUNCTION
110  int get_k_begin(const size_type entry) const {
111  return m_coord(entry,2);
112  }
113 
115  KOKKOS_INLINE_FUNCTION
116  int get_p_i(const size_type entry) const {
117  return m_coord(entry,3);
118  }
119 
121  KOKKOS_INLINE_FUNCTION
122  int get_p_j(const size_type entry) const {
123  return m_coord(entry,4);
124  }
125 
127  KOKKOS_INLINE_FUNCTION
128  int get_p_k(const size_type entry) const {
129  return m_coord(entry,5);
130  }
131 
133  KOKKOS_INLINE_FUNCTION
134  int get_j_eq_k(const size_type entry) const {
135  return m_coord(entry,6);
136  }
137 
139  KOKKOS_INLINE_FUNCTION
140  const value_type& value(const size_type entry) const
141  { return m_value(entry); }
142 
144  KOKKOS_INLINE_FUNCTION
145  size_type num_non_zeros() const { return m_value.extent(0); }
146 
148  KOKKOS_INLINE_FUNCTION
149  size_type num_flops() const { return m_flops; }
150 
152  KOKKOS_INLINE_FUNCTION
153  bool symmetric() const { return m_symmetric; }
154 
155  template <typename OrdinalType>
160  {
161  using Teuchos::Array;
162  using Teuchos::RCP;
163 
164  // Allocate tensor data
166  tensor.m_dimension = basis.size();
167  tensor.m_symmetric = Cijk.symmetric();
168  tensor.m_coord = coord_array_type( "coord" , Cijk.num_leafs() );
169  tensor.m_value = value_array_type( "value" , Cijk.num_entries() );
170 
171  // Create mirror, is a view if is host memory
172  typename coord_array_type::HostMirror host_coord =
174  typename value_array_type::HostMirror host_value =
176 
177  // Fill flat 3 tensor
179  typedef typename Cijk_type::CijkNode node_type;
180  Array< RCP<const node_type> > node_stack;
181  Array< OrdinalType > index_stack;
182  node_stack.push_back(Cijk.getHeadNode());
183  index_stack.push_back(0);
185  OrdinalType child_index;
186  OrdinalType coord_index = 0;
187  OrdinalType value_index = 0;
188  tensor.m_flops = 0;
189  while (node_stack.size() > 0) {
190  node = node_stack.back();
191  child_index = index_stack.back();
192 
193  // Leaf
194  if (node->is_leaf) {
195  host_coord(coord_index, 0) = node->i_begin;
196  host_coord(coord_index, 1) = node->j_begin;
197  host_coord(coord_index, 2) = node->k_begin;
198  host_coord(coord_index, 3) = node->p_i;
199  host_coord(coord_index, 4) = node->p_j;
200  host_coord(coord_index, 5) = node->p_k;
201  host_coord(coord_index, 6) = node->parent_j_equals_k;
202  ++coord_index;
203  for (OrdinalType i=0; i<node->my_num_entries; ++i)
204  host_value(value_index++) = node->values[i];
205  tensor.m_flops += 5*node->my_num_entries + node->i_size;
206  node_stack.pop_back();
207  index_stack.pop_back();
208  }
209 
210  // More children to process -- process them first
211  else if (child_index < node->children.size()) {
212  ++index_stack.back();
213  node = node->children[child_index];
214  node_stack.push_back(node);
215  index_stack.push_back(0);
216  }
217 
218  // No more children
219  else {
220  node_stack.pop_back();
221  index_stack.pop_back();
222  }
223 
224  }
225  TEUCHOS_ASSERT(coord_index == Cijk.num_leafs());
226  TEUCHOS_ASSERT(value_index == Cijk.num_entries());
227 
228  /*
229  // Save block volumes to file
230  static int index = 0;
231  std::stringstream file_name;
232  file_name << "cijk_vol_" << index << ".txt";
233  std::ofstream file(file_name.str().c_str());
234  for (size_type i=0; i<coord_index; ++i) {
235  int vol = host_coord(i,3) * host_coord(i,4) * host_coord(i,5);
236  file << vol << std::endl;
237  }
238  file.close();
239  index++;
240  */
241 
242  // Copy data to device if necessary
243  Kokkos::deep_copy( tensor.m_coord , host_coord );
244  Kokkos::deep_copy( tensor.m_value , host_value );
245 
246  return tensor ;
247  }
248 };
249 
250 template< class Device , typename OrdinalType , typename ValueType >
251 LexicographicBlockSparse3Tensor<ValueType, Device>
256 {
258  basis, Cijk, params);
259 }
260 
261  template < typename ValueType, typename Device >
262 class BlockMultiply< LexicographicBlockSparse3Tensor< ValueType , Device > >
263 {
264 public:
265 
266  typedef typename Device::size_type size_type ;
268 
269  template< typename MatrixValue , typename VectorValue >
270  KOKKOS_INLINE_FUNCTION
271  static void apply( const tensor_type & tensor ,
272  const MatrixValue * const a ,
273  const VectorValue * const x ,
274  VectorValue * const y )
275  {
276  const size_type nBlock = tensor.num_coord();
277 
278  if (tensor.symmetric()) {
279 
280  // Loop over coordinate blocks
281  size_type value_entry = 0;
282  for ( size_type block = 0; block < nBlock; ++block) {
283  const int i_begin = tensor.get_i_begin(block);
284  const int j_begin = tensor.get_j_begin(block);
285  const int k_begin = tensor.get_k_begin(block);
286  const int p_i = tensor.get_p_i(block);
287  const int p_j = tensor.get_p_j(block);
288  const int p_k = tensor.get_p_k(block);
289  VectorValue * const y_block = y + i_begin;
290  const MatrixValue * const a_j_block = a + j_begin;
291  const VectorValue * const x_k_block = x + k_begin;
292  const MatrixValue * const a_k_block = a + k_begin;
293  const VectorValue * const x_j_block = x + j_begin;
294 
295  // for (int i=0; i<=p_i; ++i) {
296  // VectorValue ytmp = 0;
297  // for (int j=0; j<=p_j; ++j) {
298  // int k0 = j_eq_k != 0 ? j : 0;
299  // if (symmetric && (k0 % 2 != (i+j) % 2)) ++k0;
300  // for (int k=k0; k<=p_k; k+=k_inc) {
301  // ytmp += tensor.value(value_entry++) *
302  // ( a_j_block[j] * x_k_block[k] + a_k_block[k] * x_j_block[j] );
303  // }
304  // }
305  // y_block[i] += ytmp ;
306  // }
307 
308  if (tensor.get_j_eq_k(block) != 0) {
309  for (int i=0; i<=p_i; ++i) {
310  VectorValue ytmp = 0;
311  for (int j=0; j<=p_j; ++j) {
312  int k0 = j%2 != (i+j)%2 ? j+1 : j;
313  for (int k=k0; k<=p_k; k+=2) {
314  ytmp += tensor.value(value_entry++) *
315  ( a_j_block[j] * x_k_block[k] + a_k_block[k] * x_j_block[j] );
316  }
317  }
318  y_block[i] += ytmp ;
319  }
320  }
321  else {
322  for (int i=0; i<=p_i; ++i) {
323  VectorValue ytmp = 0;
324  for (int j=0; j<=p_j; ++j) {
325  for (int k=(i+j)%2; k<=p_k; k+=2) {
326  ytmp += tensor.value(value_entry++) *
327  ( a_j_block[j] * x_k_block[k] + a_k_block[k] * x_j_block[j] );
328  }
329  }
330  y_block[i] += ytmp ;
331  }
332  }
333  }
334 
335  }
336 
337  else {
338 
339  // Loop over coordinate blocks
340  size_type value_entry = 0;
341  for ( size_type block = 0; block < nBlock; ++block) {
342  const int i_begin = tensor.get_i_begin(block);
343  const int j_begin = tensor.get_j_begin(block);
344  const int k_begin = tensor.get_k_begin(block);
345  const int p_i = tensor.get_p_i(block);
346  const int p_j = tensor.get_p_j(block);
347  const int p_k = tensor.get_p_k(block);
348  VectorValue * const y_block = y + i_begin;
349  const MatrixValue * const a_j_block = a + j_begin;
350  const VectorValue * const x_k_block = x + k_begin;
351  const MatrixValue * const a_k_block = a + k_begin;
352  const VectorValue * const x_j_block = x + j_begin;
353 
354  // for (int i=0; i<=p_i; ++i) {
355  // VectorValue ytmp = 0;
356  // for (int j=0; j<=p_j; ++j) {
357  // int k0 = j_eq_k != 0 ? j : 0;
358  // if (symmetric && (k0 % 2 != (i+j) % 2)) ++k0;
359  // for (int k=k0; k<=p_k; k+=k_inc) {
360  // ytmp += tensor.value(value_entry++) *
361  // ( a_j_block[j] * x_k_block[k] + a_k_block[k] * x_j_block[j] );
362  // }
363  // }
364  // y_block[i] += ytmp ;
365  // }
366 
367  if (tensor.get_j_eq_k(block) != 0) {
368  for (int i=0; i<=p_i; ++i) {
369  VectorValue ytmp = 0;
370  for (int j=0; j<=p_j; ++j) {
371  for (int k=j; k<=p_k; ++k) {
372  ytmp += tensor.value(value_entry++) *
373  ( a_j_block[j] * x_k_block[k] + a_k_block[k] * x_j_block[j] );
374  }
375  }
376  y_block[i] += ytmp ;
377  }
378  }
379  else {
380  for (int i=0; i<=p_i; ++i) {
381  VectorValue ytmp = 0;
382  for (int j=0; j<=p_j; ++j) {
383  for (int k=0; k<=p_k; ++k) {
384  ytmp += tensor.value(value_entry++) *
385  ( a_j_block[j] * x_k_block[k] + a_k_block[k] * x_j_block[j] );
386  }
387  }
388  y_block[i] += ytmp ;
389  }
390  }
391  }
392 
393  }
394  }
395 
396  KOKKOS_INLINE_FUNCTION
397  static size_type matrix_size( const tensor_type & tensor )
398  { return tensor.dimension(); }
399 
400  KOKKOS_INLINE_FUNCTION
401  static size_type vector_size( const tensor_type & tensor )
402  { return tensor.dimension(); }
403 };
404 
405 } /* namespace Stokhos */
406 
407 //----------------------------------------------------------------------------
408 //----------------------------------------------------------------------------
409 
410 #endif /* #ifndef STOKHOS_LEXICOGRAPHIC_BLOCK_SPARSE_3_TENSOR_HPP */
Teuchos::RCP< const CijkNode > getHeadNode() const
Get the head node.
KOKKOS_INLINE_FUNCTION int get_j_begin(const size_type entry) const
LexicographicBlockSparse3Tensor< ValueType, Device > create_lexicographic_block_sparse_3_tensor(const Stokhos::ProductBasis< OrdinalType, ValueType > &basis, const Stokhos::LTBSparse3Tensor< OrdinalType, ValueType > &Cijk, const Teuchos::ParameterList &params=Teuchos::ParameterList())
LexicographicBlockSparse3Tensor(const LexicographicBlockSparse3Tensor &rhs)
KOKKOS_INLINE_FUNCTION int get_p_k(const size_type entry) const
static KOKKOS_INLINE_FUNCTION void apply(const tensor_type &tensor, const MatrixValue *const a, const VectorValue *const x, VectorValue *const y)
KOKKOS_INLINE_FUNCTION int get_j_eq_k(const size_type entry) const
Kokkos::View< value_type[], execution_space > value_array_type
ordinal_type num_entries() const
Return number of non-zero entries.
ordinal_type num_leafs() const
Return number of nodes.
KOKKOS_INLINE_FUNCTION size_type num_value() const
Number of values.
LexicographicBlockSparse3Tensor & operator=(const LexicographicBlockSparse3Tensor &rhs)
Kokkos::Serial node_type
Data structure storing a sparse 3-tensor C(i,j,k) in a a tree-based format for lexicographically orde...
Sparse product tensor with replicated entries to provide subsets with a given coordinate.
KOKKOS_INLINE_FUNCTION int get_k_begin(const size_type entry) const
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
KOKKOS_INLINE_FUNCTION size_type num_coord() const
Number of coordinates.
KOKKOS_INLINE_FUNCTION size_type num_flops() const
Number flop&#39;s per multiply-add.
Kokkos::View< int[][7], Kokkos::LayoutRight, execution_space > coord_array_type
Stokhos::Sparse3Tensor< int, double > Cijk_type
KOKKOS_INLINE_FUNCTION int get_p_i(const size_type entry) const
reference back()
void push_back(const value_type &x)
bool symmetric() const
Return if symmetric.
KOKKOS_INLINE_FUNCTION bool symmetric() const
Is PDF symmetric.
size_type size() const
KOKKOS_INLINE_FUNCTION const value_type & value(const size_type entry) const
Cijk for entry &#39;entry&#39;.
KOKKOS_INLINE_FUNCTION size_type dimension() const
Dimension of the tensor.
static LexicographicBlockSparse3Tensor create(const Stokhos::ProductBasis< OrdinalType, ValueType > &basis, const Stokhos::LTBSparse3Tensor< OrdinalType, ValueType > &Cijk, const Teuchos::ParameterList &params=Teuchos::ParameterList())
KOKKOS_INLINE_FUNCTION int get_p_j(const size_type entry) const
KOKKOS_INLINE_FUNCTION int get_i_begin(const size_type entry) const
#define TEUCHOS_ASSERT(assertion_test)
virtual ordinal_type size() const =0
Return total size of basis.
KOKKOS_INLINE_FUNCTION size_type num_non_zeros() const
Number of non-zero&#39;s.
Stokhos::CrsMatrix< ValueType, Device, Layout >::HostMirror create_mirror_view(const Stokhos::CrsMatrix< ValueType, Device, Layout > &A)