Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Stokhos_LexicographicBlockSparse3Tensor.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Stokhos Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38 //
39 // ***********************************************************************
40 // @HEADER
41 
42 #ifndef STOKHOS_LEXICOGRAPHIC_BLOCK_SPARSE_3_TENSOR_HPP
43 #define STOKHOS_LEXICOGRAPHIC_BLOCK_SPARSE_3_TENSOR_HPP
44 
45 #include "Kokkos_Core.hpp"
46 
47 #include "Stokhos_Multiply.hpp"
48 #include "Stokhos_ProductBasis.hpp"
51 
52 #include <sstream>
53 #include <fstream>
54 
55 //----------------------------------------------------------------------------
56 //----------------------------------------------------------------------------
57 
58 namespace Stokhos {
59 
63 template< typename ValueType , class ExecutionSpace >
65 public:
66 
67  typedef ExecutionSpace execution_space;
68  typedef typename execution_space::size_type size_type;
69  typedef ValueType value_type;
70 
71 private:
72 
73  typedef Kokkos::View< int[][7] , Kokkos::LayoutRight, execution_space > coord_array_type;
74  typedef Kokkos::View< value_type[], execution_space > value_array_type;
75 
81 
82 public:
83 
84  inline
86 
87  inline
89  m_coord(),
90  m_value(),
91  m_dimension(),
92  m_flops(0),
93  m_symmetric(false) {}
94 
95  inline
97  const LexicographicBlockSparse3Tensor & rhs) :
98  m_coord(rhs.m_coord),
99  m_value(rhs.m_value),
101  m_flops(rhs.m_flops),
102  m_symmetric(rhs.m_symmetric) {}
103 
104  inline
107  {
108  m_coord = rhs.m_coord;
109  m_value = rhs.m_value;
110  m_dimension = rhs.m_dimension;
111  m_flops = rhs.m_flops;
112  m_symmetric = rhs.m_symmetric;
113  return *this ;
114  }
115 
117  KOKKOS_INLINE_FUNCTION
118  size_type dimension() const { return m_dimension; }
119 
121  KOKKOS_INLINE_FUNCTION
122  size_type num_coord() const { return m_coord.extent(0); }
123 
125  KOKKOS_INLINE_FUNCTION
126  size_type num_value() const { return m_value.extent(0); }
127 
129  KOKKOS_INLINE_FUNCTION
130  int get_i_begin(const size_type entry) const {
131  return m_coord(entry,0);
132  }
133 
135  KOKKOS_INLINE_FUNCTION
136  int get_j_begin(const size_type entry) const {
137  return m_coord(entry,1);
138  }
139 
141  KOKKOS_INLINE_FUNCTION
142  int get_k_begin(const size_type entry) const {
143  return m_coord(entry,2);
144  }
145 
147  KOKKOS_INLINE_FUNCTION
148  int get_p_i(const size_type entry) const {
149  return m_coord(entry,3);
150  }
151 
153  KOKKOS_INLINE_FUNCTION
154  int get_p_j(const size_type entry) const {
155  return m_coord(entry,4);
156  }
157 
159  KOKKOS_INLINE_FUNCTION
160  int get_p_k(const size_type entry) const {
161  return m_coord(entry,5);
162  }
163 
165  KOKKOS_INLINE_FUNCTION
166  int get_j_eq_k(const size_type entry) const {
167  return m_coord(entry,6);
168  }
169 
171  KOKKOS_INLINE_FUNCTION
172  const value_type& value(const size_type entry) const
173  { return m_value(entry); }
174 
176  KOKKOS_INLINE_FUNCTION
177  size_type num_non_zeros() const { return m_value.extent(0); }
178 
180  KOKKOS_INLINE_FUNCTION
181  size_type num_flops() const { return m_flops; }
182 
184  KOKKOS_INLINE_FUNCTION
185  bool symmetric() const { return m_symmetric; }
186 
187  template <typename OrdinalType>
192  {
193  using Teuchos::Array;
194  using Teuchos::RCP;
195 
196  // Allocate tensor data
198  tensor.m_dimension = basis.size();
199  tensor.m_symmetric = Cijk.symmetric();
200  tensor.m_coord = coord_array_type( "coord" , Cijk.num_leafs() );
201  tensor.m_value = value_array_type( "value" , Cijk.num_entries() );
202 
203  // Create mirror, is a view if is host memory
204  typename coord_array_type::HostMirror host_coord =
206  typename value_array_type::HostMirror host_value =
208 
209  // Fill flat 3 tensor
211  typedef typename Cijk_type::CijkNode node_type;
212  Array< RCP<const node_type> > node_stack;
213  Array< OrdinalType > index_stack;
214  node_stack.push_back(Cijk.getHeadNode());
215  index_stack.push_back(0);
217  OrdinalType child_index;
218  OrdinalType coord_index = 0;
219  OrdinalType value_index = 0;
220  tensor.m_flops = 0;
221  while (node_stack.size() > 0) {
222  node = node_stack.back();
223  child_index = index_stack.back();
224 
225  // Leaf
226  if (node->is_leaf) {
227  host_coord(coord_index, 0) = node->i_begin;
228  host_coord(coord_index, 1) = node->j_begin;
229  host_coord(coord_index, 2) = node->k_begin;
230  host_coord(coord_index, 3) = node->p_i;
231  host_coord(coord_index, 4) = node->p_j;
232  host_coord(coord_index, 5) = node->p_k;
233  host_coord(coord_index, 6) = node->parent_j_equals_k;
234  ++coord_index;
235  for (OrdinalType i=0; i<node->my_num_entries; ++i)
236  host_value(value_index++) = node->values[i];
237  tensor.m_flops += 5*node->my_num_entries + node->i_size;
238  node_stack.pop_back();
239  index_stack.pop_back();
240  }
241 
242  // More children to process -- process them first
243  else if (child_index < node->children.size()) {
244  ++index_stack.back();
245  node = node->children[child_index];
246  node_stack.push_back(node);
247  index_stack.push_back(0);
248  }
249 
250  // No more children
251  else {
252  node_stack.pop_back();
253  index_stack.pop_back();
254  }
255 
256  }
257  TEUCHOS_ASSERT(coord_index == Cijk.num_leafs());
258  TEUCHOS_ASSERT(value_index == Cijk.num_entries());
259 
260  /*
261  // Save block volumes to file
262  static int index = 0;
263  std::stringstream file_name;
264  file_name << "cijk_vol_" << index << ".txt";
265  std::ofstream file(file_name.str().c_str());
266  for (size_type i=0; i<coord_index; ++i) {
267  int vol = host_coord(i,3) * host_coord(i,4) * host_coord(i,5);
268  file << vol << std::endl;
269  }
270  file.close();
271  index++;
272  */
273 
274  // Copy data to device if necessary
275  Kokkos::deep_copy( tensor.m_coord , host_coord );
276  Kokkos::deep_copy( tensor.m_value , host_value );
277 
278  return tensor ;
279  }
280 };
281 
282 template< class Device , typename OrdinalType , typename ValueType >
283 LexicographicBlockSparse3Tensor<ValueType, Device>
288 {
290  basis, Cijk, params);
291 }
292 
293  template < typename ValueType, typename Device >
294 class BlockMultiply< LexicographicBlockSparse3Tensor< ValueType , Device > >
295 {
296 public:
297 
298  typedef typename Device::size_type size_type ;
300 
301  template< typename MatrixValue , typename VectorValue >
302  KOKKOS_INLINE_FUNCTION
303  static void apply( const tensor_type & tensor ,
304  const MatrixValue * const a ,
305  const VectorValue * const x ,
306  VectorValue * const y )
307  {
308  const size_type nBlock = tensor.num_coord();
309 
310  if (tensor.symmetric()) {
311 
312  // Loop over coordinate blocks
313  size_type value_entry = 0;
314  for ( size_type block = 0; block < nBlock; ++block) {
315  const int i_begin = tensor.get_i_begin(block);
316  const int j_begin = tensor.get_j_begin(block);
317  const int k_begin = tensor.get_k_begin(block);
318  const int p_i = tensor.get_p_i(block);
319  const int p_j = tensor.get_p_j(block);
320  const int p_k = tensor.get_p_k(block);
321  VectorValue * const y_block = y + i_begin;
322  const MatrixValue * const a_j_block = a + j_begin;
323  const VectorValue * const x_k_block = x + k_begin;
324  const MatrixValue * const a_k_block = a + k_begin;
325  const VectorValue * const x_j_block = x + j_begin;
326 
327  // for (int i=0; i<=p_i; ++i) {
328  // VectorValue ytmp = 0;
329  // for (int j=0; j<=p_j; ++j) {
330  // int k0 = j_eq_k != 0 ? j : 0;
331  // if (symmetric && (k0 % 2 != (i+j) % 2)) ++k0;
332  // for (int k=k0; k<=p_k; k+=k_inc) {
333  // ytmp += tensor.value(value_entry++) *
334  // ( a_j_block[j] * x_k_block[k] + a_k_block[k] * x_j_block[j] );
335  // }
336  // }
337  // y_block[i] += ytmp ;
338  // }
339 
340  if (tensor.get_j_eq_k(block) != 0) {
341  for (int i=0; i<=p_i; ++i) {
342  VectorValue ytmp = 0;
343  for (int j=0; j<=p_j; ++j) {
344  int k0 = j%2 != (i+j)%2 ? j+1 : j;
345  for (int k=k0; k<=p_k; k+=2) {
346  ytmp += tensor.value(value_entry++) *
347  ( a_j_block[j] * x_k_block[k] + a_k_block[k] * x_j_block[j] );
348  }
349  }
350  y_block[i] += ytmp ;
351  }
352  }
353  else {
354  for (int i=0; i<=p_i; ++i) {
355  VectorValue ytmp = 0;
356  for (int j=0; j<=p_j; ++j) {
357  for (int k=(i+j)%2; k<=p_k; k+=2) {
358  ytmp += tensor.value(value_entry++) *
359  ( a_j_block[j] * x_k_block[k] + a_k_block[k] * x_j_block[j] );
360  }
361  }
362  y_block[i] += ytmp ;
363  }
364  }
365  }
366 
367  }
368 
369  else {
370 
371  // Loop over coordinate blocks
372  size_type value_entry = 0;
373  for ( size_type block = 0; block < nBlock; ++block) {
374  const int i_begin = tensor.get_i_begin(block);
375  const int j_begin = tensor.get_j_begin(block);
376  const int k_begin = tensor.get_k_begin(block);
377  const int p_i = tensor.get_p_i(block);
378  const int p_j = tensor.get_p_j(block);
379  const int p_k = tensor.get_p_k(block);
380  VectorValue * const y_block = y + i_begin;
381  const MatrixValue * const a_j_block = a + j_begin;
382  const VectorValue * const x_k_block = x + k_begin;
383  const MatrixValue * const a_k_block = a + k_begin;
384  const VectorValue * const x_j_block = x + j_begin;
385 
386  // for (int i=0; i<=p_i; ++i) {
387  // VectorValue ytmp = 0;
388  // for (int j=0; j<=p_j; ++j) {
389  // int k0 = j_eq_k != 0 ? j : 0;
390  // if (symmetric && (k0 % 2 != (i+j) % 2)) ++k0;
391  // for (int k=k0; k<=p_k; k+=k_inc) {
392  // ytmp += tensor.value(value_entry++) *
393  // ( a_j_block[j] * x_k_block[k] + a_k_block[k] * x_j_block[j] );
394  // }
395  // }
396  // y_block[i] += ytmp ;
397  // }
398 
399  if (tensor.get_j_eq_k(block) != 0) {
400  for (int i=0; i<=p_i; ++i) {
401  VectorValue ytmp = 0;
402  for (int j=0; j<=p_j; ++j) {
403  for (int k=j; k<=p_k; ++k) {
404  ytmp += tensor.value(value_entry++) *
405  ( a_j_block[j] * x_k_block[k] + a_k_block[k] * x_j_block[j] );
406  }
407  }
408  y_block[i] += ytmp ;
409  }
410  }
411  else {
412  for (int i=0; i<=p_i; ++i) {
413  VectorValue ytmp = 0;
414  for (int j=0; j<=p_j; ++j) {
415  for (int k=0; k<=p_k; ++k) {
416  ytmp += tensor.value(value_entry++) *
417  ( a_j_block[j] * x_k_block[k] + a_k_block[k] * x_j_block[j] );
418  }
419  }
420  y_block[i] += ytmp ;
421  }
422  }
423  }
424 
425  }
426  }
427 
428  KOKKOS_INLINE_FUNCTION
429  static size_type matrix_size( const tensor_type & tensor )
430  { return tensor.dimension(); }
431 
432  KOKKOS_INLINE_FUNCTION
433  static size_type vector_size( const tensor_type & tensor )
434  { return tensor.dimension(); }
435 };
436 
437 } /* namespace Stokhos */
438 
439 //----------------------------------------------------------------------------
440 //----------------------------------------------------------------------------
441 
442 #endif /* #ifndef STOKHOS_LEXICOGRAPHIC_BLOCK_SPARSE_3_TENSOR_HPP */
Teuchos::RCP< const CijkNode > getHeadNode() const
Get the head node.
KOKKOS_INLINE_FUNCTION int get_j_begin(const size_type entry) const
LexicographicBlockSparse3Tensor< ValueType, Device > create_lexicographic_block_sparse_3_tensor(const Stokhos::ProductBasis< OrdinalType, ValueType > &basis, const Stokhos::LTBSparse3Tensor< OrdinalType, ValueType > &Cijk, const Teuchos::ParameterList &params=Teuchos::ParameterList())
LexicographicBlockSparse3Tensor(const LexicographicBlockSparse3Tensor &rhs)
KOKKOS_INLINE_FUNCTION int get_p_k(const size_type entry) const
static KOKKOS_INLINE_FUNCTION void apply(const tensor_type &tensor, const MatrixValue *const a, const VectorValue *const x, VectorValue *const y)
KOKKOS_INLINE_FUNCTION int get_j_eq_k(const size_type entry) const
Kokkos::View< value_type[], execution_space > value_array_type
ordinal_type num_entries() const
Return number of non-zero entries.
ordinal_type num_leafs() const
Return number of nodes.
KOKKOS_INLINE_FUNCTION size_type num_value() const
Number of values.
LexicographicBlockSparse3Tensor & operator=(const LexicographicBlockSparse3Tensor &rhs)
Kokkos::Serial node_type
Data structure storing a sparse 3-tensor C(i,j,k) in a a tree-based format for lexicographically orde...
Sparse product tensor with replicated entries to provide subsets with a given coordinate.
KOKKOS_INLINE_FUNCTION int get_k_begin(const size_type entry) const
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
KOKKOS_INLINE_FUNCTION size_type num_coord() const
Number of coordinates.
KOKKOS_INLINE_FUNCTION size_type num_flops() const
Number flop&#39;s per multiply-add.
Kokkos::View< int[][7], Kokkos::LayoutRight, execution_space > coord_array_type
Stokhos::Sparse3Tensor< int, double > Cijk_type
KOKKOS_INLINE_FUNCTION int get_p_i(const size_type entry) const
reference back()
void push_back(const value_type &x)
bool symmetric() const
Return if symmetric.
KOKKOS_INLINE_FUNCTION bool symmetric() const
Is PDF symmetric.
size_type size() const
KOKKOS_INLINE_FUNCTION const value_type & value(const size_type entry) const
Cijk for entry &#39;entry&#39;.
KOKKOS_INLINE_FUNCTION size_type dimension() const
Dimension of the tensor.
static LexicographicBlockSparse3Tensor create(const Stokhos::ProductBasis< OrdinalType, ValueType > &basis, const Stokhos::LTBSparse3Tensor< OrdinalType, ValueType > &Cijk, const Teuchos::ParameterList &params=Teuchos::ParameterList())
KOKKOS_INLINE_FUNCTION int get_p_j(const size_type entry) const
KOKKOS_INLINE_FUNCTION int get_i_begin(const size_type entry) const
#define TEUCHOS_ASSERT(assertion_test)
virtual ordinal_type size() const =0
Return total size of basis.
KOKKOS_INLINE_FUNCTION size_type num_non_zeros() const
Number of non-zero&#39;s.
Stokhos::CrsMatrix< ValueType, Device, Layout >::HostMirror create_mirror_view(const Stokhos::CrsMatrix< ValueType, Device, Layout > &A)