42 #ifndef STOKHOS_LEXICOGRAPHIC_BLOCK_SPARSE_3_TENSOR_HPP
43 #define STOKHOS_LEXICOGRAPHIC_BLOCK_SPARSE_3_TENSOR_HPP
45 #include "Kokkos_Core.hpp"
63 template<
typename ValueType ,
class ExecutionSpace >
68 typedef typename execution_space::size_type
size_type;
73 typedef Kokkos::View< int[][7] , Kokkos::LayoutRight, execution_space >
coord_array_type;
117 KOKKOS_INLINE_FUNCTION
121 KOKKOS_INLINE_FUNCTION
125 KOKKOS_INLINE_FUNCTION
129 KOKKOS_INLINE_FUNCTION
135 KOKKOS_INLINE_FUNCTION
141 KOKKOS_INLINE_FUNCTION
147 KOKKOS_INLINE_FUNCTION
153 KOKKOS_INLINE_FUNCTION
159 KOKKOS_INLINE_FUNCTION
165 KOKKOS_INLINE_FUNCTION
171 KOKKOS_INLINE_FUNCTION
176 KOKKOS_INLINE_FUNCTION
180 KOKKOS_INLINE_FUNCTION
184 KOKKOS_INLINE_FUNCTION
187 template <
typename OrdinalType>
204 typename coord_array_type::HostMirror host_coord =
206 typename value_array_type::HostMirror host_value =
211 typedef typename Cijk_type::CijkNode
node_type;
217 OrdinalType child_index;
218 OrdinalType coord_index = 0;
219 OrdinalType value_index = 0;
221 while (node_stack.
size() > 0) {
222 node = node_stack.
back();
223 child_index = index_stack.
back();
227 host_coord(coord_index, 0) = node->i_begin;
228 host_coord(coord_index, 1) = node->j_begin;
229 host_coord(coord_index, 2) = node->k_begin;
230 host_coord(coord_index, 3) = node->p_i;
231 host_coord(coord_index, 4) = node->p_j;
232 host_coord(coord_index, 5) = node->p_k;
233 host_coord(coord_index, 6) = node->parent_j_equals_k;
235 for (OrdinalType i=0; i<node->my_num_entries; ++i)
236 host_value(value_index++) = node->values[i];
237 tensor.
m_flops += 5*node->my_num_entries + node->i_size;
243 else if (child_index < node->children.size()) {
244 ++index_stack.
back();
245 node = node->children[child_index];
282 template<
class Device ,
typename OrdinalType ,
typename ValueType >
283 LexicographicBlockSparse3Tensor<ValueType, Device>
290 basis, Cijk, params);
293 template <
typename ValueType,
typename Device >
301 template<
typename MatrixValue ,
typename VectorValue >
302 KOKKOS_INLINE_FUNCTION
304 const MatrixValue *
const a ,
305 const VectorValue *
const x ,
306 VectorValue *
const y )
314 for (
size_type block = 0; block < nBlock; ++block) {
318 const int p_i = tensor.
get_p_i(block);
319 const int p_j = tensor.
get_p_j(block);
320 const int p_k = tensor.
get_p_k(block);
321 VectorValue *
const y_block = y + i_begin;
322 const MatrixValue *
const a_j_block = a + j_begin;
323 const VectorValue *
const x_k_block = x + k_begin;
324 const MatrixValue *
const a_k_block = a + k_begin;
325 const VectorValue *
const x_j_block = x + j_begin;
341 for (
int i=0; i<=p_i; ++i) {
342 VectorValue ytmp = 0;
343 for (
int j=0;
j<=p_j; ++
j) {
344 int k0 =
j%2 != (i+
j)%2 ?
j+1 :
j;
345 for (
int k=k0; k<=p_k; k+=2) {
346 ytmp += tensor.
value(value_entry++) *
347 ( a_j_block[
j] * x_k_block[k] + a_k_block[k] * x_j_block[
j] );
354 for (
int i=0; i<=p_i; ++i) {
355 VectorValue ytmp = 0;
356 for (
int j=0;
j<=p_j; ++
j) {
357 for (
int k=(i+
j)%2; k<=p_k; k+=2) {
358 ytmp += tensor.
value(value_entry++) *
359 ( a_j_block[
j] * x_k_block[k] + a_k_block[k] * x_j_block[
j] );
373 for (
size_type block = 0; block < nBlock; ++block) {
377 const int p_i = tensor.
get_p_i(block);
378 const int p_j = tensor.
get_p_j(block);
379 const int p_k = tensor.
get_p_k(block);
380 VectorValue *
const y_block = y + i_begin;
381 const MatrixValue *
const a_j_block = a + j_begin;
382 const VectorValue *
const x_k_block = x + k_begin;
383 const MatrixValue *
const a_k_block = a + k_begin;
384 const VectorValue *
const x_j_block = x + j_begin;
400 for (
int i=0; i<=p_i; ++i) {
401 VectorValue ytmp = 0;
402 for (
int j=0;
j<=p_j; ++
j) {
403 for (
int k=
j; k<=p_k; ++k) {
404 ytmp += tensor.
value(value_entry++) *
405 ( a_j_block[
j] * x_k_block[k] + a_k_block[k] * x_j_block[
j] );
412 for (
int i=0; i<=p_i; ++i) {
413 VectorValue ytmp = 0;
414 for (
int j=0;
j<=p_j; ++
j) {
415 for (
int k=0; k<=p_k; ++k) {
416 ytmp += tensor.
value(value_entry++) *
417 ( a_j_block[
j] * x_k_block[k] + a_k_block[k] * x_j_block[
j] );
428 KOKKOS_INLINE_FUNCTION
432 KOKKOS_INLINE_FUNCTION
Teuchos::RCP< const CijkNode > getHeadNode() const
Get the head node.
KOKKOS_INLINE_FUNCTION int get_j_begin(const size_type entry) const
LexicographicBlockSparse3Tensor< ValueType, Device > create_lexicographic_block_sparse_3_tensor(const Stokhos::ProductBasis< OrdinalType, ValueType > &basis, const Stokhos::LTBSparse3Tensor< OrdinalType, ValueType > &Cijk, const Teuchos::ParameterList &params=Teuchos::ParameterList())
LexicographicBlockSparse3Tensor(const LexicographicBlockSparse3Tensor &rhs)
KOKKOS_INLINE_FUNCTION int get_p_k(const size_type entry) const
static KOKKOS_INLINE_FUNCTION void apply(const tensor_type &tensor, const MatrixValue *const a, const VectorValue *const x, VectorValue *const y)
KOKKOS_INLINE_FUNCTION int get_j_eq_k(const size_type entry) const
Kokkos::View< value_type[], execution_space > value_array_type
ordinal_type num_entries() const
Return number of non-zero entries.
ordinal_type num_leafs() const
Return number of nodes.
KOKKOS_INLINE_FUNCTION size_type num_value() const
Number of values.
LexicographicBlockSparse3Tensor & operator=(const LexicographicBlockSparse3Tensor &rhs)
Data structure storing a sparse 3-tensor C(i,j,k) in a tree-based format for lexicographically orde...
ExecutionSpace execution_space
Device::size_type size_type
Sparse product tensor with replicated entries to provide subsets with a given coordinate.
static KOKKOS_INLINE_FUNCTION size_type vector_size(const tensor_type &tensor)
execution_space::size_type size_type
static KOKKOS_INLINE_FUNCTION size_type matrix_size(const tensor_type &tensor)
KOKKOS_INLINE_FUNCTION int get_k_begin(const size_type entry) const
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
KOKKOS_INLINE_FUNCTION size_type num_coord() const
Number of coordinates.
KOKKOS_INLINE_FUNCTION size_type num_flops() const
Number of flops per multiply-add.
Kokkos::View< int[][7], Kokkos::LayoutRight, execution_space > coord_array_type
Stokhos::Sparse3Tensor< int, double > Cijk_type
KOKKOS_INLINE_FUNCTION int get_p_i(const size_type entry) const
void push_back(const value_type &x)
bool symmetric() const
Return if symmetric.
LexicographicBlockSparse3Tensor< ValueType, Device > tensor_type
KOKKOS_INLINE_FUNCTION bool symmetric() const
Is PDF symmetric.
LexicographicBlockSparse3Tensor()
KOKKOS_INLINE_FUNCTION const value_type & value(const size_type entry) const
Cijk for entry 'entry'.
~LexicographicBlockSparse3Tensor()
KOKKOS_INLINE_FUNCTION size_type dimension() const
Dimension of the tensor.
static LexicographicBlockSparse3Tensor create(const Stokhos::ProductBasis< OrdinalType, ValueType > &basis, const Stokhos::LTBSparse3Tensor< OrdinalType, ValueType > &Cijk, const Teuchos::ParameterList &params=Teuchos::ParameterList())
KOKKOS_INLINE_FUNCTION int get_p_j(const size_type entry) const
KOKKOS_INLINE_FUNCTION int get_i_begin(const size_type entry) const
#define TEUCHOS_ASSERT(assertion_test)
virtual ordinal_type size() const =0
Return total size of basis.
KOKKOS_INLINE_FUNCTION size_type num_non_zeros() const
Number of non-zeros.
Stokhos::CrsMatrix< ValueType, Device, Layout >::HostMirror create_mirror_view(const Stokhos::CrsMatrix< ValueType, Device, Layout > &A)