Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Stokhos_CrsMatrix.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Stokhos Package
4 //
5 // Copyright 2009 NTESS and the Stokhos contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef STOKHOS_CRSMATRIX_HPP
11 #define STOKHOS_CRSMATRIX_HPP
12 
13 #include <fstream>
14 #include <iomanip>
15 
16 #include "Kokkos_Core.hpp"
17 #include "KokkosSparse_StaticCrsGraph.hpp"
18 
19 #include "Stokhos_Multiply.hpp"
20 #include "Stokhos_MatrixMarket.hpp"
21 
22 namespace Stokhos {
23 
24 struct DeviceConfig {
// Three unsigned extents (x, y, z); y and z default to 1 when omitted.
struct Dim3 {
  size_t x;
  size_t y;
  size_t z;

  Dim3(const size_t x_, const size_t y_ = 1, const size_t z_ = 1)
    : x{x_}, y{y_}, z{z_}
  {}
};
30 
32  size_t num_blocks;
34 
35  DeviceConfig(const size_t num_blocks_,
36  const size_t threads_per_block_x_,
37  const size_t threads_per_block_y_ = 1,
38  const size_t threads_per_block_z_ = 1) :
39  block_dim(threads_per_block_x_,threads_per_block_y_,threads_per_block_z_),
40  num_blocks(num_blocks_),
42  {}
43 };
44 
46 template <typename ValueType, typename Device,
47  typename Layout = Kokkos::LayoutRight>
48 class CrsMatrix {
49 public:
50  typedef Device execution_space;
51  typedef ValueType value_type;
52  typedef Kokkos::View< value_type[], Layout, execution_space > values_type;
53 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE // Don't remove this until Kokkos has removed the deprecated code path probably around September 2018
54  typedef KokkosSparse::StaticCrsGraph< int , Layout, execution_space , int > graph_type;
55 #else
56  typedef KokkosSparse::StaticCrsGraph< int , Layout, execution_space , void, int > graph_type;
57 #endif
58 
61 
65 
66  CrsMatrix() : dev_config(0, 0) {}
67  CrsMatrix(Stokhos::DeviceConfig dev_config_) : dev_config(dev_config_) {}
68 };
69 
70 // Generic matrix vector multiply kernel for CrsMatrix
71 template <typename MatrixValue,
72  typename Layout,
73  typename Device,
74  typename InputVectorType,
75  typename OutputVectorType>
76 class Multiply< CrsMatrix<MatrixValue,Device,Layout>,
77  InputVectorType,
78  OutputVectorType,
79  void,
80  IntegralRank<1> >
81 {
82 public:
84  typedef InputVectorType input_vector_type;
85  typedef OutputVectorType output_vector_type;
86 
87  typedef Device execution_space;
88  typedef typename execution_space::size_type size_type;
90 
94 
96  const input_vector_type& x,
98  : m_A( A )
99  , m_x( x )
100  , m_y( y )
101  {}
102 
103  //--------------------------------------------------------------------------
104 
105  KOKKOS_INLINE_FUNCTION
106  void operator()( const size_type iRow ) const
107  {
108  const size_type iEntryBegin = m_A.graph.row_map[iRow];
109  const size_type iEntryEnd = m_A.graph.row_map[iRow+1];
110 
111  scalar_type sum = 0;
112 
113  for ( size_type iEntry = iEntryBegin; iEntry < iEntryEnd; ++iEntry ) {
114  sum += m_A.values(iEntry) * m_x( m_A.graph.entries(iEntry) );
115  }
116 
117  m_y(iRow) = sum;
118  }
119 
120  static void apply( const matrix_type & A,
121  const input_vector_type & x,
122  output_vector_type & y )
123  {
124  const size_t row_count = A.graph.row_map.extent(0) - 1;
125  Kokkos::parallel_for( row_count, Multiply(A,x,y) );
126  }
127 };
128 
129 // Generic matrix multi-vector multiply kernel for CrsMatrix
130 template <typename MatrixValue,
131  typename Layout,
132  typename Device,
133  typename InputMultiVectorType,
134  typename OutputMultiVectorType,
135  typename OrdinalType >
136 class Multiply< CrsMatrix<MatrixValue,Device,Layout>,
137  InputMultiVectorType,
138  OutputMultiVectorType,
139  std::vector<OrdinalType>,
140  IntegralRank<2> >
141 {
142 public:
144  typedef InputMultiVectorType input_multi_vector_type;
145  typedef OutputMultiVectorType output_multi_vector_type;
146  typedef std::vector<OrdinalType> column_indices_type;
147 
148  typedef Device execution_space;
149  typedef typename execution_space::size_type size_type;
151 
157 
159  const input_multi_vector_type& x,
161  const column_indices_type& col_indices )
162  : m_A( A )
163  , m_x( x )
164  , m_y( y )
165  , m_col_indices( col_indices )
166  , m_num_vecs( col_indices.size() )
167  {}
168 
169  //--------------------------------------------------------------------------
170 
171  KOKKOS_INLINE_FUNCTION
172  void operator()( const size_type iRow ) const
173  {
174  const size_type iEntryBegin = m_A.graph.row_map[iRow];
175  const size_type iEntryEnd = m_A.graph.row_map[iRow+1];
176 
177  for (size_type j=0; j<m_num_vecs; j++) {
178  size_type iCol = m_col_indices[j];
179 
180  scalar_type sum = 0.0;
181 
182  for ( size_type iEntry = iEntryBegin ; iEntry < iEntryEnd ; ++iEntry ) {
183  sum += m_A.values(iEntry) * m_x( m_A.graph.entries(iEntry), iCol );
184  }
185 
186  m_y( iRow, iCol ) = sum;
187 
188  }
189 
190  }
191 
192  static void apply( const matrix_type& A,
193  const input_multi_vector_type& x,
195  const column_indices_type& col )
196  {
197  const size_t n = A.graph.row_map.extent(0) - 1 ;
198  //Kokkos::parallel_for( n , Multiply(A,x,y,col) );
199 
200  const size_t block_size = 20;
201  const size_t num_vecs = col.size();
202  std::vector<OrdinalType> block_col;
203  block_col.reserve(block_size);
204  for (size_t block=0; block<num_vecs; block+=block_size) {
205  const size_t bs =
206  block+block_size <= num_vecs ? block_size : num_vecs-block;
207  block_col.resize(bs);
208  for (size_t i=0; i<bs; ++i)
209  block_col[i] = col[block+i];
210  Kokkos::parallel_for( n , Multiply(A,x,y,block_col) );
211  }
212  }
213 };
214 
215 #define USE_NEW 1
216 #if USE_NEW
217 // Generic matrix multi-vector multiply kernel for CrsMatrix
218 // Experimenting with blocking of column and row loops to improve cache
219 // performance. Seems to help significantly on SandyBridge, little difference
220 // on MIC (although not extensive investigation of block sizes).
221 template <typename MatrixValue,
222  typename Layout,
223  typename Device,
224  typename InputMultiVectorType,
225  typename OutputMultiVectorType >
226 class Multiply< CrsMatrix<MatrixValue,Device,Layout>,
227  InputMultiVectorType,
228  OutputMultiVectorType,
229  void,
230  IntegralRank<2> >
231 {
232 public:
234  typedef InputMultiVectorType input_multi_vector_type;
235  typedef OutputMultiVectorType output_multi_vector_type;
236 
237  typedef Device execution_space;
238  typedef typename execution_space::size_type size_type;
240 
246 
247  static const size_type m_block_row_size = 32;
248  static const size_type m_block_col_size = 20;
249 
251  const input_multi_vector_type& x,
253  : m_A( A )
254  , m_x( x )
255  , m_y( y )
256  , m_num_row( A.graph.row_map.extent(0)-1 )
257  , m_num_col( m_y.extent(1) )
258  {
259  }
260 
261  //--------------------------------------------------------------------------
262 
263  KOKKOS_INLINE_FUNCTION
264  void operator()( const size_type iBlockRow ) const
265  {
266  // Number of rows in this block
267  const size_type num_row =
268  iBlockRow+m_block_row_size <= m_num_row ?
269  m_block_row_size : m_num_row-iBlockRow;
270 
271  // Loop over block columns of x
272  for (size_type iBlockCol=0; iBlockCol<m_num_col; iBlockCol+=m_block_col_size) {
273  // Number of columns in this block
274  const size_type num_col =
275  iBlockCol+m_block_col_size <= m_num_col ?
276  m_block_col_size : m_num_col-iBlockCol;
277 
278  // Loop over rows in this block of A
279  const size_type iRowEnd = iBlockRow + num_row;
280  for (size_type iRow=iBlockRow; iRow<iRowEnd; ++iRow) {
281 
282  // Range of column entries for this row
283  const size_type iEntryBegin = m_A.graph.row_map[iRow];
284  const size_type iEntryEnd = m_A.graph.row_map[iRow+1];
285 
286  // Loop over columns in this block of x
287  const size_type iColEnd = iBlockCol + num_col;
288  for (size_type iCol=iBlockCol; iCol<iColEnd; iCol++) {
289 
290  // Loop columns of A for this row
291  scalar_type sum = 0.0;
292  for (size_type iEntry = iEntryBegin; iEntry<iEntryEnd; ++iEntry) {
293  sum += m_A.values(iEntry) * m_x( m_A.graph.entries(iEntry), iCol );
294  }
295  m_y( iRow, iCol ) = sum;
296 
297  }
298 
299  }
300 
301  }
302 
303  }
304 
305  static void apply( const matrix_type & A,
306  const input_multi_vector_type& x,
308  {
309  // Parallelize over row blocks of size m_block_row_size
310  const size_type num_row = A.graph.row_map.extent(0) - 1;
311  const size_type n = (num_row+m_block_row_size-1) / m_block_row_size;
312  Kokkos::parallel_for( n , Multiply(A,x,y) );
313  }
314 };
315 #else
316 // Generic matrix multi-vector multiply kernel for CrsMatrix
317 template <typename MatrixValue,
318  typename Layout,
319  typename Device,
320  typename InputMultiVectorType,
321  typename OutputMultiVectorType >
322 class Multiply< CrsMatrix<MatrixValue,Device,Layout>,
323  InputMultiVectorType,
324  OutputMultiVectorType,
325  void,
326  IntegralRank<2> >
327 {
328 public:
329  typedef CrsMatrix<MatrixValue,Device,Layout> matrix_type;
330  typedef InputMultiVectorType input_multi_vector_type;
331  typedef OutputMultiVectorType output_multi_vector_type;
332 
333  typedef Device execution_space;
334  typedef typename execution_space::size_type size_type;
336 
337  const matrix_type m_A;
338  const input_multi_vector_type m_x;
339  output_multi_vector_type m_y;
340  const size_type m_num_vecs;
341 
342  Multiply( const matrix_type& A,
343  const input_multi_vector_type& x,
344  output_multi_vector_type& y)
345  : m_A( A )
346  , m_x( x )
347  , m_y( y )
348  , m_num_vecs( m_y.extent(1) )
349  {}
350 
351  //--------------------------------------------------------------------------
352 
353  KOKKOS_INLINE_FUNCTION
354  void operator()( const size_type iRow ) const
355  {
356  const size_type iEntryBegin = m_A.graph.row_map[iRow];
357  const size_type iEntryEnd = m_A.graph.row_map[iRow+1];
358 
359  for (size_type iCol=0; iCol<m_num_vecs; iCol++) {
360 
361  scalar_type sum = 0.0;
362 
363  for ( size_type iEntry = iEntryBegin ; iEntry < iEntryEnd ; ++iEntry ) {
364  sum += m_A.values(iEntry) * m_x( m_A.graph.entries(iEntry), iCol );
365  }
366 
367  m_y( iRow, iCol ) = sum;
368 
369  }
370 
371  }
372 
373  static void apply( const matrix_type& A,
374  const input_multi_vector_type& x,
375  output_multi_vector_type& y )
376  {
377  const size_t n = A.graph.row_map.extent(0) - 1 ;
378  Kokkos::parallel_for( n , Multiply(A,x,y) );
379 
380  // const size_t block_size = 20;
381  // const size_t num_vecs = col.size();
382  // std::vector<OrdinalType> block_col;
383  // block_col.reserve(block_size);
384  // for (size_t block=0; block<num_vecs; block+=block_size) {
385  // const size_t bs =
386  // block+block_size <= num_vecs ? block_size : num_vecs-block;
387  // block_col.resize(bs);
388  // for (size_t i=0; i<bs; ++i)
389  // block_col[i] = col[block+i];
390  // Kokkos::parallel_for( n , Multiply(A,x,y,block_col) );
391  // }
392  }
393 };
394 #endif
395 
396 #if USE_NEW
397 // Generic matrix multi-vector multiply kernel for CrsMatrix
398 // Experimenting with blocking of column and row loops to improve cache
399 // performance. Seems to help significantly on SandyBridge, little difference
400 // on MIC (although not extensive investigation of block sizes).
401 template <typename MatrixValue,
402  typename Layout,
403  typename Device,
404  typename InputViewType,
405  typename OutputViewType>
406 class Multiply< CrsMatrix<MatrixValue,Device,Layout>,
407  std::vector<InputViewType>,
408  std::vector<OutputViewType>,
409  void,
410  IntegralRank<1> >
411 {
412 public:
414  typedef std::vector<InputViewType> input_multi_vector_type;
415  typedef std::vector<OutputViewType> output_multi_vector_type;
416 
417  typedef Device execution_space;
418  typedef typename execution_space::size_type size_type;
420 
426 
427  static const size_type m_block_row_size = 32;
428  static const size_type m_block_col_size = 20;
429 
431  const input_multi_vector_type& x,
433  : m_A( A )
434  , m_x( x )
435  , m_y( y )
436  , m_num_row( A.graph.row_map.extent(0)-1 )
437  , m_num_col( x.size() )
438  {
439  }
440 
441  //--------------------------------------------------------------------------
442 
443  KOKKOS_INLINE_FUNCTION
444  void operator()( const size_type iBlockRow ) const
445  {
446  // Number of rows in this block
447  const size_type num_row =
448  iBlockRow+m_block_row_size <= m_num_row ?
449  m_block_row_size : m_num_row-iBlockRow;
450 
451  // Loop over block columns of x
452  for (size_type iBlockCol=0; iBlockCol<m_num_col; iBlockCol+=m_block_col_size) {
453  // Number of columns in this block
454  const size_type num_col =
455  iBlockCol+m_block_col_size <= m_num_col ?
456  m_block_col_size : m_num_col-iBlockCol;
457 
458  // Loop over rows in this block of A
459  const size_type iRowEnd = iBlockRow + num_row;
460  for (size_type iRow=iBlockRow; iRow<iRowEnd; ++iRow) {
461 
462  // Range of column entries for this row
463  const size_type iEntryBegin = m_A.graph.row_map[iRow];
464  const size_type iEntryEnd = m_A.graph.row_map[iRow+1];
465 
466  // Loop over columns in this block of x
467  const size_type iColEnd = iBlockCol + num_col;
468  for (size_type iCol=iBlockCol; iCol<iColEnd; iCol++) {
469 
470  // Loop columns of A for this row
471  scalar_type sum = 0.0;
472  for (size_type iEntry = iEntryBegin; iEntry<iEntryEnd; ++iEntry) {
473  sum += m_A.values(iEntry) * m_x[iCol](m_A.graph.entries(iEntry));
474  }
475  m_y[iCol](iRow) = sum;
476 
477  }
478 
479  }
480 
481  }
482 
483  }
484 
485  static void apply( const matrix_type & A,
486  const input_multi_vector_type& x,
488  {
489  // Parallelize over row blocks of size m_block_row_size
490  const size_type num_row = A.graph.row_map.extent(0) - 1;
491  const size_type n = (num_row+m_block_row_size-1) / m_block_row_size;
492  Kokkos::parallel_for( n , Multiply(A,x,y) );
493  }
494 };
495 #else
496 // Generic matrix multi-vector multiply kernel for CrsMatrix
497 template <typename MatrixValue,
498  typename Layout,
499  typename Device,
500  typename InputViewType,
501  typename OutputViewType>
502 class Multiply< CrsMatrix<MatrixValue,Device,Layout>,
503  std::vector<InputViewType>,
504  std::vector<OutputViewType>,
505  void,
506  IntegralRank<1> >
507 {
508 public:
509  typedef CrsMatrix<MatrixValue,Device,Layout> matrix_type;
510  typedef std::vector<InputViewType> input_multi_vector_type;
511  typedef std::vector<OutputViewType> output_multi_vector_type;
512 
513  typedef Device execution_space;
514  typedef typename execution_space::size_type size_type;
515  typedef typename OutputViewType::value_type scalar_type;
516 
517  const matrix_type m_A;
518  const input_multi_vector_type m_x;
519  output_multi_vector_type m_y;
520  const size_type m_num_vecs;
521 
522  Multiply( const matrix_type& A,
523  const input_multi_vector_type& x,
524  output_multi_vector_type& y )
525  : m_A( A )
526  , m_x( x )
527  , m_y( y )
528  , m_num_vecs( x.size() )
529  {
530  }
531 
532  //--------------------------------------------------------------------------
533 
534  KOKKOS_INLINE_FUNCTION
535  void operator()( const size_type iRow ) const
536  {
537  const size_type iEntryBegin = m_A.graph.row_map[iRow];
538  const size_type iEntryEnd = m_A.graph.row_map[iRow+1];
539 
540  for (size_type iCol=0; iCol<m_num_vecs; iCol++) {
541 
542  scalar_type sum = 0.0;
543 
544  for ( size_type iEntry = iEntryBegin ; iEntry < iEntryEnd ; ++iEntry ) {
545  sum += m_A.values(iEntry) * m_x[iCol]( m_A.graph.entries(iEntry) );
546  }
547 
548  m_y[iCol]( iRow) = sum;
549 
550  }
551 
552  }
553 
554  static void apply( const matrix_type & A,
555  const input_multi_vector_type& x,
556  output_multi_vector_type& y )
557  {
558  const size_t n = A.graph.row_map.extent(0) - 1 ;
559  Kokkos::parallel_for( n , Multiply(A,x,y) );
560 
561  // const size_t block_size = 20;
562  // const size_t num_vecs = x.size();
563  // input_multi_vector_type xx;
564  // output_multi_vector_type yy;
565  // xx.reserve(block_size);
566  // yy.reserve(block_size);
567  // for (size_t block=0; block<num_vecs; block+=block_size) {
568  // const size_t bs =
569  // block+block_size <= num_vecs ? block_size : num_vecs-block;
570  // xx.resize(bs);
571  // yy.resize(bs);
572  // for (size_t i=0; i<bs; ++i) {
573  // xx[i] = x[block+i];
574  // yy[i] = y[block+i];
575  // }
576  // Kokkos::parallel_for( n , Multiply(A,xx,yy) );
577  // }
578  }
579 };
580 #endif
581 
582 // Matrix multivector multiply specializations for one column at a time
584 template <typename MatrixValue,
585  typename Layout,
586  typename Device,
587  typename InputMultiVectorType,
588  typename OutputMultiVectorType,
589  typename OrdinalType>
591  const InputMultiVectorType& x,
592  OutputMultiVectorType& y,
593  const std::vector<OrdinalType>& col_indices,
595 {
596  typedef CrsMatrix<MatrixValue,Device,Layout> MatrixType;
597 
598  typedef Kokkos::View<typename InputMultiVectorType::value_type*, typename InputMultiVectorType::array_layout, Device, Kokkos::MemoryUnmanaged> InputVectorType;
599  typedef Kokkos::View<typename OutputMultiVectorType::value_type*, typename OutputMultiVectorType::array_layout, Device, Kokkos::MemoryUnmanaged> OutputVectorType;
601  for (size_t i=0; i<col_indices.size(); ++i) {
602  InputVectorType x_view =
603  Kokkos::subview( x , Kokkos::ALL() , col_indices[i] );
604  OutputVectorType y_view =
605  Kokkos::subview( y , Kokkos::ALL() , col_indices[i] );
606  multiply_type::apply( A , x_view , y_view );
607  }
608 }
609 
610 template <typename MatrixValue,
611  typename Layout,
612  typename Device,
613  typename InputVectorType,
614  typename OutputVectorType>
616  const std::vector<InputVectorType>& x,
617  std::vector<OutputVectorType>& y,
619 {
620  typedef CrsMatrix<MatrixValue,Device,Layout> MatrixType;
622  for (size_t i=0; i<x.size(); ++i) {
623  multiply_type::apply( A , x[i] , y[i] );
624  }
625 }
626 
627 } // namespace Stokhos
628 
629 //----------------------------------------------------------------------------
630 //----------------------------------------------------------------------------
631 
632 namespace Kokkos {
633 
634 template <typename ValueType, typename Layout, typename Device>
638  mirror_A.values = Kokkos::create_mirror(A.values);
639  mirror_A.graph = Kokkos::create_mirror(A.graph); // this deep copies
640  mirror_A.dev_config = A.dev_config;
641  return mirror_A;
642 }
643 
644 template <typename ValueType, typename Layout, typename Device>
649  mirror_A.graph = Kokkos::create_mirror(A.graph); // this deep copies
650  mirror_A.dev_config = A.dev_config;
651  return mirror_A;
652 }
653 
654 template <typename ValueType, typename Layout, typename DstDevice,
655  typename SrcDevice>
656 void
659  Kokkos::deep_copy(dst.values, src.values);
660 }
661 
662 } // namespace Kokkos
663 
664 //----------------------------------------------------------------------------
665 //----------------------------------------------------------------------------
666 
667 namespace Stokhos {
668 
669 // MatrixMarket writer for CrsMatrix
670 template < typename MatrixValue, typename Layout, typename Device >
671 class MatrixMarketWriter< CrsMatrix<MatrixValue,Device,Layout> >
672 {
673 public:
675  typedef Device execution_space ;
676  typedef typename execution_space::size_type size_type ;
677 
678  static void write(const matrix_type& A, const std::string& filename) {
679  std::ofstream file(filename.c_str());
680  file.precision(16);
681  file.setf(std::ios::scientific);
682 
684  Kokkos::deep_copy(hA, A);
685 
686  const size_type nRow = hA.graph.row_map.extent(0) - 1 ;
687 
688  // Write banner
689  file << "%%MatrixMarket matrix coordinate real general" << std::endl;
690  file << nRow << " " << nRow << " " << hA.values.extent(0) << std::endl;
691 
692  for (size_type row=0; row<nRow; ++row) {
693  size_type entryBegin = hA.graph.row_map(row);
694  size_type entryEnd = hA.graph.row_map(row+1);
695  for (size_type entry=entryBegin; entry<entryEnd; ++entry) {
696  file << row+1 << " " << hA.graph.entries(entry)+1 << " "
697  << std::setw(22) << hA.values(entry) << std::endl;
698  }
699  }
700 
701  file.close();
702  }
703 };
704 
705 } // namespace Stokhos
706 
707 #endif /* #ifndef STOKHOS_CRSMATRIX_HPP */
static void apply(const matrix_type &A, const input_multi_vector_type &x, output_multi_vector_type &y)
Kokkos::DefaultExecutionSpace execution_space
Stokhos::CrsMatrix< ValueType, Device, Layout >::host_mirror_type create_mirror(const Stokhos::CrsMatrix< ValueType, Device, Layout > &A)
Dim3(const size_t x_, const size_t y_=1, const size_t z_=1)
static void apply(const matrix_type &A, const input_multi_vector_type &x, output_multi_vector_type &y, const column_indices_type &col)
CrsMatrix(Stokhos::DeviceConfig dev_config_)
Multiply(const matrix_type &A, const input_multi_vector_type &x, output_multi_vector_type &y, const column_indices_type &col_indices)
Stokhos::CrsMatrix< ValueType, Device, Layout >::host_mirror_type create_mirror_view(const Stokhos::CrsMatrix< ValueType, Device, Layout > &A)
void multiply(const CrsMatrix< MatrixValue, Device, Layout > &A, const InputMultiVectorType &x, OutputMultiVectorType &y, const std::vector< OrdinalType > &col_indices, SingleColumnMultivectorMultiply)
static void write(const matrix_type &A, const std::string &filename)
DeviceConfig(const size_t num_blocks_, const size_t threads_per_block_x_, const size_t threads_per_block_y_=1, const size_t threads_per_block_z_=1)
Kokkos::View< value_type[], Layout, execution_space > values_type
static void apply(const matrix_type &A, const input_vector_type &x, output_vector_type &y)
KokkosSparse::StaticCrsGraph< int, Layout, execution_space, void, int > graph_type
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
CrsMatrix< ValueType, typename values_type::host_mirror_space, Layout > host_mirror_type
Stokhos::DeviceConfig dev_config
host_mirror_type HostMirror
int n
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< RD, RP...> >::value &&Kokkos::is_view_uq_pce< Kokkos::View< XD, XP...> >::value >::type sum(const Kokkos::View< RD, RP...> &r, const Kokkos::View< XD, XP...> &x)