Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Stokhos_CrsMatrix.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Stokhos Package
4 //
5 // Copyright 2009 NTESS and the Stokhos contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef STOKHOS_CRSMATRIX_HPP
11 #define STOKHOS_CRSMATRIX_HPP
12 
13 #include <fstream>
14 #include <iomanip>
15 
16 #include "Kokkos_Core.hpp"
17 #include "Kokkos_StaticCrsGraph.hpp"
18 
19 #include "Stokhos_Multiply.hpp"
20 #include "Stokhos_MatrixMarket.hpp"
21 
22 namespace Stokhos {
23 
// DeviceConfig: plain struct bundling a block count with a three-component
// thread-block shape (Dim3) built from the per-block thread counts given to
// the constructor.
// NOTE(review): this listing skips its own lines 31, 33 and 41. The
// constructor below initializes `block_dim`, which is not declared in the
// visible lines, and the initializer list ends in a dangling comma -- so the
// elided lines presumably declare `block_dim` plus one more member and its
// initializer. Confirm against the original header.
24 struct DeviceConfig {
// Three size_t extents; y and z default to 1 when omitted.
25  struct Dim3 {
26  size_t x, y, z;
27  Dim3(const size_t x_, const size_t y_ = 1, const size_t z_ = 1) :
28  x(x_), y(y_), z(z_) {}
29  };
30 
32  size_t num_blocks;
34 
// num_blocks_ is stored as-is; the three threads_per_block_* values are
// forwarded to the Dim3 constructor (y and z default to 1).
35  DeviceConfig(const size_t num_blocks_,
36  const size_t threads_per_block_x_,
37  const size_t threads_per_block_y_ = 1,
38  const size_t threads_per_block_z_ = 1) :
39  block_dim(threads_per_block_x_,threads_per_block_y_,threads_per_block_z_),
40  num_blocks(num_blocks_),
42  {}
43 };
44 
// CrsMatrix: class template parameterized on value type, device and layout
// (layout defaults to Kokkos::LayoutRight). The visible lines define the
// value/graph typedefs and two constructors that set `dev_config`.
// NOTE(review): this listing skips its own lines 59 and 61-63. Other code in
// this file reads `.values`, `.graph`, `.dev_config` and `HostMirror` on this
// type, so those are presumably declared in the elided lines -- confirm
// against the original header.
46 template <typename ValueType, typename Device,
47  typename Layout = Kokkos::LayoutRight>
48 class CrsMatrix {
49 public:
50  typedef Device execution_space;
51  typedef ValueType value_type;
// View type used for the matrix coefficients.
52  typedef Kokkos::View< value_type[], Layout, execution_space > values_type;
// The StaticCrsGraph template argument list differs between the deprecated
// and current Kokkos code paths (an extra `void` argument in the latter).
53 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE // Don't remove this until Kokkos has removed the deprecated code path probably around September 2018
54  typedef Kokkos::StaticCrsGraph< int , Layout, execution_space , int > graph_type;
55 #else
56  typedef Kokkos::StaticCrsGraph< int , Layout, execution_space , void, int > graph_type;
57 #endif
58 
60 
64 
// Default construction uses DeviceConfig(0, 0), i.e. zero blocks and a
// zero x-extent with y = z = 1.
65  CrsMatrix() : dev_config(0, 0) {}
// NOTE(review): single-argument constructor is not `explicit`, so a
// DeviceConfig converts implicitly to a CrsMatrix.
66  CrsMatrix(Stokhos::DeviceConfig dev_config_) : dev_config(dev_config_) {}
67 };
68 
69 // Generic matrix vector multiply kernel for CrsMatrix
// Multiply specialization for CrsMatrix with IntegralRank<1> input/output
// vectors. The functor's operator() computes one row of y = A * x; apply()
// launches it over all rows with Kokkos::parallel_for.
// NOTE(review): this listing skips its own lines 82, 88, 90-92, 94 and 96.
// `matrix_type`, `scalar_type`, the members m_A / m_x / m_y and the first and
// last lines of the constructor signature are used below but not visible --
// confirm against the original header.
70 template <typename MatrixValue,
71  typename Layout,
72  typename Device,
73  typename InputVectorType,
74  typename OutputVectorType>
75 class Multiply< CrsMatrix<MatrixValue,Device,Layout>,
76  InputVectorType,
77  OutputVectorType,
78  void,
79  IntegralRank<1> >
80 {
81 public:
83  typedef InputVectorType input_vector_type;
84  typedef OutputVectorType output_vector_type;
85 
86  typedef Device execution_space;
87  typedef typename execution_space::size_type size_type;
89 
93 
// Constructor (signature partially elided): stores A, x and y in the functor.
95  const input_vector_type& x,
97  : m_A( A )
98  , m_x( x )
99  , m_y( y )
100  {}
101 
102  //--------------------------------------------------------------------------
103 
// Row kernel: entries of row iRow occupy [row_map[iRow], row_map[iRow+1]).
// Accumulates values(e) * x(entries(e)) over that range and overwrites
// m_y(iRow) with the result (y is assigned, not accumulated into).
104  KOKKOS_INLINE_FUNCTION
105  void operator()( const size_type iRow ) const
106  {
107  const size_type iEntryBegin = m_A.graph.row_map[iRow];
108  const size_type iEntryEnd = m_A.graph.row_map[iRow+1];
109 
110  scalar_type sum = 0;
111 
112  for ( size_type iEntry = iEntryBegin; iEntry < iEntryEnd; ++iEntry ) {
113  sum += m_A.values(iEntry) * m_x( m_A.graph.entries(iEntry) );
114  }
115 
116  m_y(iRow) = sum;
117  }
118 
// Entry point: the row count is taken as (row_map extent - 1) and one work
// item is launched per row.
// NOTE(review): if row_map has extent 0 the unsigned subtraction wraps --
// confirm callers always pass an allocated graph.
119  static void apply( const matrix_type & A,
120  const input_vector_type & x,
121  output_vector_type & y )
122  {
123  const size_t row_count = A.graph.row_map.extent(0) - 1;
124  Kokkos::parallel_for( row_count, Multiply(A,x,y) );
125  }
126 };
127 
128 // Generic matrix multi-vector multiply kernel for CrsMatrix
// Multiply specialization for CrsMatrix with IntegralRank<2> multi-vectors
// and an explicit std::vector of column indices: only the listed columns of
// x are multiplied and only the matching columns of y are written.
// NOTE(review): this listing skips its own lines 142, 149, 151-155, 157 and
// 159 (and 193 in apply). `matrix_type`, `scalar_type`, the data members and
// parts of the constructor/apply signatures are used but not visible --
// confirm against the original header.
129 template <typename MatrixValue,
130  typename Layout,
131  typename Device,
132  typename InputMultiVectorType,
133  typename OutputMultiVectorType,
134  typename OrdinalType >
135 class Multiply< CrsMatrix<MatrixValue,Device,Layout>,
136  InputMultiVectorType,
137  OutputMultiVectorType,
138  std::vector<OrdinalType>,
139  IntegralRank<2> >
140 {
141 public:
143  typedef InputMultiVectorType input_multi_vector_type;
144  typedef OutputMultiVectorType output_multi_vector_type;
145  typedef std::vector<OrdinalType> column_indices_type;
146 
147  typedef Device execution_space;
148  typedef typename execution_space::size_type size_type;
150 
156 
// Constructor (signature partially elided): stores A, x, y and the column
// index list; m_num_vecs is the number of listed columns.
158  const input_multi_vector_type& x,
160  const column_indices_type& col_indices )
161  : m_A( A )
162  , m_x( x )
163  , m_y( y )
164  , m_col_indices( col_indices )
165  , m_num_vecs( col_indices.size() )
166  {}
167 
168  //--------------------------------------------------------------------------
169 
// Row kernel: for each listed column iCol, accumulates
// values(e) * x(entries(e), iCol) over the entries of row iRow and overwrites
// m_y(iRow, iCol).
// NOTE(review): m_col_indices is indexed with operator[] inside the kernel
// and column_indices_type is std::vector, so this functor looks host-only --
// confirm.
170  KOKKOS_INLINE_FUNCTION
171  void operator()( const size_type iRow ) const
172  {
173  const size_type iEntryBegin = m_A.graph.row_map[iRow];
174  const size_type iEntryEnd = m_A.graph.row_map[iRow+1];
175 
176  for (size_type j=0; j<m_num_vecs; j++) {
177  size_type iCol = m_col_indices[j];
178 
179  scalar_type sum = 0.0;
180 
181  for ( size_type iEntry = iEntryBegin ; iEntry < iEntryEnd ; ++iEntry ) {
182  sum += m_A.values(iEntry) * m_x( m_A.graph.entries(iEntry), iCol );
183  }
184 
185  m_y( iRow, iCol ) = sum;
186 
187  }
188 
189  }
190 
// Entry point: walks `col` in chunks of at most block_size (20) indices,
// copies each chunk into block_col and issues one parallel_for over all rows
// per chunk. The last chunk holds the remaining num_vecs - block indices.
191  static void apply( const matrix_type& A,
192  const input_multi_vector_type& x,
194  const column_indices_type& col )
195  {
196  const size_t n = A.graph.row_map.extent(0) - 1 ;
197  //Kokkos::parallel_for( n , Multiply(A,x,y,col) );
198 
199  const size_t block_size = 20;
200  const size_t num_vecs = col.size();
201  std::vector<OrdinalType> block_col;
202  block_col.reserve(block_size);
203  for (size_t block=0; block<num_vecs; block+=block_size) {
204  const size_t bs =
205  block+block_size <= num_vecs ? block_size : num_vecs-block;
206  block_col.resize(bs);
207  for (size_t i=0; i<bs; ++i)
208  block_col[i] = col[block+i];
209  Kokkos::parallel_for( n , Multiply(A,x,y,block_col) );
210  }
211  }
212 };
213 
214 #define USE_NEW 1
215 #if USE_NEW
216 // Generic matrix multi-vector multiply kernel for CrsMatrix
217 // Experimenting with blocking of column and row loops to improve cache
218 // performance. Seems to help significantly on SandyBridge, little difference
219 // on MIC (although not extensive investigation of block sizes).
220 template <typename MatrixValue,
221  typename Layout,
222  typename Device,
223  typename InputMultiVectorType,
224  typename OutputMultiVectorType >
225 class Multiply< CrsMatrix<MatrixValue,Device,Layout>,
226  InputMultiVectorType,
227  OutputMultiVectorType,
228  void,
229  IntegralRank<2> >
230 {
231 public:
233  typedef InputMultiVectorType input_multi_vector_type;
234  typedef OutputMultiVectorType output_multi_vector_type;
235 
236  typedef Device execution_space;
237  typedef typename execution_space::size_type size_type;
239 
245 
246  static const size_type m_block_row_size = 32;
247  static const size_type m_block_col_size = 20;
248 
250  const input_multi_vector_type& x,
252  : m_A( A )
253  , m_x( x )
254  , m_y( y )
255  , m_num_row( A.graph.row_map.extent(0)-1 )
256  , m_num_col( m_y.extent(1) )
257  {
258  }
259 
260  //--------------------------------------------------------------------------
261 
262  KOKKOS_INLINE_FUNCTION
263  void operator()( const size_type iBlockRow ) const
264  {
265  // Number of rows in this block
266  const size_type num_row =
267  iBlockRow+m_block_row_size <= m_num_row ?
268  m_block_row_size : m_num_row-iBlockRow;
269 
270  // Loop over block columns of x
271  for (size_type iBlockCol=0; iBlockCol<m_num_col; iBlockCol+=m_block_col_size) {
272  // Number of columns in this block
273  const size_type num_col =
274  iBlockCol+m_block_col_size <= m_num_col ?
275  m_block_col_size : m_num_col-iBlockCol;
276 
277  // Loop over rows in this block of A
278  const size_type iRowEnd = iBlockRow + num_row;
279  for (size_type iRow=iBlockRow; iRow<iRowEnd; ++iRow) {
280 
281  // Range of column entries for this row
282  const size_type iEntryBegin = m_A.graph.row_map[iRow];
283  const size_type iEntryEnd = m_A.graph.row_map[iRow+1];
284 
285  // Loop over columns in this block of x
286  const size_type iColEnd = iBlockCol + num_col;
287  for (size_type iCol=iBlockCol; iCol<iColEnd; iCol++) {
288 
289  // Loop columns of A for this row
290  scalar_type sum = 0.0;
291  for (size_type iEntry = iEntryBegin; iEntry<iEntryEnd; ++iEntry) {
292  sum += m_A.values(iEntry) * m_x( m_A.graph.entries(iEntry), iCol );
293  }
294  m_y( iRow, iCol ) = sum;
295 
296  }
297 
298  }
299 
300  }
301 
302  }
303 
304  static void apply( const matrix_type & A,
305  const input_multi_vector_type& x,
307  {
308  // Parallelize over row blocks of size m_block_row_size
309  const size_type num_row = A.graph.row_map.extent(0) - 1;
310  const size_type n = (num_row+m_block_row_size-1) / m_block_row_size;
311  Kokkos::parallel_for( n , Multiply(A,x,y) );
312  }
313 };
314 #else
315 // Generic matrix multi-vector multiply kernel for CrsMatrix
// Non-blocked alternative (compiled only when USE_NEW is 0) of the
// IntegralRank<2> multi-vector Multiply specialization: one work item per
// row, looping over every column of y.
// NOTE(review): this listing skips its own line 334; `scalar_type` is used in
// operator() but its typedef is not visible -- confirm against the original
// header.
316 template <typename MatrixValue,
317  typename Layout,
318  typename Device,
319  typename InputMultiVectorType,
320  typename OutputMultiVectorType >
321 class Multiply< CrsMatrix<MatrixValue,Device,Layout>,
322  InputMultiVectorType,
323  OutputMultiVectorType,
324  void,
325  IntegralRank<2> >
326 {
327 public:
328  typedef CrsMatrix<MatrixValue,Device,Layout> matrix_type;
329  typedef InputMultiVectorType input_multi_vector_type;
330  typedef OutputMultiVectorType output_multi_vector_type;
331 
332  typedef Device execution_space;
333  typedef typename execution_space::size_type size_type;
335 
// Functor state: copies of the matrix and the two multi-vectors, plus the
// column count.
336  const matrix_type m_A;
337  const input_multi_vector_type m_x;
338  output_multi_vector_type m_y;
339  const size_type m_num_vecs;
340 
// m_num_vecs is read from the second extent of y (via the already
// initialized member m_y).
341  Multiply( const matrix_type& A,
342  const input_multi_vector_type& x,
343  output_multi_vector_type& y)
344  : m_A( A )
345  , m_x( x )
346  , m_y( y )
347  , m_num_vecs( m_y.extent(1) )
348  {}
349 
350  //--------------------------------------------------------------------------
351 
// Row kernel: for each column iCol, accumulates values(e) * x(entries(e), iCol)
// over the entries of row iRow and overwrites m_y(iRow, iCol).
352  KOKKOS_INLINE_FUNCTION
353  void operator()( const size_type iRow ) const
354  {
355  const size_type iEntryBegin = m_A.graph.row_map[iRow];
356  const size_type iEntryEnd = m_A.graph.row_map[iRow+1];
357 
358  for (size_type iCol=0; iCol<m_num_vecs; iCol++) {
359 
360  scalar_type sum = 0.0;
361 
362  for ( size_type iEntry = iEntryBegin ; iEntry < iEntryEnd ; ++iEntry ) {
363  sum += m_A.values(iEntry) * m_x( m_A.graph.entries(iEntry), iCol );
364  }
365 
366  m_y( iRow, iCol ) = sum;
367 
368  }
369 
370  }
371 
// Entry point: one work item per row, row count = row_map extent - 1.
// The commented-out code below is a disabled column-blocked launch.
372  static void apply( const matrix_type& A,
373  const input_multi_vector_type& x,
374  output_multi_vector_type& y )
375  {
376  const size_t n = A.graph.row_map.extent(0) - 1 ;
377  Kokkos::parallel_for( n , Multiply(A,x,y) );
378 
379  // const size_t block_size = 20;
380  // const size_t num_vecs = col.size();
381  // std::vector<OrdinalType> block_col;
382  // block_col.reserve(block_size);
383  // for (size_t block=0; block<num_vecs; block+=block_size) {
384  // const size_t bs =
385  // block+block_size <= num_vecs ? block_size : num_vecs-block;
386  // block_col.resize(bs);
387  // for (size_t i=0; i<bs; ++i)
388  // block_col[i] = col[block+i];
389  // Kokkos::parallel_for( n , Multiply(A,x,y,block_col) );
390  // }
391  }
392 };
393 #endif
394 
395 #if USE_NEW
396 // Generic matrix multi-vector multiply kernel for CrsMatrix
397 // Experimenting with blocking of column and row loops to improve cache
398 // performance. Seems to help significantly on SandyBridge, little difference
399 // on MIC (although not extensive investigation of block sizes).
400 template <typename MatrixValue,
401  typename Layout,
402  typename Device,
403  typename InputViewType,
404  typename OutputViewType>
405 class Multiply< CrsMatrix<MatrixValue,Device,Layout>,
406  std::vector<InputViewType>,
407  std::vector<OutputViewType>,
408  void,
409  IntegralRank<1> >
410 {
411 public:
413  typedef std::vector<InputViewType> input_multi_vector_type;
414  typedef std::vector<OutputViewType> output_multi_vector_type;
415 
416  typedef Device execution_space;
417  typedef typename execution_space::size_type size_type;
419 
425 
426  static const size_type m_block_row_size = 32;
427  static const size_type m_block_col_size = 20;
428 
430  const input_multi_vector_type& x,
432  : m_A( A )
433  , m_x( x )
434  , m_y( y )
435  , m_num_row( A.graph.row_map.extent(0)-1 )
436  , m_num_col( x.size() )
437  {
438  }
439 
440  //--------------------------------------------------------------------------
441 
442  KOKKOS_INLINE_FUNCTION
443  void operator()( const size_type iBlockRow ) const
444  {
445  // Number of rows in this block
446  const size_type num_row =
447  iBlockRow+m_block_row_size <= m_num_row ?
448  m_block_row_size : m_num_row-iBlockRow;
449 
450  // Loop over block columns of x
451  for (size_type iBlockCol=0; iBlockCol<m_num_col; iBlockCol+=m_block_col_size) {
452  // Number of columns in this block
453  const size_type num_col =
454  iBlockCol+m_block_col_size <= m_num_col ?
455  m_block_col_size : m_num_col-iBlockCol;
456 
457  // Loop over rows in this block of A
458  const size_type iRowEnd = iBlockRow + num_row;
459  for (size_type iRow=iBlockRow; iRow<iRowEnd; ++iRow) {
460 
461  // Range of column entries for this row
462  const size_type iEntryBegin = m_A.graph.row_map[iRow];
463  const size_type iEntryEnd = m_A.graph.row_map[iRow+1];
464 
465  // Loop over columns in this block of x
466  const size_type iColEnd = iBlockCol + num_col;
467  for (size_type iCol=iBlockCol; iCol<iColEnd; iCol++) {
468 
469  // Loop columns of A for this row
470  scalar_type sum = 0.0;
471  for (size_type iEntry = iEntryBegin; iEntry<iEntryEnd; ++iEntry) {
472  sum += m_A.values(iEntry) * m_x[iCol](m_A.graph.entries(iEntry));
473  }
474  m_y[iCol](iRow) = sum;
475 
476  }
477 
478  }
479 
480  }
481 
482  }
483 
484  static void apply( const matrix_type & A,
485  const input_multi_vector_type& x,
487  {
488  // Parallelize over row blocks of size m_block_row_size
489  const size_type num_row = A.graph.row_map.extent(0) - 1;
490  const size_type n = (num_row+m_block_row_size-1) / m_block_row_size;
491  Kokkos::parallel_for( n , Multiply(A,x,y) );
492  }
493 };
494 #else
495 // Generic matrix multi-vector multiply kernel for CrsMatrix
496 template <typename MatrixValue,
497  typename Layout,
498  typename Device,
499  typename InputViewType,
500  typename OutputViewType>
501 class Multiply< CrsMatrix<MatrixValue,Device,Layout>,
502  std::vector<InputViewType>,
503  std::vector<OutputViewType>,
504  void,
505  IntegralRank<1> >
506 {
507 public:
508  typedef CrsMatrix<MatrixValue,Device,Layout> matrix_type;
509  typedef std::vector<InputViewType> input_multi_vector_type;
510  typedef std::vector<OutputViewType> output_multi_vector_type;
511 
512  typedef Device execution_space;
513  typedef typename execution_space::size_type size_type;
514  typedef typename OutputViewType::value_type scalar_type;
515 
516  const matrix_type m_A;
517  const input_multi_vector_type m_x;
518  output_multi_vector_type m_y;
519  const size_type m_num_vecs;
520 
521  Multiply( const matrix_type& A,
522  const input_multi_vector_type& x,
523  output_multi_vector_type& y )
524  : m_A( A )
525  , m_x( x )
526  , m_y( y )
527  , m_num_vecs( x.size() )
528  {
529  }
530 
531  //--------------------------------------------------------------------------
532 
533  KOKKOS_INLINE_FUNCTION
534  void operator()( const size_type iRow ) const
535  {
536  const size_type iEntryBegin = m_A.graph.row_map[iRow];
537  const size_type iEntryEnd = m_A.graph.row_map[iRow+1];
538 
539  for (size_type iCol=0; iCol<m_num_vecs; iCol++) {
540 
541  scalar_type sum = 0.0;
542 
543  for ( size_type iEntry = iEntryBegin ; iEntry < iEntryEnd ; ++iEntry ) {
544  sum += m_A.values(iEntry) * m_x[iCol]( m_A.graph.entries(iEntry) );
545  }
546 
547  m_y[iCol]( iRow) = sum;
548 
549  }
550 
551  }
552 
553  static void apply( const matrix_type & A,
554  const input_multi_vector_type& x,
555  output_multi_vector_type& y )
556  {
557  const size_t n = A.graph.row_map.extent(0) - 1 ;
558  Kokkos::parallel_for( n , Multiply(A,x,y) );
559 
560  // const size_t block_size = 20;
561  // const size_t num_vecs = x.size();
562  // input_multi_vector_type xx;
563  // output_multi_vector_type yy;
564  // xx.reserve(block_size);
565  // yy.reserve(block_size);
566  // for (size_t block=0; block<num_vecs; block+=block_size) {
567  // const size_t bs =
568  // block+block_size <= num_vecs ? block_size : num_vecs-block;
569  // xx.resize(bs);
570  // yy.resize(bs);
571  // for (size_t i=0; i<bs; ++i) {
572  // xx[i] = x[block+i];
573  // yy[i] = y[block+i];
574  // }
575  // Kokkos::parallel_for( n , Multiply(A,xx,yy) );
576  // }
577  }
578 };
579 #endif
580 
581 // Matrix multivector multiply specializations for one column at a time
// Single-column multiply for rank-2 multi-vectors: for each index in
// col_indices, takes that column of x and y as rank-1 unmanaged subviews and
// runs the rank-1 Multiply kernel on the pair.
// NOTE(review): this listing skips its own lines 589, 593 and 599. The
// function name/first parameter, the last (tag) parameter and the
// `multiply_type` typedef are not visible -- confirm against the original
// header.
583 template <typename MatrixValue,
584  typename Layout,
585  typename Device,
586  typename InputMultiVectorType,
587  typename OutputMultiVectorType,
588  typename OrdinalType>
590  const InputMultiVectorType& x,
591  OutputMultiVectorType& y,
592  const std::vector<OrdinalType>& col_indices,
594 {
595  typedef CrsMatrix<MatrixValue,Device,Layout> MatrixType;
596 
// Rank-1 unmanaged view types that reuse the value type and array layout of
// the corresponding multi-vector.
597  typedef Kokkos::View<typename InputMultiVectorType::value_type*, typename InputMultiVectorType::array_layout, Device, Kokkos::MemoryUnmanaged> InputVectorType;
598  typedef Kokkos::View<typename OutputMultiVectorType::value_type*, typename OutputMultiVectorType::array_layout, Device, Kokkos::MemoryUnmanaged> OutputVectorType;
// One kernel launch per requested column.
600  for (size_t i=0; i<col_indices.size(); ++i) {
601  InputVectorType x_view =
602  Kokkos::subview( x , Kokkos::ALL() , col_indices[i] );
603  OutputVectorType y_view =
604  Kokkos::subview( y , Kokkos::ALL() , col_indices[i] );
605  multiply_type::apply( A , x_view , y_view );
606  }
607 }
608 
// Single-column multiply for multi-vectors stored as std::vector of views:
// applies `multiply_type` to each (x[i], y[i]) pair in turn.
// NOTE(review): this listing skips its own lines 614, 617 and 620. The
// function name/first parameter, the last (tag) parameter and the
// `multiply_type` typedef are not visible -- confirm against the original
// header.
609 template <typename MatrixValue,
610  typename Layout,
611  typename Device,
612  typename InputVectorType,
613  typename OutputVectorType>
615  const std::vector<InputVectorType>& x,
616  std::vector<OutputVectorType>& y,
618 {
619  typedef CrsMatrix<MatrixValue,Device,Layout> MatrixType;
// The loop bound is x.size() and y is indexed with the same i.
// NOTE(review): nothing visible checks y.size() >= x.size() -- confirm
// callers guarantee it.
621  for (size_t i=0; i<x.size(); ++i) {
622  multiply_type::apply( A , x[i] , y[i] );
623  }
624 }
625 
626 } // namespace Stokhos
627 
628 //----------------------------------------------------------------------------
629 //----------------------------------------------------------------------------
630 
631 namespace Kokkos {
632 
// Builds a mirror of a Stokhos::CrsMatrix: mirrors the values view and the
// graph, copies dev_config, and returns the result.
// NOTE(review): this listing skips its own lines 634-636, which hold the
// return type, function name, parameter list and the declaration of
// `mirror_A` -- confirm against the original header.
633 template <typename ValueType, typename Layout, typename Device>
637  mirror_A.values = Kokkos::create_mirror(A.values);
638  mirror_A.graph = Kokkos::create_mirror(A.graph); // this deep copies
639  mirror_A.dev_config = A.dev_config;
640  return mirror_A;
641 }
642 
// Mirror-view counterpart for Stokhos::CrsMatrix. The visible lines mirror
// the graph, copy dev_config and return the result.
// NOTE(review): this listing skips its own lines 644-647, which hold the
// function header and the declaration of `mirror_A`. No assignment to
// `mirror_A.values` is visible; it is presumably on an elided line --
// confirm against the original header.
643 template <typename ValueType, typename Layout, typename Device>
648  mirror_A.graph = Kokkos::create_mirror(A.graph); // this deep copies
649  mirror_A.dev_config = A.dev_config;
650  return mirror_A;
651 }
652 
// deep_copy overload for Stokhos::CrsMatrix across (possibly different)
// devices. The visible body copies only the `values` view from src to dst;
// the graph and dev_config are not touched here.
// NOTE(review): this listing skips its own lines 656-657, which hold the
// function name and parameter list -- confirm against the original header.
653 template <typename ValueType, typename Layout, typename DstDevice,
654  typename SrcDevice>
655 void
658  Kokkos::deep_copy(dst.values, src.values);
659 }
660 
661 } // namespace Kokkos
662 
663 //----------------------------------------------------------------------------
664 //----------------------------------------------------------------------------
665 
666 namespace Stokhos {
667 
668 // MatrixMarket writer for CrsMatrix
// MatrixMarketWriter specialization for CrsMatrix: write() dumps the matrix
// to a text file in Matrix Market coordinate format, one "row col value"
// line per stored entry.
// NOTE(review): this listing skips its own lines 673 and 682. The
// `matrix_type` typedef and the declaration of the host copy `hA` are used
// but not visible -- confirm against the original header.
669 template < typename MatrixValue, typename Layout, typename Device >
670 class MatrixMarketWriter< CrsMatrix<MatrixValue,Device,Layout> >
671 {
672 public:
674  typedef Device execution_space ;
675  typedef typename execution_space::size_type size_type ;
676 
// Writes A to `filename`. Values are printed in scientific notation with
// precision 16.
// NOTE(review): the stream is never checked for a failed open, so an
// unwritable path produces no error here.
677  static void write(const matrix_type& A, const std::string& filename) {
678  std::ofstream file(filename.c_str());
679  file.precision(16);
680  file.setf(std::ios::scientific);
681 
// Copy the matrix into hA before reading it on the host.
683  Kokkos::deep_copy(hA, A);
684 
685  const size_type nRow = hA.graph.row_map.extent(0) - 1 ;
686 
// The size line prints nRow for both dimensions, followed by the number of
// stored values.
// NOTE(review): this treats the matrix as square -- confirm that is intended.
687  // Write banner
688  file << "%%MatrixMarket matrix coordinate real general" << std::endl;
689  file << nRow << " " << nRow << " " << hA.values.extent(0) << std::endl;
690 
// Row and column indices are written 1-based (+1 on both); the value is
// right-aligned in a 22-character field.
// NOTE(review): std::endl flushes the stream after every entry.
691  for (size_type row=0; row<nRow; ++row) {
692  size_type entryBegin = hA.graph.row_map(row);
693  size_type entryEnd = hA.graph.row_map(row+1);
694  for (size_type entry=entryBegin; entry<entryEnd; ++entry) {
695  file << row+1 << " " << hA.graph.entries(entry)+1 << " "
696  << std::setw(22) << hA.values(entry) << std::endl;
697  }
698  }
699 
700  file.close();
701  }
702 };
703 
704 } // namespace Stokhos
705 
706 #endif /* #ifndef STOKHOS_CRSMATRIX_HPP */
static void apply(const matrix_type &A, const input_multi_vector_type &x, output_multi_vector_type &y)
Kokkos::DefaultExecutionSpace execution_space
Dim3(const size_t x_, const size_t y_=1, const size_t z_=1)
static void apply(const matrix_type &A, const input_multi_vector_type &x, output_multi_vector_type &y, const column_indices_type &col)
CrsMatrix(Stokhos::DeviceConfig dev_config_)
Multiply(const matrix_type &A, const input_multi_vector_type &x, output_multi_vector_type &y, const column_indices_type &col_indices)
CrsMatrix< ValueType, typename values_type::host_mirror_space, Layout > HostMirror
void multiply(const CrsMatrix< MatrixValue, Device, Layout > &A, const InputMultiVectorType &x, OutputMultiVectorType &y, const std::vector< OrdinalType > &col_indices, SingleColumnMultivectorMultiply)
static void write(const matrix_type &A, const std::string &filename)
DeviceConfig(const size_t num_blocks_, const size_t threads_per_block_x_, const size_t threads_per_block_y_=1, const size_t threads_per_block_z_=1)
Kokkos::View< value_type[], Layout, execution_space > values_type
static void apply(const matrix_type &A, const input_vector_type &x, output_vector_type &y)
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
Kokkos::StaticCrsGraph< int, Layout, execution_space, void, int > graph_type
Stokhos::DeviceConfig dev_config
Stokhos::CrsMatrix< ValueType, Device, Layout >::HostMirror create_mirror(const Stokhos::CrsMatrix< ValueType, Device, Layout > &A)
int n
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< RD, RP...> >::value &&Kokkos::is_view_uq_pce< Kokkos::View< XD, XP...> >::value >::type sum(const Kokkos::View< RD, RP...> &r, const Kokkos::View< XD, XP...> &x)
Stokhos::CrsMatrix< ValueType, Device, Layout >::HostMirror create_mirror_view(const Stokhos::CrsMatrix< ValueType, Device, Layout > &A)