Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Stokhos_CrsMatrix.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Stokhos Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38 //
39 // ***********************************************************************
40 // @HEADER
41 
42 #ifndef STOKHOS_CRSMATRIX_HPP
43 #define STOKHOS_CRSMATRIX_HPP
44 
45 #include <fstream>
46 #include <iomanip>
47 
48 #include "Kokkos_Core.hpp"
49 #include "Kokkos_StaticCrsGraph.hpp"
50 
51 #include "Stokhos_Multiply.hpp"
52 #include "Stokhos_MatrixMarket.hpp"
53 
54 namespace Stokhos {
55 
56 struct DeviceConfig {
57  struct Dim3 {
58  size_t x, y, z;
59  Dim3(const size_t x_, const size_t y_ = 1, const size_t z_ = 1) :
60  x(x_), y(y_), z(z_) {}
61  };
62 
64  size_t num_blocks;
66 
67  DeviceConfig(const size_t num_blocks_,
68  const size_t threads_per_block_x_,
69  const size_t threads_per_block_y_ = 1,
70  const size_t threads_per_block_z_ = 1) :
71  block_dim(threads_per_block_x_,threads_per_block_y_,threads_per_block_z_),
72  num_blocks(num_blocks_),
74  {}
75 };
76 
78 template <typename ValueType, typename Device,
79  typename Layout = Kokkos::LayoutRight>
80 class CrsMatrix {
81 public:
82  typedef Device execution_space;
83  typedef ValueType value_type;
84  typedef Kokkos::View< value_type[], Layout, execution_space > values_type;
85 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE // Don't remove this until Kokkos has removed the deprecated code path probably around September 2018
86  typedef Kokkos::StaticCrsGraph< int , Layout, execution_space , int > graph_type;
87 #else
88  typedef Kokkos::StaticCrsGraph< int , Layout, execution_space , void, int > graph_type;
89 #endif
90 
92 
96 
97  CrsMatrix() : dev_config(0, 0) {}
98  CrsMatrix(Stokhos::DeviceConfig dev_config_) : dev_config(dev_config_) {}
99 };
100 
101 // Generic matrix vector multiply kernel for CrsMatrix
102 template <typename MatrixValue,
103  typename Layout,
104  typename Device,
105  typename InputVectorType,
106  typename OutputVectorType>
107 class Multiply< CrsMatrix<MatrixValue,Device,Layout>,
108  InputVectorType,
109  OutputVectorType,
110  void,
111  IntegralRank<1> >
112 {
113 public:
115  typedef InputVectorType input_vector_type;
116  typedef OutputVectorType output_vector_type;
117 
118  typedef Device execution_space;
119  typedef typename execution_space::size_type size_type;
121 
125 
127  const input_vector_type& x,
128  output_vector_type& y )
129  : m_A( A )
130  , m_x( x )
131  , m_y( y )
132  {}
133 
134  //--------------------------------------------------------------------------
135 
136  KOKKOS_INLINE_FUNCTION
137  void operator()( const size_type iRow ) const
138  {
139  const size_type iEntryBegin = m_A.graph.row_map[iRow];
140  const size_type iEntryEnd = m_A.graph.row_map[iRow+1];
141 
142  scalar_type sum = 0;
143 
144  for ( size_type iEntry = iEntryBegin; iEntry < iEntryEnd; ++iEntry ) {
145  sum += m_A.values(iEntry) * m_x( m_A.graph.entries(iEntry) );
146  }
147 
148  m_y(iRow) = sum;
149  }
150 
151  static void apply( const matrix_type & A,
152  const input_vector_type & x,
153  output_vector_type & y )
154  {
155  const size_t row_count = A.graph.row_map.extent(0) - 1;
156  Kokkos::parallel_for( row_count, Multiply(A,x,y) );
157  }
158 };
159 
160 // Generic matrix multi-vector multiply kernel for CrsMatrix
161 template <typename MatrixValue,
162  typename Layout,
163  typename Device,
164  typename InputMultiVectorType,
165  typename OutputMultiVectorType,
166  typename OrdinalType >
167 class Multiply< CrsMatrix<MatrixValue,Device,Layout>,
168  InputMultiVectorType,
169  OutputMultiVectorType,
170  std::vector<OrdinalType>,
171  IntegralRank<2> >
172 {
173 public:
175  typedef InputMultiVectorType input_multi_vector_type;
176  typedef OutputMultiVectorType output_multi_vector_type;
177  typedef std::vector<OrdinalType> column_indices_type;
178 
179  typedef Device execution_space;
180  typedef typename execution_space::size_type size_type;
182 
188 
190  const input_multi_vector_type& x,
192  const column_indices_type& col_indices )
193  : m_A( A )
194  , m_x( x )
195  , m_y( y )
196  , m_col_indices( col_indices )
197  , m_num_vecs( col_indices.size() )
198  {}
199 
200  //--------------------------------------------------------------------------
201 
202  KOKKOS_INLINE_FUNCTION
203  void operator()( const size_type iRow ) const
204  {
205  const size_type iEntryBegin = m_A.graph.row_map[iRow];
206  const size_type iEntryEnd = m_A.graph.row_map[iRow+1];
207 
208  for (size_type j=0; j<m_num_vecs; j++) {
209  size_type iCol = m_col_indices[j];
210 
211  scalar_type sum = 0.0;
212 
213  for ( size_type iEntry = iEntryBegin ; iEntry < iEntryEnd ; ++iEntry ) {
214  sum += m_A.values(iEntry) * m_x( m_A.graph.entries(iEntry), iCol );
215  }
216 
217  m_y( iRow, iCol ) = sum;
218 
219  }
220 
221  }
222 
223  static void apply( const matrix_type& A,
224  const input_multi_vector_type& x,
226  const column_indices_type& col )
227  {
228  const size_t n = A.graph.row_map.extent(0) - 1 ;
229  //Kokkos::parallel_for( n , Multiply(A,x,y,col) );
230 
231  const size_t block_size = 20;
232  const size_t num_vecs = col.size();
233  std::vector<OrdinalType> block_col;
234  block_col.reserve(block_size);
235  for (size_t block=0; block<num_vecs; block+=block_size) {
236  const size_t bs =
237  block+block_size <= num_vecs ? block_size : num_vecs-block;
238  block_col.resize(bs);
239  for (size_t i=0; i<bs; ++i)
240  block_col[i] = col[block+i];
241  Kokkos::parallel_for( n , Multiply(A,x,y,block_col) );
242  }
243  }
244 };
245 
246 #define USE_NEW 1
247 #if USE_NEW
248 // Generic matrix multi-vector multiply kernel for CrsMatrix
249 // Experimenting with blocking of column and row loops to improve cache
250 // performance. Seems to help signficantly on SandyBridge, little difference
251 // on MIC (although not extensive investigation of block sizes).
252 template <typename MatrixValue,
253  typename Layout,
254  typename Device,
255  typename InputMultiVectorType,
256  typename OutputMultiVectorType >
257 class Multiply< CrsMatrix<MatrixValue,Device,Layout>,
258  InputMultiVectorType,
259  OutputMultiVectorType,
260  void,
261  IntegralRank<2> >
262 {
263 public:
265  typedef InputMultiVectorType input_multi_vector_type;
266  typedef OutputMultiVectorType output_multi_vector_type;
267 
268  typedef Device execution_space;
269  typedef typename execution_space::size_type size_type;
271 
277 
278  static const size_type m_block_row_size = 32;
279  static const size_type m_block_col_size = 20;
280 
282  const input_multi_vector_type& x,
284  : m_A( A )
285  , m_x( x )
286  , m_y( y )
287  , m_num_row( A.graph.row_map.extent(0)-1 )
288  , m_num_col( m_y.extent(1) )
289  {
290  }
291 
292  //--------------------------------------------------------------------------
293 
294  KOKKOS_INLINE_FUNCTION
295  void operator()( const size_type iBlockRow ) const
296  {
297  // Number of rows in this block
298  const size_type num_row =
299  iBlockRow+m_block_row_size <= m_num_row ?
300  m_block_row_size : m_num_row-iBlockRow;
301 
302  // Loop over block columns of x
303  for (size_type iBlockCol=0; iBlockCol<m_num_col; iBlockCol+=m_block_col_size) {
304  // Number of columns in this block
305  const size_type num_col =
306  iBlockCol+m_block_col_size <= m_num_col ?
307  m_block_col_size : m_num_col-iBlockCol;
308 
309  // Loop over rows in this block of A
310  const size_type iRowEnd = iBlockRow + num_row;
311  for (size_type iRow=iBlockRow; iRow<iRowEnd; ++iRow) {
312 
313  // Range of column entries for this row
314  const size_type iEntryBegin = m_A.graph.row_map[iRow];
315  const size_type iEntryEnd = m_A.graph.row_map[iRow+1];
316 
317  // Loop over columns in this block of x
318  const size_type iColEnd = iBlockCol + num_col;
319  for (size_type iCol=iBlockCol; iCol<iColEnd; iCol++) {
320 
321  // Loop columns of A for this row
322  scalar_type sum = 0.0;
323  for (size_type iEntry = iEntryBegin; iEntry<iEntryEnd; ++iEntry) {
324  sum += m_A.values(iEntry) * m_x( m_A.graph.entries(iEntry), iCol );
325  }
326  m_y( iRow, iCol ) = sum;
327 
328  }
329 
330  }
331 
332  }
333 
334  }
335 
336  static void apply( const matrix_type & A,
337  const input_multi_vector_type& x,
339  {
340  // Parallelize over row blocks of size m_block_row_size
341  const size_type num_row = A.graph.row_map.extent(0) - 1;
342  const size_type n = (num_row+m_block_row_size-1) / m_block_row_size;
343  Kokkos::parallel_for( n , Multiply(A,x,y) );
344  }
345 };
346 #else
347 // Generic matrix multi-vector multiply kernel for CrsMatrix
348 template <typename MatrixValue,
349  typename Layout,
350  typename Device,
351  typename InputMultiVectorType,
352  typename OutputMultiVectorType >
353 class Multiply< CrsMatrix<MatrixValue,Device,Layout>,
354  InputMultiVectorType,
355  OutputMultiVectorType,
356  void,
357  IntegralRank<2> >
358 {
359 public:
360  typedef CrsMatrix<MatrixValue,Device,Layout> matrix_type;
361  typedef InputMultiVectorType input_multi_vector_type;
362  typedef OutputMultiVectorType output_multi_vector_type;
363 
364  typedef Device execution_space;
365  typedef typename execution_space::size_type size_type;
367 
368  const matrix_type m_A;
369  const input_multi_vector_type m_x;
370  output_multi_vector_type m_y;
371  const size_type m_num_vecs;
372 
373  Multiply( const matrix_type& A,
374  const input_multi_vector_type& x,
375  output_multi_vector_type& y)
376  : m_A( A )
377  , m_x( x )
378  , m_y( y )
379  , m_num_vecs( m_y.extent(1) )
380  {}
381 
382  //--------------------------------------------------------------------------
383 
384  KOKKOS_INLINE_FUNCTION
385  void operator()( const size_type iRow ) const
386  {
387  const size_type iEntryBegin = m_A.graph.row_map[iRow];
388  const size_type iEntryEnd = m_A.graph.row_map[iRow+1];
389 
390  for (size_type iCol=0; iCol<m_num_vecs; iCol++) {
391 
392  scalar_type sum = 0.0;
393 
394  for ( size_type iEntry = iEntryBegin ; iEntry < iEntryEnd ; ++iEntry ) {
395  sum += m_A.values(iEntry) * m_x( m_A.graph.entries(iEntry), iCol );
396  }
397 
398  m_y( iRow, iCol ) = sum;
399 
400  }
401 
402  }
403 
404  static void apply( const matrix_type& A,
405  const input_multi_vector_type& x,
406  output_multi_vector_type& y )
407  {
408  const size_t n = A.graph.row_map.extent(0) - 1 ;
409  Kokkos::parallel_for( n , Multiply(A,x,y) );
410 
411  // const size_t block_size = 20;
412  // const size_t num_vecs = col.size();
413  // std::vector<OrdinalType> block_col;
414  // block_col.reserve(block_size);
415  // for (size_t block=0; block<num_vecs; block+=block_size) {
416  // const size_t bs =
417  // block+block_size <= num_vecs ? block_size : num_vecs-block;
418  // block_col.resize(bs);
419  // for (size_t i=0; i<bs; ++i)
420  // block_col[i] = col[block+i];
421  // Kokkos::parallel_for( n , Multiply(A,x,y,block_col) );
422  // }
423  }
424 };
425 #endif
426 
427 #if USE_NEW
428 // Generic matrix multi-vector multiply kernel for CrsMatrix
429 // Experimenting with blocking of column and row loops to improve cache
430 // performance. Seems to help signficantly on SandyBridge, little difference
431 // on MIC (although not extensive investigation of block sizes).
432 template <typename MatrixValue,
433  typename Layout,
434  typename Device,
435  typename InputViewType,
436  typename OutputViewType>
437 class Multiply< CrsMatrix<MatrixValue,Device,Layout>,
438  std::vector<InputViewType>,
439  std::vector<OutputViewType>,
440  void,
441  IntegralRank<1> >
442 {
443 public:
445  typedef std::vector<InputViewType> input_multi_vector_type;
446  typedef std::vector<OutputViewType> output_multi_vector_type;
447 
448  typedef Device execution_space;
449  typedef typename execution_space::size_type size_type;
451 
457 
458  static const size_type m_block_row_size = 32;
459  static const size_type m_block_col_size = 20;
460 
462  const input_multi_vector_type& x,
464  : m_A( A )
465  , m_x( x )
466  , m_y( y )
467  , m_num_row( A.graph.row_map.extent(0)-1 )
468  , m_num_col( x.size() )
469  {
470  }
471 
472  //--------------------------------------------------------------------------
473 
474  KOKKOS_INLINE_FUNCTION
475  void operator()( const size_type iBlockRow ) const
476  {
477  // Number of rows in this block
478  const size_type num_row =
479  iBlockRow+m_block_row_size <= m_num_row ?
480  m_block_row_size : m_num_row-iBlockRow;
481 
482  // Loop over block columns of x
483  for (size_type iBlockCol=0; iBlockCol<m_num_col; iBlockCol+=m_block_col_size) {
484  // Number of columns in this block
485  const size_type num_col =
486  iBlockCol+m_block_col_size <= m_num_col ?
487  m_block_col_size : m_num_col-iBlockCol;
488 
489  // Loop over rows in this block of A
490  const size_type iRowEnd = iBlockRow + num_row;
491  for (size_type iRow=iBlockRow; iRow<iRowEnd; ++iRow) {
492 
493  // Range of column entries for this row
494  const size_type iEntryBegin = m_A.graph.row_map[iRow];
495  const size_type iEntryEnd = m_A.graph.row_map[iRow+1];
496 
497  // Loop over columns in this block of x
498  const size_type iColEnd = iBlockCol + num_col;
499  for (size_type iCol=iBlockCol; iCol<iColEnd; iCol++) {
500 
501  // Loop columns of A for this row
502  scalar_type sum = 0.0;
503  for (size_type iEntry = iEntryBegin; iEntry<iEntryEnd; ++iEntry) {
504  sum += m_A.values(iEntry) * m_x[iCol](m_A.graph.entries(iEntry));
505  }
506  m_y[iCol](iRow) = sum;
507 
508  }
509 
510  }
511 
512  }
513 
514  }
515 
516  static void apply( const matrix_type & A,
517  const input_multi_vector_type& x,
519  {
520  // Parallelize over row blocks of size m_block_row_size
521  const size_type num_row = A.graph.row_map.extent(0) - 1;
522  const size_type n = (num_row+m_block_row_size-1) / m_block_row_size;
523  Kokkos::parallel_for( n , Multiply(A,x,y) );
524  }
525 };
526 #else
527 // Generic matrix multi-vector multiply kernel for CrsMatrix
528 template <typename MatrixValue,
529  typename Layout,
530  typename Device,
531  typename InputViewType,
532  typename OutputViewType>
533 class Multiply< CrsMatrix<MatrixValue,Device,Layout>,
534  std::vector<InputViewType>,
535  std::vector<OutputViewType>,
536  void,
537  IntegralRank<1> >
538 {
539 public:
540  typedef CrsMatrix<MatrixValue,Device,Layout> matrix_type;
541  typedef std::vector<InputViewType> input_multi_vector_type;
542  typedef std::vector<OutputViewType> output_multi_vector_type;
543 
544  typedef Device execution_space;
545  typedef typename execution_space::size_type size_type;
546  typedef typename OutputViewType::value_type scalar_type;
547 
548  const matrix_type m_A;
549  const input_multi_vector_type m_x;
550  output_multi_vector_type m_y;
551  const size_type m_num_vecs;
552 
553  Multiply( const matrix_type& A,
554  const input_multi_vector_type& x,
555  output_multi_vector_type& y )
556  : m_A( A )
557  , m_x( x )
558  , m_y( y )
559  , m_num_vecs( x.size() )
560  {
561  }
562 
563  //--------------------------------------------------------------------------
564 
565  KOKKOS_INLINE_FUNCTION
566  void operator()( const size_type iRow ) const
567  {
568  const size_type iEntryBegin = m_A.graph.row_map[iRow];
569  const size_type iEntryEnd = m_A.graph.row_map[iRow+1];
570 
571  for (size_type iCol=0; iCol<m_num_vecs; iCol++) {
572 
573  scalar_type sum = 0.0;
574 
575  for ( size_type iEntry = iEntryBegin ; iEntry < iEntryEnd ; ++iEntry ) {
576  sum += m_A.values(iEntry) * m_x[iCol]( m_A.graph.entries(iEntry) );
577  }
578 
579  m_y[iCol]( iRow) = sum;
580 
581  }
582 
583  }
584 
585  static void apply( const matrix_type & A,
586  const input_multi_vector_type& x,
587  output_multi_vector_type& y )
588  {
589  const size_t n = A.graph.row_map.extent(0) - 1 ;
590  Kokkos::parallel_for( n , Multiply(A,x,y) );
591 
592  // const size_t block_size = 20;
593  // const size_t num_vecs = x.size();
594  // input_multi_vector_type xx;
595  // output_multi_vector_type yy;
596  // xx.reserve(block_size);
597  // yy.reserve(block_size);
598  // for (size_t block=0; block<num_vecs; block+=block_size) {
599  // const size_t bs =
600  // block+block_size <= num_vecs ? block_size : num_vecs-block;
601  // xx.resize(bs);
602  // yy.resize(bs);
603  // for (size_t i=0; i<bs; ++i) {
604  // xx[i] = x[block+i];
605  // yy[i] = y[block+i];
606  // }
607  // Kokkos::parallel_for( n , Multiply(A,xx,yy) );
608  // }
609  }
610 };
611 #endif
612 
613 // Matrix multivector multiply specializations for one column at a time
615 template <typename MatrixValue,
616  typename Layout,
617  typename Device,
618  typename InputMultiVectorType,
619  typename OutputMultiVectorType,
620  typename OrdinalType>
622  const InputMultiVectorType& x,
623  OutputMultiVectorType& y,
624  const std::vector<OrdinalType>& col_indices,
626 {
627  typedef CrsMatrix<MatrixValue,Device,Layout> MatrixType;
628 
629  typedef Kokkos::View<typename InputMultiVectorType::value_type*, typename InputMultiVectorType::array_layout, Device, Kokkos::MemoryUnmanaged> InputVectorType;
630  typedef Kokkos::View<typename OutputMultiVectorType::value_type*, typename OutputMultiVectorType::array_layout, Device, Kokkos::MemoryUnmanaged> OutputVectorType;
632  for (size_t i=0; i<col_indices.size(); ++i) {
633  InputVectorType x_view =
634  Kokkos::subview( x , Kokkos::ALL() , col_indices[i] );
635  OutputVectorType y_view =
636  Kokkos::subview( y , Kokkos::ALL() , col_indices[i] );
637  multiply_type::apply( A , x_view , y_view );
638  }
639 }
640 
641 template <typename MatrixValue,
642  typename Layout,
643  typename Device,
644  typename InputVectorType,
645  typename OutputVectorType>
647  const std::vector<InputVectorType>& x,
648  std::vector<OutputVectorType>& y,
650 {
651  typedef CrsMatrix<MatrixValue,Device,Layout> MatrixType;
653  for (size_t i=0; i<x.size(); ++i) {
654  multiply_type::apply( A , x[i] , y[i] );
655  }
656 }
657 
658 } // namespace Stokhos
659 
660 //----------------------------------------------------------------------------
661 //----------------------------------------------------------------------------
662 
663 namespace Kokkos {
664 
665 template <typename ValueType, typename Layout, typename Device>
669  mirror_A.values = Kokkos::create_mirror(A.values);
670  mirror_A.graph = Kokkos::create_mirror(A.graph); // this deep copies
671  mirror_A.dev_config = A.dev_config;
672  return mirror_A;
673 }
674 
675 template <typename ValueType, typename Layout, typename Device>
680  mirror_A.graph = Kokkos::create_mirror(A.graph); // this deep copies
681  mirror_A.dev_config = A.dev_config;
682  return mirror_A;
683 }
684 
685 template <typename ValueType, typename Layout, typename DstDevice,
686  typename SrcDevice>
687 void
690  Kokkos::deep_copy(dst.values, src.values);
691 }
692 
693 } // namespace Kokkos
694 
695 //----------------------------------------------------------------------------
696 //----------------------------------------------------------------------------
697 
698 namespace Stokhos {
699 
700 // MatrixMarket writer for CrsMatrix
701 template < typename MatrixValue, typename Layout, typename Device >
702 class MatrixMarketWriter< CrsMatrix<MatrixValue,Device,Layout> >
703 {
704 public:
706  typedef Device execution_space ;
707  typedef typename execution_space::size_type size_type ;
708 
709  static void write(const matrix_type& A, const std::string& filename) {
710  std::ofstream file(filename.c_str());
711  file.precision(16);
712  file.setf(std::ios::scientific);
713 
715  Kokkos::deep_copy(hA, A);
716 
717  const size_type nRow = hA.graph.row_map.extent(0) - 1 ;
718 
719  // Write banner
720  file << "%%MatrixMarket matrix coordinate real general" << std::endl;
721  file << nRow << " " << nRow << " " << hA.values.extent(0) << std::endl;
722 
723  for (size_type row=0; row<nRow; ++row) {
724  size_type entryBegin = hA.graph.row_map(row);
725  size_type entryEnd = hA.graph.row_map(row+1);
726  for (size_type entry=entryBegin; entry<entryEnd; ++entry) {
727  file << row+1 << " " << hA.graph.entries(entry)+1 << " "
728  << std::setw(22) << hA.values(entry) << std::endl;
729  }
730  }
731 
732  file.close();
733  }
734 };
735 
736 } // namespace Stokhos
737 
738 #endif /* #ifndef STOKHOS_CRSMATRIX_HPP */
static void apply(const matrix_type &A, const input_multi_vector_type &x, output_multi_vector_type &y)
Kokkos::DefaultExecutionSpace execution_space
Dim3(const size_t x_, const size_t y_=1, const size_t z_=1)
static void apply(const matrix_type &A, const input_multi_vector_type &x, output_multi_vector_type &y, const column_indices_type &col)
CrsMatrix(Stokhos::DeviceConfig dev_config_)
Multiply(const matrix_type &A, const input_multi_vector_type &x, output_multi_vector_type &y, const column_indices_type &col_indices)
CrsMatrix< ValueType, typename values_type::host_mirror_space, Layout > HostMirror
void multiply(const CrsMatrix< MatrixValue, Device, Layout > &A, const InputMultiVectorType &x, OutputMultiVectorType &y, const std::vector< OrdinalType > &col_indices, SingleColumnMultivectorMultiply)
static void write(const matrix_type &A, const std::string &filename)
DeviceConfig(const size_t num_blocks_, const size_t threads_per_block_x_, const size_t threads_per_block_y_=1, const size_t threads_per_block_z_=1)
Kokkos::View< value_type[], Layout, execution_space > values_type
static void apply(const matrix_type &A, const input_vector_type &x, output_vector_type &y)
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
Kokkos::StaticCrsGraph< int, Layout, execution_space, void, int > graph_type
Stokhos::DeviceConfig dev_config
Stokhos::CrsMatrix< ValueType, Device, Layout >::HostMirror create_mirror(const Stokhos::CrsMatrix< ValueType, Device, Layout > &A)
int n
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< RD, RP...> >::value &&Kokkos::is_view_uq_pce< Kokkos::View< XD, XP...> >::value >::type sum(const Kokkos::View< RD, RP...> &r, const Kokkos::View< XD, XP...> &x)
Stokhos::CrsMatrix< ValueType, Device, Layout >::HostMirror create_mirror_view(const Stokhos::CrsMatrix< ValueType, Device, Layout > &A)