Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Stokhos_Tpetra_Utilities.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Stokhos Package
4 //
5 // Copyright 2009 NTESS and the Stokhos contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef STOKHOS_TPETRA_UTILITIES_HPP
11 #define STOKHOS_TPETRA_UTILITIES_HPP
12 
15 #include "Tpetra_CrsMatrix.hpp"
16 
17 namespace Stokhos {
18 
20 
23  template <class ViewType>
25  public:
26  typedef ViewType MeanViewType;
28  typedef typename ViewType::size_type size_type;
29 
30  GetMeanValsFunc(const ViewType& vals) {
31  mean_vals = ViewType("mean-values", vals.extent(0));
33  }
34 
35  MeanViewType getMeanValues() const { return mean_vals; }
36 
37  private:
39  };
40 
42 
44  template <class Storage, class ... P>
45  class GetMeanValsFunc< Kokkos::View< Sacado::UQ::PCE<Storage>*,
46  P... > > {
47  public:
49  typedef Kokkos::View< Scalar*, P... > ViewType;
52  typedef typename ViewType::size_type size_type;
53 
54  GetMeanValsFunc(const ViewType& vals_) : vals(vals_) {
55  const size_type nnz = vals.extent(0);
56  typename Scalar::cijk_type mean_cijk =
57  Stokhos::create_mean_based_product_tensor<execution_space, typename Storage::ordinal_type, typename Storage::value_type>();
58  mean_vals = Kokkos::make_view<ViewType>("mean-values", mean_cijk, nnz, 1);
59  Kokkos::parallel_for( nnz, *this );
60  }
61 
62  KOKKOS_INLINE_FUNCTION
63  void operator() (const size_type i) const {
64  mean_vals(i) = vals(i).fastAccessCoeff(0);
65  }
66 
67  MeanViewType getMeanValues() const { return mean_vals; }
68 
69  private:
72  };
73 
75 
77  template <class Storage, class ... P>
78  class GetMeanValsFunc< Kokkos::View< Sacado::MP::Vector<Storage>*,
79  P... > > {
80  public:
82  typedef Kokkos::View< Scalar*, P... > ViewType;
85  typedef typename ViewType::size_type size_type;
86 
87  GetMeanValsFunc(const ViewType& vals_) :
88  vals(vals_), vec_size(Kokkos::dimension_scalar(vals))
89  {
90  const size_type nnz = vals.extent(0);
91  mean_vals = ViewType("mean-values", nnz, 1);
92  Kokkos::parallel_for( nnz, *this );
93  }
94 
95  KOKKOS_INLINE_FUNCTION
96  void operator() (const size_type i) const
97  {
98  typename Scalar::value_type s = 0.0;
99  for (size_type j=0; j<vec_size; ++j)
100  s += vals(i).fastAccessCoeff(j);
101  mean_vals(i) = s;
102  }
103 
105 
106  private:
110  };
111 
113 
116  template <class ViewType>
118  public:
119  typedef ViewType MeanViewType;
121  typedef typename ViewType::size_type size_type;
122 
123  GetScalarMeanValsFunc(const ViewType& vals) {
124  mean_vals = ViewType("mean-values", vals.extent(0));
125  Kokkos::deep_copy( mean_vals, vals );
126  }
127 
129 
130  private:
132  };
133 
135 
137  template <class Storage, class ... P>
138  class GetScalarMeanValsFunc< Kokkos::View< Sacado::UQ::PCE<Storage>*,
139  P... > > {
140  public:
142  typedef typename Scalar::value_type MeanScalar;
143  typedef Kokkos::View< Scalar*, P... > ViewType;
144  typedef Kokkos::View< MeanScalar*, P... > MeanViewType;
146  typedef typename ViewType::size_type size_type;
147 
148  GetScalarMeanValsFunc(const ViewType& vals_) : vals(vals_) {
149  const size_type nnz = vals.extent(0);
150  mean_vals = MeanViewType("mean-values", nnz);
151  Kokkos::parallel_for( nnz, *this );
152  }
153 
154  KOKKOS_INLINE_FUNCTION
155  void operator() (const size_type i) const {
156  mean_vals(i) = vals(i).fastAccessCoeff(0);
157  }
158 
160 
161  private:
164  };
165 
167 
169  template <class Storage, class ... P>
170  class GetScalarMeanValsFunc< Kokkos::View< Sacado::MP::Vector<Storage>*,
171  P... > > {
172  public:
174  typedef typename Scalar::value_type MeanScalar;
175  typedef Kokkos::View< Scalar*, P... > ViewType;
176  typedef Kokkos::View< MeanScalar*, P... > MeanViewType;
178  typedef typename ViewType::size_type size_type;
179 
181  vals(vals_), vec_size(Kokkos::dimension_scalar(vals))
182  {
183  const size_type nnz = vals.extent(0);
184  mean_vals = ViewType("mean-values", nnz);
185  Kokkos::parallel_for( nnz, *this );
186  }
187 
188  KOKKOS_INLINE_FUNCTION
189  void operator() (const size_type i) const
190  {
191  typename Scalar::value_type s = 0.0;
192  for (size_type j=0; j<vec_size; ++j)
193  s += vals(i).fastAccessCoeff(j);
194  mean_vals(i) = s;
195  }
196 
198 
199  private:
203  };
204 
205  template <typename Scalar, typename LO, typename GO, typename N>
207  build_mean_matrix(const Tpetra::CrsMatrix<Scalar,LO,GO,N>& A)
208  {
209  using Teuchos::RCP;
210  using Teuchos::rcp;
211  typedef Tpetra::CrsMatrix<Scalar,LO,GO,N> MatrixType;
212  typedef typename MatrixType::local_matrix_device_type KokkosMatrixType;
213  typedef typename KokkosMatrixType::values_type KokkosMatrixValuesType;
214 
215  KokkosMatrixType kokkos_matrix = A.getLocalMatrixDevice();
216  KokkosMatrixValuesType matrix_values = kokkos_matrix.values;
217  const size_t ncols = kokkos_matrix.numCols();
219  typedef typename MeanFunc::MeanViewType KokkosMeanMatrixValuesType;
220  MeanFunc meanfunc(matrix_values);
221  KokkosMeanMatrixValuesType mean_matrix_values = meanfunc.getMeanValues();
222 
223  // From here on we are assuming that
224  // KokkosMeanMatrixValuesType == KokkosMatrixValuestype
225 
226  RCP < MatrixType > mean_matrix =
227  rcp( new MatrixType(A.getCrsGraph(), mean_matrix_values) );
228  mean_matrix->fillComplete();
229  return mean_matrix;
230  }
231 
232  template <typename Scalar, typename LO, typename GO, typename N>
234  build_mean_scalar_matrix(const Tpetra::CrsMatrix<Scalar,LO,GO,N>& A)
235  {
236  using Teuchos::RCP;
237  using Teuchos::rcp;
238  typedef typename Scalar::value_type BaseScalar;
239  typedef Tpetra::CrsMatrix<Scalar,LO,GO,N> MatrixType;
240  typedef Tpetra::CrsMatrix<BaseScalar,LO,GO,N> ScalarMatrixType;
241  typedef typename MatrixType::local_matrix_device_type KokkosMatrixType;
242  typedef typename KokkosMatrixType::values_type KokkosMatrixValuesType;
243 
244  KokkosMatrixType kokkos_matrix = A.getLocalMatrixDevice();
245  KokkosMatrixValuesType matrix_values = kokkos_matrix.values;
247  typedef typename MeanFunc::MeanViewType KokkosMeanMatrixValuesType;
248  MeanFunc meanfunc(matrix_values);
249  KokkosMeanMatrixValuesType mean_matrix_values = meanfunc.getMeanValues();
250 
251  // From here on we are assuming that
252  // KokkosMeanMatrixValuesType == ScalarKokkosMatrixValuesType
253 
254  RCP < ScalarMatrixType > mean_matrix =
255  rcp( new ScalarMatrixType(A.getCrsGraph(), mean_matrix_values) );
256  mean_matrix->fillComplete();
257  return mean_matrix;
258  }
259 
260  namespace Impl {
261 
262  // Functor for copying a PCE view to a scalar view
263  // (Assumes view is rank 2, LayoutLeft)
264  template <typename ExecSpace>
265  struct CopyPCE2Scalar {
266  typedef ExecSpace exec_space;
267  template <typename DstView, typename SrcView>
268  CopyPCE2Scalar(const DstView& dst, const SrcView& src) {
269  impl(dst,src);
270  }
271 
272  template <typename DstView, typename SrcView>
273  void impl(const DstView& dst, const SrcView& src) {
274  typedef typename SrcView::non_const_value_type Scalar;
275  const size_t m = src.extent(0);
276  const size_t n = src.extent(1);
277  const size_t p = Kokkos::dimension_scalar(src);
278  Kokkos::RangePolicy<exec_space> policy(0,m);
279  Kokkos::parallel_for( policy, KOKKOS_LAMBDA(const size_t i)
280  {
281  for (size_t j=0; j<n; ++j) {
282  const Scalar& s = src(i,j);
283  for (size_t k=0; k<p; ++k)
284  dst(i,j*p+k) = s.fastAccessCoeff(k);
285  }
286  });
287  }
288  };
289 
290  // Functor for copying a scalar view to a PCE view
291  // (Assumes view is rank 2, LayoutLeft)
292  template <typename ExecSpace>
293  struct CopyScalar2PCE {
294  typedef ExecSpace exec_space;
295 
296  template <typename DstView, typename SrcView>
297  CopyScalar2PCE(const DstView& dst, const SrcView& src) {
298  impl(dst,src);
299  }
300 
301  template <typename DstView, typename SrcView>
302  void impl(const DstView& dst, const SrcView& src) {
303  typedef typename DstView::non_const_value_type Scalar;
304  const size_t m = dst.extent(0);
305  const size_t n = dst.extent(1);
306  const size_t p = Kokkos::dimension_scalar(dst);
307 
308  Kokkos::RangePolicy<exec_space> policy(0,m);
309  Kokkos::parallel_for( policy, KOKKOS_LAMBDA(const size_t i)
310  {
311  for (size_t j=0; j<n; ++j) {
312  Scalar& d = dst(i,j);
313  for (size_t k=0; k<p; ++k)
314  d.fastAccessCoeff(k) = src(i,j*p+k);
315  }
316  });
317  }
318  };
319 
320 #ifdef KOKKOS_ENABLE_CUDA
321  // Specialization for CopyPCE2Scalar specifically for Cuda that ensures
322  // coalesced reads and writes
323  template <>
324  struct CopyPCE2Scalar<Kokkos::Cuda> {
325  typedef Kokkos::Cuda exec_space;
326 
327  template <typename DstView, typename SrcView>
328  CopyPCE2Scalar(const DstView& dst, const SrcView& src) {
329  impl(dst,src);
330  }
331 
332  template <typename DstView, typename SrcView>
333  void impl(const DstView& dst, const SrcView& src) {
334  typedef typename DstView::non_const_value_type Scalar;
335  typedef Kokkos::TeamPolicy<exec_space> Policy;
336  typedef typename Policy::member_type Member;
337 
338  const size_t m = src.extent(0);
339  const size_t n = src.extent(1);
340  const size_t p = Kokkos::dimension_scalar(src);
341 
342  const size_t ChunkSize = 16;
343  const size_t M = (m+ChunkSize-1)/ChunkSize;
344 
345  Policy policy(M,ChunkSize,ChunkSize);
346  Kokkos::parallel_for( policy, [=] __device__(const Member& team)
347  {
348  __shared__ Scalar tmp[ChunkSize][ChunkSize];
349  const size_t i_block = blockIdx.x*ChunkSize;
350 
351  for (size_t j=0; j<n; ++j) {
352  for (size_t k_block=0; k_block<p; k_block+=ChunkSize) {
353 
354  // Make sure previous iteration has completed before overwriting tmp
355  __syncthreads();
356 
357  // Read ChunkSize x ChunkSize block (coalesced on k)
358  size_t i = i_block + threadIdx.y;
359  size_t k = k_block + threadIdx.x;
360  if (i < m && k < p)
361  tmp[threadIdx.y][threadIdx.x] = src(i,j).fastAccessCoeff(k);
362 
363  // Wait for all threads to finish
364  __syncthreads();
365 
366  // Write ChunkSize x ChunkSize block (coalesced on i for LayoutLeft)
367  i = i_block + threadIdx.x;
368  k = k_block + threadIdx.y;
369  if (i < m && k < p)
370  dst(i,j*p+k) = tmp[threadIdx.x][threadIdx.y];
371 
372  }
373  }
374  });
375  }
376  };
377 
378  // Specialization for Scalar2PCE specifically for Cuda that ensures
379  // coalesced reads and writes
380  template <>
381  struct CopyScalar2PCE<Kokkos::Cuda> {
382  typedef Kokkos::Cuda exec_space;
383 
384  template <typename DstView, typename SrcView>
385  CopyScalar2PCE(const DstView& dst, const SrcView& src) {
386  impl(dst,src);
387  }
388 
389  template <typename DstView, typename SrcView>
390  void impl(const DstView& dst, const SrcView& src) {
391  typedef typename SrcView::non_const_value_type Scalar;
392  typedef Kokkos::TeamPolicy<exec_space> Policy;
393  typedef typename Policy::member_type Member;
394 
395  const size_t m = dst.extent(0);
396  const size_t n = dst.extent(1);
397  const size_t p = Kokkos::dimension_scalar(dst);
398 
399  const size_t ChunkSize = 16;
400  const size_t M = (m+ChunkSize-1)/ChunkSize;
401 
402  Policy policy(M,ChunkSize,ChunkSize);
403  Kokkos::parallel_for( policy, [=] __device__(const Member& team)
404  {
405  __shared__ Scalar tmp[ChunkSize][ChunkSize];
406  const size_t i_block = blockIdx.x*ChunkSize;
407 
408  for (size_t j=0; j<n; ++j) {
409  for (size_t k_block=0; k_block<p; k_block+=ChunkSize) {
410 
411  // Make sure previous iteration has completed before overwriting tmp
412  __syncthreads();
413 
414  // Read ChunkSize x ChunkSize block (coalesced on i for LayoutLeft)
415  size_t i = i_block + threadIdx.x;
416  size_t k = k_block + threadIdx.y;
417  if (i < m && k < p)
418  tmp[threadIdx.x][threadIdx.y] = src(i,j*p+k);
419 
420  // Wait for all threads to finish
421  __syncthreads();
422 
423  // Write ChunkSize x ChunkSize block (coalesced on k)
424  i = i_block + threadIdx.y;
425  k = k_block + threadIdx.x;
426  if (i < m && k < p)
427  dst(i,j).fastAccessCoeff(k) = tmp[threadIdx.y][threadIdx.x];
428 
429  }
430  }
431  });
432  }
433  };
434 #endif
435 
436  } // Impl
437 
438  template <typename DstView, typename SrcView>
439  void copy_pce_to_scalar(const DstView& dst, const SrcView& src)
440  {
442  }
443 
444  template <typename DstView, typename SrcView>
445  void copy_scalar_to_pce(const DstView& dst, const SrcView& src)
446  {
448  }
449 
450  // Tpetra operator wrapper allowing a mean0-based operator (with double
451  // scalar type) to be applied to a UQ::PCE multi-vector
452  template <typename Scalar,
453  typename LocalOrdinal,
454  typename GlobalOrdinal,
455  typename Node>
457  virtual public Tpetra::Operator<Scalar, LocalOrdinal, GlobalOrdinal, Node> {
458  public:
459  typedef Scalar scalar_type;
462  typedef Node node_type;
464  typedef Tpetra::Operator<base_scalar_type,LocalOrdinal,GlobalOrdinal,Node> scalar_op_type;
465 
467  mb_op(mb_op_) {}
468 
470 
472  getDomainMap() const {
473  return mb_op->getDomainMap();
474  }
475 
477  getRangeMap() const {
478  return mb_op->getRangeMap();
479  }
480 
481  virtual void
482  apply (const Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &X,
483  Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y,
485  Scalar alpha = Teuchos::ScalarTraits<Scalar>::one(),
486  Scalar beta = Teuchos::ScalarTraits<Scalar>::zero()) const
487  {
488  auto xv = X.getLocalViewDevice(Tpetra::Access::ReadOnly);
489  auto yv = Y.getLocalViewDevice(Tpetra::Access::ReadWrite);
490  const size_t pce_size = Kokkos::dimension_scalar(xv);
491  if (X_s == Teuchos::null ||
492  X_s->getNumVectors() != X.getNumVectors()*pce_size)
493  X_s = Teuchos::rcp(new scalar_mv_type(X.getMap(),
494  X.getNumVectors()*pce_size));
495  if (Y_s == Teuchos::null ||
496  Y_s->getNumVectors() != Y.getNumVectors()*pce_size)
497  Y_s = Teuchos::rcp(new scalar_mv_type(Y.getMap(),
498  Y.getNumVectors()*pce_size));
499  base_scalar_type alpha_s = alpha.fastAccessCoeff(0);
500  base_scalar_type beta_s = beta.fastAccessCoeff(0);
501 
502  {
503  auto xv_s = X_s->getLocalViewDevice(Tpetra::Access::ReadWrite);
504  auto yv_s = Y_s->getLocalViewDevice(Tpetra::Access::ReadWrite);
505 
506  copy_pce_to_scalar(xv_s, xv);
507  if (beta_s != 0.0)
508  copy_pce_to_scalar(yv_s, yv);
509  }
510 
511  mb_op->apply(*X_s, *Y_s, mode, alpha_s, beta_s);
512 
513  {
514  auto yv_s = Y_s->getLocalViewDevice(Tpetra::Access::ReadOnly);
515  copy_scalar_to_pce(yv, yv_s);
516  }
517  }
518 
519  virtual bool hasTransposeApply() const {
520  return mb_op->hasTransposeApply();
521  }
522 
523  private:
524 
525  typedef Tpetra::MultiVector<base_scalar_type,LocalOrdinal,GlobalOrdinal,Node> scalar_mv_type;
528 
529  };
530 
531 }
532 
533 #endif // STOKHOS_TPETRA_UTILITIES_HPP
Teuchos::RCP< Tpetra::CrsMatrix< Scalar, LO, GO, N > > build_mean_matrix(const Tpetra::CrsMatrix< Scalar, LO, GO, N > &A)
void impl(const DstView &dst, const SrcView &src)
Stokhos::StandardStorage< int, double > Storage
CopyPCE2Scalar(const DstView &dst, const SrcView &src)
Kokkos::DefaultExecutionSpace execution_space
virtual Teuchos::RCP< const Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > getRangeMap() const
void copy_pce_to_scalar(const DstView &dst, const SrcView &src)
void impl(const DstView &dst, const SrcView &src)
KOKKOS_INLINE_FUNCTION constexpr std::enable_if< is_view_uq_pce< View< T, P...> >::value, unsigned >::type dimension_scalar(const View< T, P...> &view)
Teuchos::RCP< scalar_mv_type > X_s
MeanViewType getMeanValues() const
Teuchos::RCP< const scalar_op_type > mb_op
Teuchos::RCP< Tpetra::CrsMatrix< typename Scalar::value_type, LO, GO, N > > build_mean_scalar_matrix(const Tpetra::CrsMatrix< Scalar, LO, GO, N > &A)
Tpetra::MultiVector< base_scalar_type, LocalOrdinal, GlobalOrdinal, Node > scalar_mv_type
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
GetMeanValsFunc(const ViewType &vals)
virtual void apply(const Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const
ViewType::execution_space execution_space
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
Get mean values matrix for mean-based preconditioning.
MeanBasedTpetraOperator(const Teuchos::RCP< const scalar_op_type > &mb_op_)
Get mean values matrix for mean-based preconditioning.
void copy_scalar_to_pce(const DstView &dst, const SrcView &src)
CopyScalar2PCE(const DstView &dst, const SrcView &src)
Tpetra::KokkosClassic::DefaultNode::DefaultNodeType Node
ViewType::execution_space execution_space
int n
virtual Teuchos::RCP< const Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > getDomainMap() const
Tpetra::Operator< base_scalar_type, LocalOrdinal, GlobalOrdinal, Node > scalar_op_type
Teuchos::RCP< scalar_mv_type > Y_s