Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Stokhos_Tpetra_Utilities.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Stokhos Package
4 //
5 // Copyright 2009 NTESS and the Stokhos contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef STOKHOS_TPETRA_UTILITIES_HPP
11 #define STOKHOS_TPETRA_UTILITIES_HPP
12 
15 #include "Tpetra_CrsMatrix.hpp"
16 
17 namespace Stokhos {
18 
20 
23  template <class ViewType>
25  public:
26  typedef ViewType MeanViewType;
28  typedef typename ViewType::size_type size_type;
29 
30  GetMeanValsFunc(const ViewType& vals) {
31  mean_vals = ViewType("mean-values", vals.extent(0));
33  }
34 
35  MeanViewType getMeanValues() const { return mean_vals; }
36 
37  private:
39  };
40 
42 
44  template <class Storage, class ... P>
45  class GetMeanValsFunc< Kokkos::View< Sacado::UQ::PCE<Storage>*,
46  P... > > {
47  public:
49  typedef Kokkos::View< Scalar*, P... > ViewType;
52  typedef typename ViewType::size_type size_type;
53 
54  GetMeanValsFunc(const ViewType& vals_) : vals(vals_) {
55  const size_type nnz = vals.extent(0);
56  typename Scalar::cijk_type mean_cijk =
57  Stokhos::create_mean_based_product_tensor<execution_space, typename Storage::ordinal_type, typename Storage::value_type>();
58  mean_vals = Kokkos::make_view<ViewType>("mean-values", mean_cijk, nnz, 1);
59  Kokkos::parallel_for( nnz, *this );
60  }
61 
62  KOKKOS_INLINE_FUNCTION
63  void operator() (const size_type i) const {
64  mean_vals(i) = vals(i).fastAccessCoeff(0);
65  }
66 
67  MeanViewType getMeanValues() const { return mean_vals; }
68 
69  private:
72  };
73 
75 
77  template <class Storage, class ... P>
78  class GetMeanValsFunc< Kokkos::View< Sacado::MP::Vector<Storage>*,
79  P... > > {
80  public:
82  typedef Kokkos::View< Scalar*, P... > ViewType;
85  typedef typename ViewType::size_type size_type;
86 
87  GetMeanValsFunc(const ViewType& vals_) :
88  vals(vals_), vec_size(Kokkos::dimension_scalar(vals))
89  {
90  const size_type nnz = vals.extent(0);
91  mean_vals = ViewType("mean-values", nnz, 1);
92  Kokkos::parallel_for( nnz, *this );
93  }
94 
95  KOKKOS_INLINE_FUNCTION
96  void operator() (const size_type i) const
97  {
98  typename Scalar::value_type s = 0.0;
99  for (size_type j=0; j<vec_size; ++j)
100  s += vals(i).fastAccessCoeff(j);
101  mean_vals(i) = s;
102  }
103 
105 
106  private:
110  };
111 
113 
116  template <class ViewType>
118  public:
119  typedef ViewType MeanViewType;
121  typedef typename ViewType::size_type size_type;
122 
123  GetScalarMeanValsFunc(const ViewType& vals) {
124  mean_vals = ViewType("mean-values", vals.extent(0));
125  Kokkos::deep_copy( mean_vals, vals );
126  }
127 
129 
130  private:
132  };
133 
135 
137  template <class Storage, class ... P>
138  class GetScalarMeanValsFunc< Kokkos::View< Sacado::UQ::PCE<Storage>*,
139  P... > > {
140  public:
142  typedef typename Scalar::value_type MeanScalar;
143  typedef Kokkos::View< Scalar*, P... > ViewType;
144  typedef Kokkos::View< MeanScalar*, P... > MeanViewType;
146  typedef typename ViewType::size_type size_type;
147 
148  GetScalarMeanValsFunc(const ViewType& vals_) : vals(vals_) {
149  const size_type nnz = vals.extent(0);
150  mean_vals = MeanViewType("mean-values", nnz);
151  Kokkos::parallel_for( nnz, *this );
152  }
153 
154  KOKKOS_INLINE_FUNCTION
155  void operator() (const size_type i) const {
156  mean_vals(i) = vals(i).fastAccessCoeff(0);
157  }
158 
160 
161  private:
164  };
165 
167 
169  template <class Storage, class ... P>
170  class GetScalarMeanValsFunc< Kokkos::View< Sacado::MP::Vector<Storage>*,
171  P... > > {
172  public:
174  typedef typename Scalar::value_type MeanScalar;
175  typedef Kokkos::View< Scalar*, P... > ViewType;
176  typedef Kokkos::View< MeanScalar*, P... > MeanViewType;
178  typedef typename ViewType::size_type size_type;
179 
181  vals(vals_), vec_size(Kokkos::dimension_scalar(vals))
182  {
183  const size_type nnz = vals.extent(0);
184  mean_vals = ViewType("mean-values", nnz);
185  Kokkos::parallel_for( nnz, *this );
186  }
187 
188  KOKKOS_INLINE_FUNCTION
189  void operator() (const size_type i) const
190  {
191  typename Scalar::value_type s = 0.0;
192  for (size_type j=0; j<vec_size; ++j)
193  s += vals(i).fastAccessCoeff(j);
194  mean_vals(i) = s;
195  }
196 
198 
199  private:
203  };
204 
205  template <typename Scalar, typename LO, typename GO, typename N>
207  build_mean_matrix(const Tpetra::CrsMatrix<Scalar,LO,GO,N>& A)
208  {
209  using Teuchos::RCP;
210  using Teuchos::rcp;
211  typedef Tpetra::CrsMatrix<Scalar,LO,GO,N> MatrixType;
212  typedef typename MatrixType::local_matrix_device_type KokkosMatrixType;
213  typedef typename KokkosMatrixType::values_type KokkosMatrixValuesType;
214 
215  KokkosMatrixType kokkos_matrix = A.getLocalMatrixDevice();
216  KokkosMatrixValuesType matrix_values = kokkos_matrix.values;
217  const size_t ncols = kokkos_matrix.numCols();
219  typedef typename MeanFunc::MeanViewType KokkosMeanMatrixValuesType;
220  MeanFunc meanfunc(matrix_values);
221  KokkosMeanMatrixValuesType mean_matrix_values = meanfunc.getMeanValues();
222 
223  // From here on we are assuming that
224  // KokkosMeanMatrixValuesType == KokkosMatrixValuestype
225 
226  RCP < MatrixType > mean_matrix =
227  rcp( new MatrixType(A.getCrsGraph(), mean_matrix_values) );
228  mean_matrix->fillComplete();
229  return mean_matrix;
230  }
231 
232  template <typename Scalar, typename LO, typename GO, typename N>
234  build_mean_scalar_matrix(const Tpetra::CrsMatrix<Scalar,LO,GO,N>& A)
235  {
236  using Teuchos::RCP;
237  using Teuchos::rcp;
238  typedef typename Scalar::value_type BaseScalar;
239  typedef Tpetra::CrsMatrix<Scalar,LO,GO,N> MatrixType;
240  typedef Tpetra::CrsMatrix<BaseScalar,LO,GO,N> ScalarMatrixType;
241  typedef typename MatrixType::local_matrix_device_type KokkosMatrixType;
242  typedef typename ScalarMatrixType::local_matrix_device_type ScalarKokkosMatrixType;
243  typedef typename KokkosMatrixType::values_type KokkosMatrixValuesType;
244 
245  KokkosMatrixType kokkos_matrix = A.getLocalMatrixDevice();
246  KokkosMatrixValuesType matrix_values = kokkos_matrix.values;
248  typedef typename MeanFunc::MeanViewType KokkosMeanMatrixValuesType;
249  MeanFunc meanfunc(matrix_values);
250  KokkosMeanMatrixValuesType mean_matrix_values = meanfunc.getMeanValues();
251 
252  // From here on we are assuming that
253  // KokkosMeanMatrixValuesType == ScalarKokkosMatrixValuesType
254 
255  RCP < ScalarMatrixType > mean_matrix =
256  rcp( new ScalarMatrixType(A.getCrsGraph(), mean_matrix_values) );
257  mean_matrix->fillComplete();
258  return mean_matrix;
259  }
260 
261  namespace Impl {
262 
263  // Functor for copying a PCE view to a scalar view
264  // (Assumes view is rank 2, LayoutLeft)
265  template <typename ExecSpace>
266  struct CopyPCE2Scalar {
267  typedef ExecSpace exec_space;
268  template <typename DstView, typename SrcView>
269  CopyPCE2Scalar(const DstView& dst, const SrcView& src) {
270  impl(dst,src);
271  }
272 
273  template <typename DstView, typename SrcView>
274  void impl(const DstView& dst, const SrcView& src) {
275  typedef typename SrcView::non_const_value_type Scalar;
276  const size_t m = src.extent(0);
277  const size_t n = src.extent(1);
278  const size_t p = Kokkos::dimension_scalar(src);
279  Kokkos::RangePolicy<exec_space> policy(0,m);
280  Kokkos::parallel_for( policy, KOKKOS_LAMBDA(const size_t i)
281  {
282  for (size_t j=0; j<n; ++j) {
283  const Scalar& s = src(i,j);
284  for (size_t k=0; k<p; ++k)
285  dst(i,j*p+k) = s.fastAccessCoeff(k);
286  }
287  });
288  }
289  };
290 
291  // Functor for copying a scalar view to a PCE view
292  // (Assumes view is rank 2, LayoutLeft)
293  template <typename ExecSpace>
294  struct CopyScalar2PCE {
295  typedef ExecSpace exec_space;
296 
297  template <typename DstView, typename SrcView>
298  CopyScalar2PCE(const DstView& dst, const SrcView& src) {
299  impl(dst,src);
300  }
301 
302  template <typename DstView, typename SrcView>
303  void impl(const DstView& dst, const SrcView& src) {
304  typedef typename DstView::non_const_value_type Scalar;
305  const size_t m = dst.extent(0);
306  const size_t n = dst.extent(1);
307  const size_t p = Kokkos::dimension_scalar(dst);
308 
309  Kokkos::RangePolicy<exec_space> policy(0,m);
310  Kokkos::parallel_for( policy, KOKKOS_LAMBDA(const size_t i)
311  {
312  for (size_t j=0; j<n; ++j) {
313  Scalar& d = dst(i,j);
314  for (size_t k=0; k<p; ++k)
315  d.fastAccessCoeff(k) = src(i,j*p+k);
316  }
317  });
318  }
319  };
320 
321 #ifdef KOKKOS_ENABLE_CUDA
322  // Specialization for CopyPCE2Scalar specifically for Cuda that ensures
323  // coalesced reads and writes
324  template <>
325  struct CopyPCE2Scalar<Kokkos::Cuda> {
326  typedef Kokkos::Cuda exec_space;
327 
328  template <typename DstView, typename SrcView>
329  CopyPCE2Scalar(const DstView& dst, const SrcView& src) {
330  impl(dst,src);
331  }
332 
333  template <typename DstView, typename SrcView>
334  void impl(const DstView& dst, const SrcView& src) {
335  typedef typename DstView::non_const_value_type Scalar;
336  typedef Kokkos::TeamPolicy<exec_space> Policy;
337  typedef typename Policy::member_type Member;
338 
339  const size_t m = src.extent(0);
340  const size_t n = src.extent(1);
341  const size_t p = Kokkos::dimension_scalar(src);
342 
343  const size_t ChunkSize = 16;
344  const size_t M = (m+ChunkSize-1)/ChunkSize;
345 
346  Policy policy(M,ChunkSize,ChunkSize);
347  Kokkos::parallel_for( policy, [=] __device__(const Member& team)
348  {
349  __shared__ Scalar tmp[ChunkSize][ChunkSize];
350  const size_t i_block = blockIdx.x*ChunkSize;
351 
352  for (size_t j=0; j<n; ++j) {
353  for (size_t k_block=0; k_block<p; k_block+=ChunkSize) {
354 
355  // Make sure previous iteration has completed before overwriting tmp
356  __syncthreads();
357 
358  // Read ChunkSize x ChunkSize block (coalesced on k)
359  size_t i = i_block + threadIdx.y;
360  size_t k = k_block + threadIdx.x;
361  if (i < m && k < p)
362  tmp[threadIdx.y][threadIdx.x] = src(i,j).fastAccessCoeff(k);
363 
364  // Wait for all threads to finish
365  __syncthreads();
366 
367  // Write ChunkSize x ChunkSize block (coalesced on i for LayoutLeft)
368  i = i_block + threadIdx.x;
369  k = k_block + threadIdx.y;
370  if (i < m && k < p)
371  dst(i,j*p+k) = tmp[threadIdx.x][threadIdx.y];
372 
373  }
374  }
375  });
376  }
377  };
378 
379  // Specialization for Scalar2PCE specifically for Cuda that ensures
380  // coalesced reads and writes
381  template <>
382  struct CopyScalar2PCE<Kokkos::Cuda> {
383  typedef Kokkos::Cuda exec_space;
384 
385  template <typename DstView, typename SrcView>
386  CopyScalar2PCE(const DstView& dst, const SrcView& src) {
387  impl(dst,src);
388  }
389 
390  template <typename DstView, typename SrcView>
391  void impl(const DstView& dst, const SrcView& src) {
392  typedef typename SrcView::non_const_value_type Scalar;
393  typedef Kokkos::TeamPolicy<exec_space> Policy;
394  typedef typename Policy::member_type Member;
395 
396  const size_t m = dst.extent(0);
397  const size_t n = dst.extent(1);
398  const size_t p = Kokkos::dimension_scalar(dst);
399 
400  const size_t ChunkSize = 16;
401  const size_t M = (m+ChunkSize-1)/ChunkSize;
402 
403  Policy policy(M,ChunkSize,ChunkSize);
404  Kokkos::parallel_for( policy, [=] __device__(const Member& team)
405  {
406  __shared__ Scalar tmp[ChunkSize][ChunkSize];
407  const size_t i_block = blockIdx.x*ChunkSize;
408 
409  for (size_t j=0; j<n; ++j) {
410  for (size_t k_block=0; k_block<p; k_block+=ChunkSize) {
411 
412  // Make sure previous iteration has completed before overwriting tmp
413  __syncthreads();
414 
415  // Read ChunkSize x ChunkSize block (coalesced on i for LayoutLeft)
416  size_t i = i_block + threadIdx.x;
417  size_t k = k_block + threadIdx.y;
418  if (i < m && k < p)
419  tmp[threadIdx.x][threadIdx.y] = src(i,j*p+k);
420 
421  // Wait for all threads to finish
422  __syncthreads();
423 
424  // Write ChunkSize x ChunkSize block (coalesced on k)
425  i = i_block + threadIdx.y;
426  k = k_block + threadIdx.x;
427  if (i < m && k < p)
428  dst(i,j).fastAccessCoeff(k) = tmp[threadIdx.y][threadIdx.x];
429 
430  }
431  }
432  });
433  }
434  };
435 #endif
436 
437  } // Impl
438 
439  template <typename DstView, typename SrcView>
440  void copy_pce_to_scalar(const DstView& dst, const SrcView& src)
441  {
443  }
444 
445  template <typename DstView, typename SrcView>
446  void copy_scalar_to_pce(const DstView& dst, const SrcView& src)
447  {
449  }
450 
451  // Tpetra operator wrapper allowing a mean0-based operator (with double
452  // scalar type) to be applied to a UQ::PCE multi-vector
453  template <typename Scalar,
454  typename LocalOrdinal,
455  typename GlobalOrdinal,
456  typename Node>
458  virtual public Tpetra::Operator<Scalar, LocalOrdinal, GlobalOrdinal, Node> {
459  public:
460  typedef Scalar scalar_type;
463  typedef Node node_type;
465  typedef Tpetra::Operator<base_scalar_type,LocalOrdinal,GlobalOrdinal,Node> scalar_op_type;
466 
468  mb_op(mb_op_) {}
469 
471 
473  getDomainMap() const {
474  return mb_op->getDomainMap();
475  }
476 
478  getRangeMap() const {
479  return mb_op->getRangeMap();
480  }
481 
482  virtual void
483  apply (const Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &X,
484  Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y,
486  Scalar alpha = Teuchos::ScalarTraits<Scalar>::one(),
487  Scalar beta = Teuchos::ScalarTraits<Scalar>::zero()) const
488  {
489  typedef typename scalar_mv_type::device_type device_type;
490 
491  auto xv = X.getLocalViewDevice(Tpetra::Access::ReadOnly);
492  auto yv = Y.getLocalViewDevice(Tpetra::Access::ReadWrite);
493  const size_t pce_size = Kokkos::dimension_scalar(xv);
494  if (X_s == Teuchos::null ||
495  X_s->getNumVectors() != X.getNumVectors()*pce_size)
496  X_s = Teuchos::rcp(new scalar_mv_type(X.getMap(),
497  X.getNumVectors()*pce_size));
498  if (Y_s == Teuchos::null ||
499  Y_s->getNumVectors() != Y.getNumVectors()*pce_size)
500  Y_s = Teuchos::rcp(new scalar_mv_type(Y.getMap(),
501  Y.getNumVectors()*pce_size));
502  base_scalar_type alpha_s = alpha.fastAccessCoeff(0);
503  base_scalar_type beta_s = beta.fastAccessCoeff(0);
504 
505  {
506  auto xv_s = X_s->getLocalViewDevice(Tpetra::Access::ReadWrite);
507  auto yv_s = Y_s->getLocalViewDevice(Tpetra::Access::ReadWrite);
508 
509  copy_pce_to_scalar(xv_s, xv);
510  if (beta_s != 0.0)
511  copy_pce_to_scalar(yv_s, yv);
512  }
513 
514  mb_op->apply(*X_s, *Y_s, mode, alpha_s, beta_s);
515 
516  {
517  auto yv_s = Y_s->getLocalViewDevice(Tpetra::Access::ReadOnly);
518  copy_scalar_to_pce(yv, yv_s);
519  }
520  }
521 
522  virtual bool hasTransposeApply() const {
523  return mb_op->hasTransposeApply();
524  }
525 
526  private:
527 
528  typedef Tpetra::MultiVector<base_scalar_type,LocalOrdinal,GlobalOrdinal,Node> scalar_mv_type;
531 
532  };
533 
534 }
535 
536 #endif // STOKHOS_TPETRA_UTILITIES_HPP
Teuchos::RCP< Tpetra::CrsMatrix< Scalar, LO, GO, N > > build_mean_matrix(const Tpetra::CrsMatrix< Scalar, LO, GO, N > &A)
void impl(const DstView &dst, const SrcView &src)
Stokhos::StandardStorage< int, double > Storage
CopyPCE2Scalar(const DstView &dst, const SrcView &src)
Kokkos::DefaultExecutionSpace execution_space
virtual Teuchos::RCP< const Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > getRangeMap() const
void copy_pce_to_scalar(const DstView &dst, const SrcView &src)
void impl(const DstView &dst, const SrcView &src)
KOKKOS_INLINE_FUNCTION constexpr std::enable_if< is_view_uq_pce< View< T, P...> >::value, unsigned >::type dimension_scalar(const View< T, P...> &view)
Teuchos::RCP< scalar_mv_type > X_s
MeanViewType getMeanValues() const
Teuchos::RCP< const scalar_op_type > mb_op
Teuchos::RCP< Tpetra::CrsMatrix< typename Scalar::value_type, LO, GO, N > > build_mean_scalar_matrix(const Tpetra::CrsMatrix< Scalar, LO, GO, N > &A)
Tpetra::MultiVector< base_scalar_type, LocalOrdinal, GlobalOrdinal, Node > scalar_mv_type
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
GetMeanValsFunc(const ViewType &vals)
virtual void apply(const Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const
ViewType::execution_space execution_space
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
Get mean values matrix for mean-based preconditioning.
MeanBasedTpetraOperator(const Teuchos::RCP< const scalar_op_type > &mb_op_)
Get mean values matrix for mean-based preconditioning.
void copy_scalar_to_pce(const DstView &dst, const SrcView &src)
CopyScalar2PCE(const DstView &dst, const SrcView &src)
Tpetra::KokkosClassic::DefaultNode::DefaultNodeType Node
ViewType::execution_space execution_space
int n
virtual Teuchos::RCP< const Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > getDomainMap() const
Tpetra::Operator< base_scalar_type, LocalOrdinal, GlobalOrdinal, Node > scalar_op_type
Teuchos::RCP< scalar_mv_type > Y_s