Stokhos_Tpetra_Utilities.hpp
// @HEADER
// ***********************************************************************
//
// Stokhos Package
// Copyright (2009) Sandia Corporation
//
// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
// license for use of this work by or on behalf of the U.S. Government.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
//
// ***********************************************************************
// @HEADER

#ifndef STOKHOS_TPETRA_UTILITIES_HPP
#define STOKHOS_TPETRA_UTILITIES_HPP

#include "Stokhos_Tpetra_UQ_PCE.hpp"
#include "Stokhos_Tpetra_MP_Vector.hpp"
#include "Tpetra_CrsMatrix.hpp"

namespace Stokhos {

  //! Get mean values matrix for mean-based preconditioning
  /*! Default implementation for all scalar types where the mean value
   * is just the scalar value itself.
   */
  template <class ViewType>
  class GetMeanValsFunc {
  public:
    typedef ViewType MeanViewType;
    typedef typename ViewType::execution_space execution_space;
    typedef typename ViewType::size_type size_type;

    GetMeanValsFunc(const ViewType& vals) {
      mean_vals = ViewType("mean-values", vals.extent(0));
      Kokkos::deep_copy( mean_vals, vals );
    }

    MeanViewType getMeanValues() const { return mean_vals; }

  private:
    MeanViewType mean_vals;
  };

  //! Get mean values matrix for mean-based preconditioning
  /*! Specialization for Sacado::UQ::PCE scalar types.
   */
  template <class Storage, class ... P>
  class GetMeanValsFunc< Kokkos::View< Sacado::UQ::PCE<Storage>*,
                                       P... > > {
  public:
    typedef Sacado::UQ::PCE<Storage> Scalar;
    typedef Kokkos::View< Scalar*, P... > ViewType;
    typedef ViewType MeanViewType;
    typedef typename ViewType::execution_space execution_space;
    typedef typename ViewType::size_type size_type;

    GetMeanValsFunc(const ViewType& vals_) : vals(vals_) {
      const size_type nnz = vals.extent(0);
      typename Scalar::cijk_type mean_cijk =
        Stokhos::create_mean_based_product_tensor<execution_space, typename Storage::ordinal_type, typename Storage::value_type>();
      mean_vals = Kokkos::make_view<ViewType>("mean-values", mean_cijk, nnz, 1);
      Kokkos::parallel_for( nnz, *this );
    }

    KOKKOS_INLINE_FUNCTION
    void operator() (const size_type i) const {
      mean_vals(i) = vals(i).fastAccessCoeff(0);
    }

    MeanViewType getMeanValues() const { return mean_vals; }

  private:
    MeanViewType mean_vals;
    ViewType vals;
  };

  //! Get mean values matrix for mean-based preconditioning
  /*! Specialization for Sacado::MP::Vector scalar types.
   */
  template <class Storage, class ... P>
  class GetMeanValsFunc< Kokkos::View< Sacado::MP::Vector<Storage>*,
                                       P... > > {
  public:
    typedef Sacado::MP::Vector<Storage> Scalar;
    typedef Kokkos::View< Scalar*, P... > ViewType;
    typedef ViewType MeanViewType;
    typedef typename ViewType::execution_space execution_space;
    typedef typename ViewType::size_type size_type;

    GetMeanValsFunc(const ViewType& vals_) :
      vals(vals_), vec_size(Kokkos::dimension_scalar(vals))
    {
      const size_type nnz = vals.extent(0);
      mean_vals = ViewType("mean-values", nnz, 1);
      Kokkos::parallel_for( nnz, *this );
    }

    KOKKOS_INLINE_FUNCTION
    void operator() (const size_type i) const
    {
      typename Scalar::value_type s = 0.0;
      for (size_type j=0; j<vec_size; ++j)
        s += vals(i).fastAccessCoeff(j);
      mean_vals(i) = s;
    }

    MeanViewType getMeanValues() const { return mean_vals; }

  private:
    MeanViewType mean_vals;
    ViewType vals;
    const size_type vec_size;
  };
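
  // Usage sketch for GetMeanValsFunc, with hypothetical placeholder names
  // ("pce_vals", "Storage"); it assumes a PCE-valued values view, e.g. the
  // values array of a PCE CrsMatrix, has already been filled:
  //
  //   typedef Kokkos::View< Sacado::UQ::PCE<Storage>* > pce_vals_type;
  //   pce_vals_type pce_vals = ...;                       // filled elsewhere
  //   GetMeanValsFunc<pce_vals_type> mean_func(pce_vals);
  //   // Same length as pce_vals, but each entry keeps only coefficient 0
  //   typename GetMeanValsFunc<pce_vals_type>::MeanViewType mean_vals =
  //     mean_func.getMeanValues();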

  //! Get scalar mean values matrix for mean-based preconditioning
  /*! Default implementation for all scalar types where the scalar mean
   * value is just the scalar value itself.
   */
  template <class ViewType>
  class GetScalarMeanValsFunc {
  public:
    typedef ViewType MeanViewType;
    typedef typename ViewType::execution_space execution_space;
    typedef typename ViewType::size_type size_type;

    GetScalarMeanValsFunc(const ViewType& vals) {
      mean_vals = ViewType("mean-values", vals.extent(0));
      Kokkos::deep_copy( mean_vals, vals );
    }

    MeanViewType getMeanValues() const { return mean_vals; }

  private:
    MeanViewType mean_vals;
  };

  //! Get scalar mean values matrix for mean-based preconditioning
  /*! Specialization for Sacado::UQ::PCE scalar types.
   */
  template <class Storage, class ... P>
  class GetScalarMeanValsFunc< Kokkos::View< Sacado::UQ::PCE<Storage>*,
                                             P... > > {
  public:
    typedef Sacado::UQ::PCE<Storage> Scalar;
    typedef typename Scalar::value_type MeanScalar;
    typedef Kokkos::View< Scalar*, P... > ViewType;
    typedef Kokkos::View< MeanScalar*, P... > MeanViewType;
    typedef typename ViewType::execution_space execution_space;
    typedef typename ViewType::size_type size_type;

    GetScalarMeanValsFunc(const ViewType& vals_) : vals(vals_) {
      const size_type nnz = vals.extent(0);
      mean_vals = MeanViewType("mean-values", nnz);
      Kokkos::parallel_for( nnz, *this );
    }

    KOKKOS_INLINE_FUNCTION
    void operator() (const size_type i) const {
      mean_vals(i) = vals(i).fastAccessCoeff(0);
    }

    MeanViewType getMeanValues() const { return mean_vals; }

  private:
    MeanViewType mean_vals;
    ViewType vals;
  };

  //! Get scalar mean values matrix for mean-based preconditioning
  /*! Specialization for Sacado::MP::Vector scalar types.
   */
  template <class Storage, class ... P>
  class GetScalarMeanValsFunc< Kokkos::View< Sacado::MP::Vector<Storage>*,
                                             P... > > {
  public:
    typedef Sacado::MP::Vector<Storage> Scalar;
    typedef typename Scalar::value_type MeanScalar;
    typedef Kokkos::View< Scalar*, P... > ViewType;
    typedef Kokkos::View< MeanScalar*, P... > MeanViewType;
    typedef typename ViewType::execution_space execution_space;
    typedef typename ViewType::size_type size_type;

    GetScalarMeanValsFunc(const ViewType& vals_) :
      vals(vals_), vec_size(Kokkos::dimension_scalar(vals))
    {
      const size_type nnz = vals.extent(0);
      mean_vals = MeanViewType("mean-values", nnz);
      Kokkos::parallel_for( nnz, *this );
    }

    KOKKOS_INLINE_FUNCTION
    void operator() (const size_type i) const
    {
      typename Scalar::value_type s = 0.0;
      for (size_type j=0; j<vec_size; ++j)
        s += vals(i).fastAccessCoeff(j);
      mean_vals(i) = s;
    }

    MeanViewType getMeanValues() const { return mean_vals; }

  private:
    MeanViewType mean_vals;
    ViewType vals;
    const size_type vec_size;
  };

  template <typename Scalar, typename LO, typename GO, typename N>
  Teuchos::RCP< Tpetra::CrsMatrix<Scalar,LO,GO,N> >
  build_mean_matrix(const Tpetra::CrsMatrix<Scalar,LO,GO,N>& A)
  {
    using Teuchos::RCP;
    using Teuchos::rcp;
    typedef Tpetra::CrsMatrix<Scalar,LO,GO,N> MatrixType;
    typedef Tpetra::Map<LO,GO,N> Map;

    typedef typename MatrixType::local_matrix_type KokkosMatrixType;

    typedef typename KokkosMatrixType::StaticCrsGraphType KokkosGraphType;
    typedef typename KokkosMatrixType::values_type KokkosMatrixValuesType;

    RCP< const Map > rmap = A.getRowMap();
    RCP< const Map > cmap = A.getColMap();

    KokkosMatrixType kokkos_matrix = A.getLocalMatrix();
    KokkosGraphType kokkos_graph = kokkos_matrix.graph;
    KokkosMatrixValuesType matrix_values = kokkos_matrix.values;
    const size_t ncols = kokkos_matrix.numCols();
    typedef GetMeanValsFunc<KokkosMatrixValuesType> MeanFunc;
    typedef typename MeanFunc::MeanViewType KokkosMeanMatrixValuesType;
    MeanFunc meanfunc(matrix_values);
    KokkosMeanMatrixValuesType mean_matrix_values = meanfunc.getMeanValues();

    // From here on we are assuming that
    // KokkosMeanMatrixValuesType == KokkosMatrixValuesType

    KokkosMatrixType mean_kokkos_matrix(
      "mean-matrix", ncols, mean_matrix_values, kokkos_graph);
    RCP< MatrixType > mean_matrix =
      rcp( new MatrixType(rmap, cmap, mean_kokkos_matrix) );
    return mean_matrix;
  }

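  // Usage sketch for build_mean_matrix, with hypothetical placeholder names
  // ("A_pce", "pce_scalar", "LO", "GO", "Node"); a PCE-valued CrsMatrix is
  // assumed to have been assembled elsewhere:
  //
  //   typedef Sacado::UQ::PCE<Storage> pce_scalar;
  //   typedef Tpetra::CrsMatrix<pce_scalar,LO,GO,Node> pce_matrix_type;
  //   Teuchos::RCP<pce_matrix_type> A_pce = ...;  // assembled elsewhere
  //   // Same graph as A_pce; values are PCE scalars holding only the
  //   // mean (0th) coefficient
  //   Teuchos::RCP<pce_matrix_type> A_mean = Stokhos::build_mean_matrix(*A_pce);
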
  template <typename Scalar, typename LO, typename GO, typename N>
  Teuchos::RCP< Tpetra::CrsMatrix<typename Scalar::value_type,LO,GO,N> >
  build_mean_scalar_matrix(const Tpetra::CrsMatrix<Scalar,LO,GO,N>& A)
  {
    using Teuchos::RCP;
    using Teuchos::rcp;
    typedef typename Scalar::value_type BaseScalar;
    typedef Tpetra::CrsMatrix<Scalar,LO,GO,N> MatrixType;
    typedef Tpetra::CrsMatrix<BaseScalar,LO,GO,N> ScalarMatrixType;
    typedef Tpetra::Map<LO,GO,N> Map;

    typedef typename MatrixType::local_matrix_type KokkosMatrixType;
    typedef typename ScalarMatrixType::local_matrix_type ScalarKokkosMatrixType;

    typedef typename KokkosMatrixType::StaticCrsGraphType KokkosGraphType;
    typedef typename KokkosMatrixType::values_type KokkosMatrixValuesType;

    RCP< const Map > rmap = A.getRowMap();
    RCP< const Map > cmap = A.getColMap();

    KokkosMatrixType kokkos_matrix = A.getLocalMatrix();
    KokkosGraphType kokkos_graph = kokkos_matrix.graph;
    KokkosMatrixValuesType matrix_values = kokkos_matrix.values;
    const size_t ncols = kokkos_matrix.numCols();
    typedef GetScalarMeanValsFunc<KokkosMatrixValuesType> MeanFunc;
    typedef typename MeanFunc::MeanViewType KokkosMeanMatrixValuesType;
    MeanFunc meanfunc(matrix_values);
    KokkosMeanMatrixValuesType mean_matrix_values = meanfunc.getMeanValues();

    // From here on we are assuming that
    // KokkosMeanMatrixValuesType == ScalarKokkosMatrixValuesType

    ScalarKokkosMatrixType mean_kokkos_matrix(
      "mean-matrix", ncols, mean_matrix_values, kokkos_graph);
    RCP< ScalarMatrixType > mean_matrix =
      rcp( new ScalarMatrixType(rmap, cmap, mean_kokkos_matrix) );
    return mean_matrix;
  }
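
  // Usage sketch for build_mean_scalar_matrix (hypothetical placeholder
  // names): unlike build_mean_matrix above, the result is a CrsMatrix over
  // the base scalar type (e.g. double), which can be handed to an ordinary
  // scalar preconditioner or wrapped in the MeanBasedTpetraOperator defined
  // further below.
  //
  //   typedef Tpetra::CrsMatrix<pce_scalar,LO,GO,Node> pce_matrix_type;
  //   typedef Tpetra::CrsMatrix<typename pce_scalar::value_type,LO,GO,Node>
  //     scalar_matrix_type;
  //   Teuchos::RCP<pce_matrix_type> A_pce = ...;  // assembled elsewhere
  //   Teuchos::RCP<scalar_matrix_type> A_mean_s =
  //     Stokhos::build_mean_scalar_matrix(*A_pce);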

  namespace Impl {

  // Functor for copying a PCE view to a scalar view
  // (Assumes view is rank 2, LayoutLeft)
  template <typename ExecSpace>
  struct CopyPCE2Scalar {
    typedef ExecSpace exec_space;
    template <typename DstView, typename SrcView>
    CopyPCE2Scalar(const DstView& dst, const SrcView& src) {
      impl(dst,src);
    }

    template <typename DstView, typename SrcView>
    void impl(const DstView& dst, const SrcView& src) {
      typedef typename SrcView::non_const_value_type Scalar;
      const size_t m = src.extent(0);
      const size_t n = src.extent(1);
      const size_t p = Kokkos::dimension_scalar(src);
      Kokkos::RangePolicy<exec_space> policy(0,m);
      Kokkos::parallel_for( policy, KOKKOS_LAMBDA(const size_t i)
      {
        for (size_t j=0; j<n; ++j) {
          Scalar& s = src(i,j);
          for (size_t k=0; k<p; ++k)
            dst(i,j*p+k) = s.fastAccessCoeff(k);
        }
      });
    }
  };

  // Functor for copying a scalar view to a PCE view
  // (Assumes view is rank 2, LayoutLeft)
  template <typename ExecSpace>
  struct CopyScalar2PCE {
    typedef ExecSpace exec_space;

    template <typename DstView, typename SrcView>
    CopyScalar2PCE(const DstView& dst, const SrcView& src) {
      impl(dst,src);
    }

    template <typename DstView, typename SrcView>
    void impl(const DstView& dst, const SrcView& src) {
      typedef typename DstView::non_const_value_type Scalar;
      const size_t m = dst.extent(0);
      const size_t n = dst.extent(1);
      const size_t p = Kokkos::dimension_scalar(dst);

      Kokkos::RangePolicy<exec_space> policy(0,m);
      Kokkos::parallel_for( policy, KOKKOS_LAMBDA(const size_t i)
      {
        for (size_t j=0; j<n; ++j) {
          Scalar& d = dst(i,j);
          for (size_t k=0; k<p; ++k)
            d.fastAccessCoeff(k) = src(i,j*p+k);
        }
      });
    }
  };

#ifdef KOKKOS_ENABLE_CUDA
  // Specialization of CopyPCE2Scalar specifically for Cuda that ensures
  // coalesced reads and writes
  template <>
  struct CopyPCE2Scalar<Kokkos::Cuda> {
    typedef Kokkos::Cuda exec_space;

    template <typename DstView, typename SrcView>
    CopyPCE2Scalar(const DstView& dst, const SrcView& src) {
      impl(dst,src);
    }

    template <typename DstView, typename SrcView>
    void impl(const DstView& dst, const SrcView& src) {
      typedef typename DstView::non_const_value_type Scalar;
      typedef Kokkos::TeamPolicy<exec_space> Policy;
      typedef typename Policy::member_type Member;

      const size_t m = src.extent(0);
      const size_t n = src.extent(1);
      const size_t p = Kokkos::dimension_scalar(src);

      const size_t ChunkSize = 16;
      const size_t M = (m+ChunkSize-1)/ChunkSize;

      Policy policy(M,ChunkSize,ChunkSize);
      Kokkos::parallel_for( policy, [=] __device__(const Member& team)
      {
        __shared__ Scalar tmp[ChunkSize][ChunkSize];
        const size_t i_block = blockIdx.x*ChunkSize;

        for (size_t j=0; j<n; ++j) {
          for (size_t k_block=0; k_block<p; k_block+=ChunkSize) {

            // Make sure previous iteration has completed before overwriting tmp
            __syncthreads();

            // Read ChunkSize x ChunkSize block (coalesced on k)
            size_t i = i_block + threadIdx.y;
            size_t k = k_block + threadIdx.x;
            if (i < m && k < p)
              tmp[threadIdx.y][threadIdx.x] = src(i,j).fastAccessCoeff(k);

            // Wait for all threads to finish
            __syncthreads();

            // Write ChunkSize x ChunkSize block (coalesced on i for LayoutLeft)
            i = i_block + threadIdx.x;
            k = k_block + threadIdx.y;
            if (i < m && k < p)
              dst(i,j*p+k) = tmp[threadIdx.x][threadIdx.y];

          }
        }
      });
    }
  };

  // Specialization of CopyScalar2PCE specifically for Cuda that ensures
  // coalesced reads and writes
  template <>
  struct CopyScalar2PCE<Kokkos::Cuda> {
    typedef Kokkos::Cuda exec_space;

    template <typename DstView, typename SrcView>
    CopyScalar2PCE(const DstView& dst, const SrcView& src) {
      impl(dst,src);
    }

    template <typename DstView, typename SrcView>
    void impl(const DstView& dst, const SrcView& src) {
      typedef typename SrcView::non_const_value_type Scalar;
      typedef Kokkos::TeamPolicy<exec_space> Policy;
      typedef typename Policy::member_type Member;

      const size_t m = dst.extent(0);
      const size_t n = dst.extent(1);
      const size_t p = Kokkos::dimension_scalar(dst);

      const size_t ChunkSize = 16;
      const size_t M = (m+ChunkSize-1)/ChunkSize;

      Policy policy(M,ChunkSize,ChunkSize);
      Kokkos::parallel_for( policy, [=] __device__(const Member& team)
      {
        __shared__ Scalar tmp[ChunkSize][ChunkSize];
        const size_t i_block = blockIdx.x*ChunkSize;

        for (size_t j=0; j<n; ++j) {
          for (size_t k_block=0; k_block<p; k_block+=ChunkSize) {

            // Make sure previous iteration has completed before overwriting tmp
            __syncthreads();

            // Read ChunkSize x ChunkSize block (coalesced on i for LayoutLeft)
            size_t i = i_block + threadIdx.x;
            size_t k = k_block + threadIdx.y;
            if (i < m && k < p)
              tmp[threadIdx.x][threadIdx.y] = src(i,j*p+k);

            // Wait for all threads to finish
            __syncthreads();

            // Write ChunkSize x ChunkSize block (coalesced on k)
            i = i_block + threadIdx.y;
            k = k_block + threadIdx.x;
            if (i < m && k < p)
              dst(i,j).fastAccessCoeff(k) = tmp[threadIdx.y][threadIdx.x];

          }
        }
      });
    }
  };
#endif

  } // Impl

  template <typename DstView, typename SrcView>
  void copy_pce_to_scalar(const DstView& dst, const SrcView& src)
  {
    Impl::CopyPCE2Scalar<typename DstView::execution_space>(dst,src);
  }

  template <typename DstView, typename SrcView>
  void copy_scalar_to_pce(const DstView& dst, const SrcView& src)
  {
    Impl::CopyScalar2PCE<typename DstView::execution_space>(dst,src);
  }

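  // Usage sketch for the copy helpers (hypothetical placeholder names): a
  // rank-2, LayoutLeft PCE view of shape (m, n) with pce_size coefficients
  // per entry is flattened into a scalar view of shape (m, n*pce_size), and
  // copied back after the scalar computation.
  //
  //   auto xv   = X.getLocalView<device_type>();    // PCE multi-vector view
  //   auto xv_s = X_s.getLocalView<device_type>();  // scalar view, n*pce_size columns
  //   copy_pce_to_scalar(xv_s, xv);  // dst = flattened scalar view, src = PCE view
  //   ...                            // operate on the flattened data
  //   copy_scalar_to_pce(xv, xv_s);  // copy the results back into the PCE view
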
  // Tpetra operator wrapper allowing a mean-based operator (with double
  // scalar type) to be applied to a UQ::PCE multi-vector
  template <typename Scalar,
            typename LocalOrdinal,
            typename GlobalOrdinal,
            typename Node>
  class MeanBasedTpetraOperator :
    virtual public Tpetra::Operator<Scalar, LocalOrdinal, GlobalOrdinal, Node> {
  public:
    typedef Scalar scalar_type;
    typedef LocalOrdinal local_ordinal_type;
    typedef GlobalOrdinal global_ordinal_type;
    typedef Node node_type;
    typedef typename Scalar::value_type base_scalar_type;
    typedef Tpetra::Operator<base_scalar_type,LocalOrdinal,GlobalOrdinal,Node> scalar_op_type;

    MeanBasedTpetraOperator(const Teuchos::RCP<const scalar_op_type>& mb_op_) :
      mb_op(mb_op_) {}

    virtual ~MeanBasedTpetraOperator() {}

    virtual Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >
    getDomainMap() const {
      return mb_op->getDomainMap();
    }

    virtual Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >
    getRangeMap() const {
      return mb_op->getRangeMap();
    }

    virtual void
    apply (const Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &X,
           Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y,
           Teuchos::ETransp mode = Teuchos::NO_TRANS,
           Scalar alpha = Teuchos::ScalarTraits<Scalar>::one(),
           Scalar beta = Teuchos::ScalarTraits<Scalar>::zero()) const
    {
      typedef typename scalar_mv_type::device_type device_type;

      auto xv = X.template getLocalView<device_type>();
      auto yv = Y.template getLocalView<device_type>();
      const size_t pce_size = Kokkos::dimension_scalar(xv);
      if (X_s == Teuchos::null ||
          X_s->getNumVectors() != X.getNumVectors()*pce_size)
        X_s = Teuchos::rcp(new scalar_mv_type(X.getMap(),
                                              X.getNumVectors()*pce_size));
      if (Y_s == Teuchos::null ||
          Y_s->getNumVectors() != Y.getNumVectors()*pce_size)
        Y_s = Teuchos::rcp(new scalar_mv_type(Y.getMap(),
                                              Y.getNumVectors()*pce_size));
      auto xv_s = X_s->template getLocalView<device_type>();
      auto yv_s = Y_s->template getLocalView<device_type>();
      base_scalar_type alpha_s = alpha.fastAccessCoeff(0);
      base_scalar_type beta_s = beta.fastAccessCoeff(0);

      copy_pce_to_scalar(xv_s, xv);
      if (beta_s != 0.0)
        copy_pce_to_scalar(yv_s, yv);

      mb_op->apply(*X_s, *Y_s, mode, alpha_s, beta_s);

      copy_scalar_to_pce(yv, yv_s);
    }

    virtual bool hasTransposeApply() const {
      return mb_op->hasTransposeApply();
    }

  private:

    typedef Tpetra::MultiVector<base_scalar_type,LocalOrdinal,GlobalOrdinal,Node> scalar_mv_type;
    mutable Teuchos::RCP<scalar_mv_type> X_s, Y_s;
    Teuchos::RCP<const scalar_op_type> mb_op;

  };

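  // Usage sketch for MeanBasedTpetraOperator (hypothetical placeholder
  // names): wrap the scalar-valued mean matrix produced by
  // build_mean_scalar_matrix (or any other Tpetra::Operator over the base
  // scalar type) so it can be applied directly to UQ::PCE multi-vectors.
  //
  //   typedef Sacado::UQ::PCE<Storage> pce_scalar;
  //   Teuchos::RCP<Tpetra::CrsMatrix<double,LO,GO,Node> > A_mean_s =
  //     Stokhos::build_mean_scalar_matrix(*A_pce);
  //   Stokhos::MeanBasedTpetraOperator<pce_scalar,LO,GO,Node> mean_op(A_mean_s);
  //   // X, Y are Tpetra::MultiVector<pce_scalar,LO,GO,Node>
  //   mean_op.apply(X, Y);  // applies the mean matrix to every PCE coefficient
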
} // namespace Stokhos

#endif // STOKHOS_TPETRA_UTILITIES_HPP