42 #ifndef STOKHOS_TPETRA_UTILITIES_HPP
43 #define STOKHOS_TPETRA_UTILITIES_HPP
47 #include "Tpetra_CrsMatrix.hpp"
55 template <
class ViewType>
63 mean_vals = ViewType(
"mean-values", vals.extent(0));
76 template <
class Storage,
class ... P>
88 typename Scalar::cijk_type mean_cijk =
89 Stokhos::create_mean_based_product_tensor<execution_space, typename Storage::ordinal_type, typename Storage::value_type>();
90 mean_vals = Kokkos::make_view<ViewType>(
"mean-values", mean_cijk, nnz, 1);
91 Kokkos::parallel_for( nnz, *
this );
94 KOKKOS_INLINE_FUNCTION
96 mean_vals(i) = vals(i).fastAccessCoeff(0);
109 template <
class Storage,
class ... P>
124 Kokkos::parallel_for( nnz, *
this );
127 KOKKOS_INLINE_FUNCTION
132 s += vals(i).fastAccessCoeff(
j);
148 template <
class ViewType>
156 mean_vals = ViewType(
"mean-values", vals.extent(0));
169 template <
class Storage,
class ... P>
183 Kokkos::parallel_for( nnz, *
this );
186 KOKKOS_INLINE_FUNCTION
188 mean_vals(i) = vals(i).fastAccessCoeff(0);
201 template <
class Storage,
class ... P>
217 Kokkos::parallel_for( nnz, *
this );
220 KOKKOS_INLINE_FUNCTION
225 s += vals(i).fastAccessCoeff(
j);
237 template <
typename Scalar,
typename LO,
typename GO,
typename N>
243 typedef Tpetra::CrsMatrix<Scalar,LO,GO,N> MatrixType;
244 typedef Tpetra::Map<LO,GO,N> Map;
246 typedef typename MatrixType::local_matrix_type KokkosMatrixType;
248 typedef typename KokkosMatrixType::StaticCrsGraphType KokkosGraphType;
249 typedef typename KokkosMatrixType::values_type KokkosMatrixValuesType;
254 KokkosMatrixType kokkos_matrix = A.getLocalMatrix();
255 KokkosGraphType kokkos_graph = kokkos_matrix.graph;
256 KokkosMatrixValuesType matrix_values = kokkos_matrix.values;
257 const size_t ncols = kokkos_matrix.numCols();
259 typedef typename MeanFunc::MeanViewType KokkosMeanMatrixValuesType;
260 MeanFunc meanfunc(matrix_values);
261 KokkosMeanMatrixValuesType mean_matrix_values = meanfunc.getMeanValues();
266 KokkosMatrixType mean_kokkos_matrix(
267 "mean-matrix", ncols, mean_matrix_values, kokkos_graph);
269 rcp(
new MatrixType(rmap, cmap, mean_kokkos_matrix) );
273 template <
typename Scalar,
typename LO,
typename GO,
typename N>
280 typedef Tpetra::CrsMatrix<Scalar,LO,GO,N> MatrixType;
281 typedef Tpetra::CrsMatrix<BaseScalar,LO,GO,N> ScalarMatrixType;
282 typedef Tpetra::Map<LO,GO,N> Map;
284 typedef typename MatrixType::local_matrix_type KokkosMatrixType;
285 typedef typename ScalarMatrixType::local_matrix_type ScalarKokkosMatrixType;
287 typedef typename KokkosMatrixType::StaticCrsGraphType KokkosGraphType;
288 typedef typename KokkosMatrixType::values_type KokkosMatrixValuesType;
293 KokkosMatrixType kokkos_matrix = A.getLocalMatrix();
294 KokkosGraphType kokkos_graph = kokkos_matrix.graph;
295 KokkosMatrixValuesType matrix_values = kokkos_matrix.values;
296 const size_t ncols = kokkos_matrix.numCols();
298 typedef typename MeanFunc::MeanViewType KokkosMeanMatrixValuesType;
299 MeanFunc meanfunc(matrix_values);
300 KokkosMeanMatrixValuesType mean_matrix_values = meanfunc.getMeanValues();
305 ScalarKokkosMatrixType mean_kokkos_matrix(
306 "mean-matrix", ncols, mean_matrix_values, kokkos_graph);
308 rcp(
new ScalarMatrixType(rmap, cmap, mean_kokkos_matrix) );
316 template <
typename ExecSpace>
319 template <
typename DstView,
typename SrcView>
324 template <
typename DstView,
typename SrcView>
325 void impl(
const DstView& dst,
const SrcView& src) {
326 typedef typename SrcView::non_const_value_type
Scalar;
327 const size_t m = src.extent(0);
328 const size_t n = src.extent(1);
330 Kokkos::RangePolicy<exec_space> policy(0,m);
331 Kokkos::parallel_for( policy, KOKKOS_LAMBDA(
const size_t i)
333 for (
size_t j=0;
j<n; ++
j) {
334 Scalar& s = src(i,
j);
335 for (
size_t k=0; k<p; ++k)
336 dst(i,
j*p+k) = s.fastAccessCoeff(k);
344 template <
typename ExecSpace>
348 template <
typename DstView,
typename SrcView>
353 template <
typename DstView,
typename SrcView>
354 void impl(
const DstView& dst,
const SrcView& src) {
355 typedef typename DstView::non_const_value_type
Scalar;
356 const size_t m = dst.extent(0);
357 const size_t n = dst.extent(1);
360 Kokkos::RangePolicy<exec_space> policy(0,m);
361 Kokkos::parallel_for( policy, KOKKOS_LAMBDA(
const size_t i)
363 for (
size_t j=0;
j<n; ++
j) {
364 Scalar& d = dst(i,
j);
365 for (
size_t k=0; k<p; ++k)
366 d.fastAccessCoeff(k) = src(i,
j*p+k);
372 #ifdef KOKKOS_ENABLE_CUDA
376 struct CopyPCE2Scalar<Kokkos::Cuda> {
379 template <
typename DstView,
typename SrcView>
384 template <
typename DstView,
typename SrcView>
385 void impl(
const DstView& dst,
const SrcView& src) {
386 typedef typename DstView::non_const_value_type
Scalar;
387 typedef Kokkos::TeamPolicy<exec_space> Policy;
388 typedef typename Policy::member_type Member;
390 const size_t m = src.extent(0);
391 const size_t n = src.extent(1);
394 const size_t ChunkSize = 16;
395 const size_t M = (m+ChunkSize-1)/ChunkSize;
397 Policy policy(M,ChunkSize,ChunkSize);
398 Kokkos::parallel_for( policy, [=] __device__(
const Member& team)
400 __shared__ Scalar tmp[ChunkSize][ChunkSize];
401 const size_t i_block = blockIdx.x*ChunkSize;
403 for (
size_t j=0;
j<n; ++
j) {
404 for (
size_t k_block=0; k_block<p; k_block+=ChunkSize) {
410 size_t i = i_block + threadIdx.y;
411 size_t k = k_block + threadIdx.x;
413 tmp[threadIdx.y][threadIdx.x] = src(i,
j).fastAccessCoeff(k);
419 i = i_block + threadIdx.x;
420 k = k_block + threadIdx.y;
422 dst(i,
j*p+k) = tmp[threadIdx.x][threadIdx.y];
433 struct CopyScalar2PCE<Kokkos::Cuda> {
436 template <
typename DstView,
typename SrcView>
441 template <
typename DstView,
typename SrcView>
442 void impl(
const DstView& dst,
const SrcView& src) {
443 typedef typename SrcView::non_const_value_type
Scalar;
444 typedef Kokkos::TeamPolicy<exec_space> Policy;
445 typedef typename Policy::member_type Member;
447 const size_t m = dst.extent(0);
448 const size_t n = dst.extent(1);
451 const size_t ChunkSize = 16;
452 const size_t M = (m+ChunkSize-1)/ChunkSize;
454 Policy policy(M,ChunkSize,ChunkSize);
455 Kokkos::parallel_for( policy, [=] __device__(
const Member& team)
457 __shared__ Scalar tmp[ChunkSize][ChunkSize];
458 const size_t i_block = blockIdx.x*ChunkSize;
460 for (
size_t j=0;
j<n; ++
j) {
461 for (
size_t k_block=0; k_block<p; k_block+=ChunkSize) {
467 size_t i = i_block + threadIdx.x;
468 size_t k = k_block + threadIdx.y;
470 tmp[threadIdx.x][threadIdx.y] = src(i,
j*p+k);
476 i = i_block + threadIdx.y;
477 k = k_block + threadIdx.x;
479 dst(i,
j).fastAccessCoeff(k) = tmp[threadIdx.y][threadIdx.x];
490 template <
typename DstView,
typename SrcView>
496 template <
typename DstView,
typename SrcView>
504 template <
typename Scalar,
509 virtual public Tpetra::Operator<Scalar, LocalOrdinal, GlobalOrdinal, Node> {
516 typedef Tpetra::Operator<base_scalar_type,LocalOrdinal,GlobalOrdinal,Node>
scalar_op_type;
525 return mb_op->getDomainMap();
530 return mb_op->getRangeMap();
534 apply (
const Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &X,
535 Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y,
540 typedef typename scalar_mv_type::device_type device_type;
542 auto xv = X.template getLocalView<device_type>();
543 auto yv = Y.template getLocalView<device_type>();
546 X_s->getNumVectors() != X.getNumVectors()*pce_size)
548 X.getNumVectors()*pce_size));
550 Y_s->getNumVectors() != Y.getNumVectors()*pce_size)
552 Y.getNumVectors()*pce_size));
553 auto xv_s =
X_s->template getLocalView<device_type>();
554 auto yv_s =
Y_s->template getLocalView<device_type>();
568 return mb_op->hasTransposeApply();
573 typedef Tpetra::MultiVector<base_scalar_type,LocalOrdinal,GlobalOrdinal,Node>
scalar_mv_type;
581 #endif // STOKHOS_TPETRA_UTILITIES_HPP
Teuchos::RCP< Tpetra::CrsMatrix< Scalar, LO, GO, N > > build_mean_matrix(const Tpetra::CrsMatrix< Scalar, LO, GO, N > &A)
MeanViewType getMeanValues() const
void impl(const DstView &dst, const SrcView &src)
Stokhos::StandardStorage< int, double > Storage
ViewType::execution_space execution_space
CopyPCE2Scalar(const DstView &dst, const SrcView &src)
Sacado::MP::Vector< Storage > Scalar
ViewType::size_type size_type
MeanViewType getMeanValues() const
GetMeanValsFunc(const ViewType &vals_)
Scalar::value_type MeanScalar
scalar_type::value_type base_scalar_type
Kokkos::DefaultExecutionSpace execution_space
virtual Teuchos::RCP< const Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > getRangeMap() const
ViewType::execution_space execution_space
ViewType::execution_space execution_space
ViewType::execution_space execution_space
void copy_pce_to_scalar(const DstView &dst, const SrcView &src)
void impl(const DstView &dst, const SrcView &src)
Sacado::UQ::PCE< Storage > Scalar
Kokkos::View< Scalar *, P... > ViewType
KOKKOS_INLINE_FUNCTION constexpr std::enable_if< is_view_uq_pce< View< T, P...> >::value, unsigned >::type dimension_scalar(const View< T, P...> &view)
Teuchos::RCP< scalar_mv_type > X_s
MeanViewType getMeanValues() const
ViewType::size_type size_type
virtual bool hasTransposeApply() const
Teuchos::RCP< const scalar_op_type > mb_op
Teuchos::RCP< Tpetra::CrsMatrix< typename Scalar::value_type, LO, GO, N > > build_mean_scalar_matrix(const Tpetra::CrsMatrix< Scalar, LO, GO, N > &A)
virtual ~MeanBasedTpetraOperator()
Sacado::MP::Vector< Storage > Scalar
Tpetra::MultiVector< base_scalar_type, LocalOrdinal, GlobalOrdinal, Node > scalar_mv_type
ViewType::size_type size_type
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Kokkos::View< MeanScalar *, P... > MeanViewType
MeanViewType getMeanValues() const
Kokkos::View< Scalar *, P... > ViewType
GetMeanValsFunc(const ViewType &vals)
KokkosClassic::DefaultNode::DefaultNodeType Node
virtual void apply(const Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const
ViewType::execution_space execution_space
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
MeanViewType getMeanValues() const
ViewType::size_type size_type
MeanViewType getMeanValues() const
Get mean values matrix for mean-based preconditioning.
MeanBasedTpetraOperator(const Teuchos::RCP< const scalar_op_type > &mb_op_)
ViewType::size_type size_type
GetScalarMeanValsFunc(const ViewType &vals_)
Kokkos::View< Scalar *, P... > ViewType
GetMeanValsFunc(const ViewType &vals_)
Get mean values matrix for mean-based preconditioning.
GlobalOrdinal global_ordinal_type
GetScalarMeanValsFunc(const ViewType &vals)
void copy_scalar_to_pce(const DstView &dst, const SrcView &src)
Kokkos::View< MeanScalar *, P... > MeanViewType
Sacado::UQ::PCE< Storage > Scalar
CopyScalar2PCE(const DstView &dst, const SrcView &src)
LocalOrdinal local_ordinal_type
GetScalarMeanValsFunc(const ViewType &vals_)
ViewType::execution_space execution_space
Scalar::value_type MeanScalar
ViewType::size_type size_type
virtual Teuchos::RCP< const Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > getDomainMap() const
Tpetra::Operator< base_scalar_type, LocalOrdinal, GlobalOrdinal, Node > scalar_op_type
Kokkos::View< Scalar *, P... > ViewType
Teuchos::RCP< scalar_mv_type > Y_s