10 #ifndef STOKHOS_TPETRA_UTILITIES_HPP
11 #define STOKHOS_TPETRA_UTILITIES_HPP
15 #include "Tpetra_CrsMatrix.hpp"
23 template <
class ViewType>
31 mean_vals = ViewType(
"mean-values", vals.extent(0));
44 template <
class Storage,
class ... P>
56 typename Scalar::cijk_type mean_cijk =
57 Stokhos::create_mean_based_product_tensor<execution_space, typename Storage::ordinal_type, typename Storage::value_type>();
58 mean_vals = Kokkos::make_view<ViewType>(
"mean-values", mean_cijk, nnz, 1);
59 Kokkos::parallel_for( nnz, *
this );
62 KOKKOS_INLINE_FUNCTION
64 mean_vals(i) = vals(i).fastAccessCoeff(0);
77 template <
class Storage,
class ... P>
92 Kokkos::parallel_for( nnz, *
this );
95 KOKKOS_INLINE_FUNCTION
100 s += vals(i).fastAccessCoeff(
j);
116 template <
class ViewType>
124 mean_vals = ViewType(
"mean-values", vals.extent(0));
137 template <
class Storage,
class ... P>
151 Kokkos::parallel_for( nnz, *
this );
154 KOKKOS_INLINE_FUNCTION
156 mean_vals(i) = vals(i).fastAccessCoeff(0);
169 template <
class Storage,
class ... P>
185 Kokkos::parallel_for( nnz, *
this );
188 KOKKOS_INLINE_FUNCTION
193 s += vals(i).fastAccessCoeff(
j);
205 template <
typename Scalar,
typename LO,
typename GO,
typename N>
211 typedef Tpetra::CrsMatrix<Scalar,LO,GO,N> MatrixType;
212 typedef typename MatrixType::local_matrix_device_type KokkosMatrixType;
213 typedef typename KokkosMatrixType::values_type KokkosMatrixValuesType;
215 KokkosMatrixType kokkos_matrix = A.getLocalMatrixDevice();
216 KokkosMatrixValuesType matrix_values = kokkos_matrix.values;
217 const size_t ncols = kokkos_matrix.numCols();
219 typedef typename MeanFunc::MeanViewType KokkosMeanMatrixValuesType;
220 MeanFunc meanfunc(matrix_values);
221 KokkosMeanMatrixValuesType mean_matrix_values = meanfunc.getMeanValues();
227 rcp(
new MatrixType(A.getCrsGraph(), mean_matrix_values) );
228 mean_matrix->fillComplete();
232 template <
typename Scalar,
typename LO,
typename GO,
typename N>
239 typedef Tpetra::CrsMatrix<Scalar,LO,GO,N> MatrixType;
240 typedef Tpetra::CrsMatrix<BaseScalar,LO,GO,N> ScalarMatrixType;
241 typedef typename MatrixType::local_matrix_device_type KokkosMatrixType;
242 typedef typename ScalarMatrixType::local_matrix_device_type ScalarKokkosMatrixType;
243 typedef typename KokkosMatrixType::values_type KokkosMatrixValuesType;
245 KokkosMatrixType kokkos_matrix = A.getLocalMatrixDevice();
246 KokkosMatrixValuesType matrix_values = kokkos_matrix.values;
248 typedef typename MeanFunc::MeanViewType KokkosMeanMatrixValuesType;
249 MeanFunc meanfunc(matrix_values);
250 KokkosMeanMatrixValuesType mean_matrix_values = meanfunc.getMeanValues();
256 rcp(
new ScalarMatrixType(A.getCrsGraph(), mean_matrix_values) );
257 mean_matrix->fillComplete();
265 template <
typename ExecSpace>
268 template <
typename DstView,
typename SrcView>
273 template <
typename DstView,
typename SrcView>
274 void impl(
const DstView& dst,
const SrcView& src) {
275 typedef typename SrcView::non_const_value_type
Scalar;
276 const size_t m = src.extent(0);
277 const size_t n = src.extent(1);
279 Kokkos::RangePolicy<exec_space> policy(0,m);
280 Kokkos::parallel_for( policy, KOKKOS_LAMBDA(
const size_t i)
282 for (
size_t j=0;
j<n; ++
j) {
283 const Scalar& s = src(i,
j);
284 for (
size_t k=0; k<p; ++k)
285 dst(i,
j*p+k) = s.fastAccessCoeff(k);
293 template <
typename ExecSpace>
297 template <
typename DstView,
typename SrcView>
302 template <
typename DstView,
typename SrcView>
303 void impl(
const DstView& dst,
const SrcView& src) {
304 typedef typename DstView::non_const_value_type
Scalar;
305 const size_t m = dst.extent(0);
306 const size_t n = dst.extent(1);
309 Kokkos::RangePolicy<exec_space> policy(0,m);
310 Kokkos::parallel_for( policy, KOKKOS_LAMBDA(
const size_t i)
312 for (
size_t j=0;
j<n; ++
j) {
313 Scalar& d = dst(i,
j);
314 for (
size_t k=0; k<p; ++k)
315 d.fastAccessCoeff(k) = src(i,
j*p+k);
321 #ifdef KOKKOS_ENABLE_CUDA
325 struct CopyPCE2Scalar<Kokkos::Cuda> {
328 template <
typename DstView,
typename SrcView>
333 template <
typename DstView,
typename SrcView>
334 void impl(
const DstView& dst,
const SrcView& src) {
335 typedef typename DstView::non_const_value_type
Scalar;
336 typedef Kokkos::TeamPolicy<exec_space> Policy;
337 typedef typename Policy::member_type Member;
339 const size_t m = src.extent(0);
340 const size_t n = src.extent(1);
343 const size_t ChunkSize = 16;
344 const size_t M = (m+ChunkSize-1)/ChunkSize;
346 Policy policy(M,ChunkSize,ChunkSize);
347 Kokkos::parallel_for( policy, [=] __device__(
const Member& team)
349 __shared__ Scalar tmp[ChunkSize][ChunkSize];
350 const size_t i_block = blockIdx.x*ChunkSize;
352 for (
size_t j=0;
j<n; ++
j) {
353 for (
size_t k_block=0; k_block<p; k_block+=ChunkSize) {
359 size_t i = i_block + threadIdx.y;
360 size_t k = k_block + threadIdx.x;
362 tmp[threadIdx.y][threadIdx.x] = src(i,
j).fastAccessCoeff(k);
368 i = i_block + threadIdx.x;
369 k = k_block + threadIdx.y;
371 dst(i,
j*p+k) = tmp[threadIdx.x][threadIdx.y];
382 struct CopyScalar2PCE<Kokkos::Cuda> {
385 template <
typename DstView,
typename SrcView>
390 template <
typename DstView,
typename SrcView>
391 void impl(
const DstView& dst,
const SrcView& src) {
392 typedef typename SrcView::non_const_value_type
Scalar;
393 typedef Kokkos::TeamPolicy<exec_space> Policy;
394 typedef typename Policy::member_type Member;
396 const size_t m = dst.extent(0);
397 const size_t n = dst.extent(1);
400 const size_t ChunkSize = 16;
401 const size_t M = (m+ChunkSize-1)/ChunkSize;
403 Policy policy(M,ChunkSize,ChunkSize);
404 Kokkos::parallel_for( policy, [=] __device__(
const Member& team)
406 __shared__ Scalar tmp[ChunkSize][ChunkSize];
407 const size_t i_block = blockIdx.x*ChunkSize;
409 for (
size_t j=0;
j<n; ++
j) {
410 for (
size_t k_block=0; k_block<p; k_block+=ChunkSize) {
416 size_t i = i_block + threadIdx.x;
417 size_t k = k_block + threadIdx.y;
419 tmp[threadIdx.x][threadIdx.y] = src(i,
j*p+k);
425 i = i_block + threadIdx.y;
426 k = k_block + threadIdx.x;
428 dst(i,
j).fastAccessCoeff(k) = tmp[threadIdx.y][threadIdx.x];
439 template <
typename DstView,
typename SrcView>
445 template <
typename DstView,
typename SrcView>
453 template <
typename Scalar,
458 virtual public Tpetra::Operator<Scalar, LocalOrdinal, GlobalOrdinal, Node> {
465 typedef Tpetra::Operator<base_scalar_type,LocalOrdinal,GlobalOrdinal,Node>
scalar_op_type;
474 return mb_op->getDomainMap();
479 return mb_op->getRangeMap();
483 apply (
const Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &X,
484 Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y,
489 typedef typename scalar_mv_type::device_type device_type;
491 auto xv = X.getLocalViewDevice(Tpetra::Access::ReadOnly);
492 auto yv = Y.getLocalViewDevice(Tpetra::Access::ReadWrite);
495 X_s->getNumVectors() != X.getNumVectors()*pce_size)
497 X.getNumVectors()*pce_size));
499 Y_s->getNumVectors() != Y.getNumVectors()*pce_size)
501 Y.getNumVectors()*pce_size));
506 auto xv_s =
X_s->getLocalViewDevice(Tpetra::Access::ReadWrite);
507 auto yv_s =
Y_s->getLocalViewDevice(Tpetra::Access::ReadWrite);
517 auto yv_s =
Y_s->getLocalViewDevice(Tpetra::Access::ReadOnly);
523 return mb_op->hasTransposeApply();
528 typedef Tpetra::MultiVector<base_scalar_type,LocalOrdinal,GlobalOrdinal,Node>
scalar_mv_type;
536 #endif // STOKHOS_TPETRA_UTILITIES_HPP
Teuchos::RCP< Tpetra::CrsMatrix< Scalar, LO, GO, N > > build_mean_matrix(const Tpetra::CrsMatrix< Scalar, LO, GO, N > &A)
MeanViewType getMeanValues() const
void impl(const DstView &dst, const SrcView &src)
Stokhos::StandardStorage< int, double > Storage
ViewType::execution_space execution_space
CopyPCE2Scalar(const DstView &dst, const SrcView &src)
Sacado::MP::Vector< Storage > Scalar
ViewType::size_type size_type
MeanViewType getMeanValues() const
GetMeanValsFunc(const ViewType &vals_)
Scalar::value_type MeanScalar
scalar_type::value_type base_scalar_type
Kokkos::DefaultExecutionSpace execution_space
virtual Teuchos::RCP< const Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > getRangeMap() const
ViewType::execution_space execution_space
ViewType::execution_space execution_space
ViewType::execution_space execution_space
void copy_pce_to_scalar(const DstView &dst, const SrcView &src)
void impl(const DstView &dst, const SrcView &src)
Sacado::UQ::PCE< Storage > Scalar
Kokkos::View< Scalar *, P... > ViewType
KOKKOS_INLINE_FUNCTION constexpr std::enable_if< is_view_uq_pce< View< T, P...> >::value, unsigned >::type dimension_scalar(const View< T, P...> &view)
Teuchos::RCP< scalar_mv_type > X_s
MeanViewType getMeanValues() const
ViewType::size_type size_type
virtual bool hasTransposeApply() const
Teuchos::RCP< const scalar_op_type > mb_op
Teuchos::RCP< Tpetra::CrsMatrix< typename Scalar::value_type, LO, GO, N > > build_mean_scalar_matrix(const Tpetra::CrsMatrix< Scalar, LO, GO, N > &A)
virtual ~MeanBasedTpetraOperator()
Sacado::MP::Vector< Storage > Scalar
Tpetra::MultiVector< base_scalar_type, LocalOrdinal, GlobalOrdinal, Node > scalar_mv_type
ViewType::size_type size_type
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Kokkos::View< MeanScalar *, P... > MeanViewType
MeanViewType getMeanValues() const
Kokkos::View< Scalar *, P... > ViewType
GetMeanValsFunc(const ViewType &vals)
virtual void apply(const Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const
ViewType::execution_space execution_space
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
MeanViewType getMeanValues() const
ViewType::size_type size_type
MeanViewType getMeanValues() const
Get mean values matrix for mean-based preconditioning.
MeanBasedTpetraOperator(const Teuchos::RCP< const scalar_op_type > &mb_op_)
ViewType::size_type size_type
GetScalarMeanValsFunc(const ViewType &vals_)
Kokkos::View< Scalar *, P... > ViewType
GetMeanValsFunc(const ViewType &vals_)
Get mean values matrix for mean-based preconditioning.
GlobalOrdinal global_ordinal_type
GetScalarMeanValsFunc(const ViewType &vals)
void copy_scalar_to_pce(const DstView &dst, const SrcView &src)
Kokkos::View< MeanScalar *, P... > MeanViewType
Sacado::UQ::PCE< Storage > Scalar
CopyScalar2PCE(const DstView &dst, const SrcView &src)
Tpetra::KokkosClassic::DefaultNode::DefaultNodeType Node
LocalOrdinal local_ordinal_type
GetScalarMeanValsFunc(const ViewType &vals_)
ViewType::execution_space execution_space
Scalar::value_type MeanScalar
ViewType::size_type size_type
virtual Teuchos::RCP< const Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > getDomainMap() const
Tpetra::Operator< base_scalar_type, LocalOrdinal, GlobalOrdinal, Node > scalar_op_type
Kokkos::View< Scalar *, P... > ViewType
Teuchos::RCP< scalar_mv_type > Y_s