10 #ifndef STOKHOS_TPETRA_UTILITIES_HPP 
   11 #define STOKHOS_TPETRA_UTILITIES_HPP 
   15 #include "Tpetra_CrsMatrix.hpp" 
   23   template <
class ViewType>
 
   31       mean_vals = ViewType(
"mean-values", vals.extent(0));
 
   44   template <
class Storage, 
class ... P>
 
   56       typename Scalar::cijk_type mean_cijk =
 
   57         Stokhos::create_mean_based_product_tensor<execution_space, typename Storage::ordinal_type, typename Storage::value_type>();
 
   58       mean_vals = Kokkos::make_view<ViewType>(
"mean-values", mean_cijk, nnz, 1);
 
   59       Kokkos::parallel_for( nnz, *
this );
 
   62     KOKKOS_INLINE_FUNCTION
 
   64       mean_vals(i) = vals(i).fastAccessCoeff(0);
 
   77   template <
class Storage, 
class ... P>
 
   92       Kokkos::parallel_for( nnz, *
this );
 
   95     KOKKOS_INLINE_FUNCTION
 
  100         s += vals(i).fastAccessCoeff(
j);
 
  116   template <
class ViewType>
 
  124       mean_vals = ViewType(
"mean-values", vals.extent(0));
 
  137   template <
class Storage, 
class ... P>
 
  151       Kokkos::parallel_for( nnz, *
this );
 
  154     KOKKOS_INLINE_FUNCTION
 
  156       mean_vals(i) = vals(i).fastAccessCoeff(0);
 
  169   template <
class Storage, 
class ... P>
 
  185       Kokkos::parallel_for( nnz, *
this );
 
  188     KOKKOS_INLINE_FUNCTION
 
  193         s += vals(i).fastAccessCoeff(
j);
 
  205   template <
typename Scalar, 
typename LO, 
typename GO, 
typename N>
 
  211     typedef Tpetra::CrsMatrix<Scalar,LO,GO,N> MatrixType;
 
  212     typedef typename MatrixType::local_matrix_device_type KokkosMatrixType;
 
  213     typedef typename KokkosMatrixType::values_type KokkosMatrixValuesType;
 
  215     KokkosMatrixType kokkos_matrix = A.getLocalMatrixDevice();
 
  216     KokkosMatrixValuesType matrix_values = kokkos_matrix.values;
 
  217     const size_t ncols = kokkos_matrix.numCols();
 
  219     typedef typename MeanFunc::MeanViewType KokkosMeanMatrixValuesType;
 
  220     MeanFunc meanfunc(matrix_values);
 
  221     KokkosMeanMatrixValuesType mean_matrix_values = meanfunc.getMeanValues();
 
  227       rcp( 
new MatrixType(A.getCrsGraph(), mean_matrix_values) );
 
  228     mean_matrix->fillComplete();
 
  232   template <
typename Scalar, 
typename LO, 
typename GO, 
typename N>
 
  239     typedef Tpetra::CrsMatrix<Scalar,LO,GO,N> MatrixType;
 
  240     typedef Tpetra::CrsMatrix<BaseScalar,LO,GO,N> ScalarMatrixType;
 
  241     typedef typename MatrixType::local_matrix_device_type KokkosMatrixType;
 
  242     typedef typename KokkosMatrixType::values_type KokkosMatrixValuesType;
 
  244     KokkosMatrixType kokkos_matrix = A.getLocalMatrixDevice();
 
  245     KokkosMatrixValuesType matrix_values = kokkos_matrix.values;
 
  247     typedef typename MeanFunc::MeanViewType KokkosMeanMatrixValuesType;
 
  248     MeanFunc meanfunc(matrix_values);
 
  249     KokkosMeanMatrixValuesType mean_matrix_values = meanfunc.getMeanValues();
 
  255       rcp( 
new ScalarMatrixType(A.getCrsGraph(), mean_matrix_values) );
 
  256     mean_matrix->fillComplete();
 
  264   template <
typename ExecSpace>
 
  267     template <
typename DstView, 
typename SrcView>
 
  272     template <
typename DstView, 
typename SrcView>
 
  273     void impl(
const DstView& dst, 
const SrcView& src) {
 
  274       typedef typename SrcView::non_const_value_type 
Scalar;
 
  275       const size_t m = src.extent(0);
 
  276       const size_t n = src.extent(1);
 
  278       Kokkos::RangePolicy<exec_space> policy(0,m);
 
  279       Kokkos::parallel_for( policy, KOKKOS_LAMBDA(
const size_t i)
 
  281         for (
size_t j=0; 
j<n; ++
j) {
 
  282           const Scalar& s = src(i,
j);
 
  283           for (
size_t k=0; k<p; ++k)
 
  284             dst(i,
j*p+k) = s.fastAccessCoeff(k);
 
  292   template <
typename ExecSpace>
 
  296     template <
typename DstView, 
typename SrcView>
 
  301     template <
typename DstView, 
typename SrcView>
 
  302     void impl(
const DstView& dst, 
const SrcView& src) {
 
  303       typedef typename DstView::non_const_value_type 
Scalar;
 
  304       const size_t m = dst.extent(0);
 
  305       const size_t n = dst.extent(1);
 
  308       Kokkos::RangePolicy<exec_space> policy(0,m);
 
  309       Kokkos::parallel_for( policy, KOKKOS_LAMBDA(
const size_t i)
 
  311         for (
size_t j=0; 
j<n; ++
j) {
 
  312           Scalar& d = dst(i,
j);
 
  313           for (
size_t k=0; k<p; ++k)
 
  314             d.fastAccessCoeff(k) = src(i,
j*p+k);
 
  320 #ifdef KOKKOS_ENABLE_CUDA 
  324   struct CopyPCE2Scalar<Kokkos::Cuda> {
 
  327     template <
typename DstView, 
typename SrcView>
 
  332     template <
typename DstView, 
typename SrcView>
 
  333     void impl(
const DstView& dst, 
const SrcView& src) {
 
  334       typedef typename DstView::non_const_value_type 
Scalar;
 
  335       typedef Kokkos::TeamPolicy<exec_space> Policy;
 
  336       typedef typename Policy::member_type Member;
 
  338       const size_t m = src.extent(0);
 
  339       const size_t n = src.extent(1);
 
  342       const size_t ChunkSize = 16;
 
  343       const size_t M = (m+ChunkSize-1)/ChunkSize;
 
  345       Policy policy(M,ChunkSize,ChunkSize);
 
  346       Kokkos::parallel_for( policy, [=] __device__(
const Member& team)
 
  348         __shared__ Scalar tmp[ChunkSize][ChunkSize];
 
  349         const size_t i_block = blockIdx.x*ChunkSize;
 
  351         for (
size_t j=0; 
j<n; ++
j) {
 
  352           for (
size_t k_block=0; k_block<p; k_block+=ChunkSize) {
 
  358             size_t i = i_block + threadIdx.y;
 
  359             size_t k = k_block + threadIdx.x;
 
  361               tmp[threadIdx.y][threadIdx.x] = src(i,
j).fastAccessCoeff(k);
 
  367             i = i_block + threadIdx.x;
 
  368             k = k_block + threadIdx.y;
 
  370                 dst(i,
j*p+k) = tmp[threadIdx.x][threadIdx.y];
 
  381   struct CopyScalar2PCE<Kokkos::Cuda> {
 
  384     template <
typename DstView, 
typename SrcView>
 
  389     template <
typename DstView, 
typename SrcView>
 
  390     void impl(
const DstView& dst, 
const SrcView& src) {
 
  391       typedef typename SrcView::non_const_value_type 
Scalar;
 
  392       typedef Kokkos::TeamPolicy<exec_space> Policy;
 
  393       typedef typename Policy::member_type Member;
 
  395       const size_t m = dst.extent(0);
 
  396       const size_t n = dst.extent(1);
 
  399       const size_t ChunkSize = 16;
 
  400       const size_t M = (m+ChunkSize-1)/ChunkSize;
 
  402       Policy policy(M,ChunkSize,ChunkSize);
 
  403       Kokkos::parallel_for( policy, [=] __device__(
const Member& team)
 
  405         __shared__ Scalar tmp[ChunkSize][ChunkSize];
 
  406         const size_t i_block = blockIdx.x*ChunkSize;
 
  408         for (
size_t j=0; 
j<n; ++
j) {
 
  409           for (
size_t k_block=0; k_block<p; k_block+=ChunkSize) {
 
  415             size_t i = i_block + threadIdx.x;
 
  416             size_t k = k_block + threadIdx.y;
 
  418               tmp[threadIdx.x][threadIdx.y] = src(i,
j*p+k);
 
  424             i = i_block + threadIdx.y;
 
  425             k = k_block + threadIdx.x;
 
  427                 dst(i,
j).fastAccessCoeff(k) = tmp[threadIdx.y][threadIdx.x];
 
  438   template <
typename DstView, 
typename SrcView>
 
  444   template <
typename DstView, 
typename SrcView>
 
  452   template <
typename Scalar,
 
  457     virtual public Tpetra::Operator<Scalar, LocalOrdinal, GlobalOrdinal, Node> {
 
  464     typedef Tpetra::Operator<base_scalar_type,LocalOrdinal,GlobalOrdinal,Node> 
scalar_op_type;
 
  473       return mb_op->getDomainMap();
 
  478       return mb_op->getRangeMap();
 
  482     apply (
const Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &X,
 
  483            Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y,
 
  488       auto xv = X.getLocalViewDevice(Tpetra::Access::ReadOnly);
 
  489       auto yv = Y.getLocalViewDevice(Tpetra::Access::ReadWrite);
 
  492           X_s->getNumVectors() != X.getNumVectors()*pce_size)
 
  494                                               X.getNumVectors()*pce_size));
 
  496           Y_s->getNumVectors() != Y.getNumVectors()*pce_size)
 
  498                                               Y.getNumVectors()*pce_size));
 
  503         auto xv_s = 
X_s->getLocalViewDevice(Tpetra::Access::ReadWrite);
 
  504         auto yv_s = 
Y_s->getLocalViewDevice(Tpetra::Access::ReadWrite);
 
  514         auto yv_s = 
Y_s->getLocalViewDevice(Tpetra::Access::ReadOnly);
 
  520       return mb_op->hasTransposeApply();
 
  525     typedef Tpetra::MultiVector<base_scalar_type,LocalOrdinal,GlobalOrdinal,Node> 
scalar_mv_type;
 
  533 #endif // STOKHOS_TPETRA_UTILITIES_HPP 
Teuchos::RCP< Tpetra::CrsMatrix< Scalar, LO, GO, N > > build_mean_matrix(const Tpetra::CrsMatrix< Scalar, LO, GO, N > &A)
 
MeanViewType getMeanValues() const 
 
void impl(const DstView &dst, const SrcView &src)
 
Stokhos::StandardStorage< int, double > Storage
 
ViewType::execution_space execution_space
 
CopyPCE2Scalar(const DstView &dst, const SrcView &src)
 
Sacado::MP::Vector< Storage > Scalar
 
ViewType::size_type size_type
 
MeanViewType getMeanValues() const 
 
GetMeanValsFunc(const ViewType &vals_)
 
Scalar::value_type MeanScalar
 
scalar_type::value_type base_scalar_type
 
Kokkos::DefaultExecutionSpace execution_space
 
virtual Teuchos::RCP< const Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > getRangeMap() const 
 
ViewType::execution_space execution_space
 
ViewType::execution_space execution_space
 
ViewType::execution_space execution_space
 
void copy_pce_to_scalar(const DstView &dst, const SrcView &src)
 
void impl(const DstView &dst, const SrcView &src)
 
Sacado::UQ::PCE< Storage > Scalar
 
Kokkos::View< Scalar *, P... > ViewType
 
KOKKOS_INLINE_FUNCTION constexpr std::enable_if< is_view_uq_pce< View< T, P...> >::value, unsigned >::type dimension_scalar(const View< T, P...> &view)
 
Teuchos::RCP< scalar_mv_type > X_s
 
MeanViewType getMeanValues() const 
 
ViewType::size_type size_type
 
virtual bool hasTransposeApply() const 
 
Teuchos::RCP< const scalar_op_type > mb_op
 
Teuchos::RCP< Tpetra::CrsMatrix< typename Scalar::value_type, LO, GO, N > > build_mean_scalar_matrix(const Tpetra::CrsMatrix< Scalar, LO, GO, N > &A)
 
virtual ~MeanBasedTpetraOperator()
 
Sacado::MP::Vector< Storage > Scalar
 
Tpetra::MultiVector< base_scalar_type, LocalOrdinal, GlobalOrdinal, Node > scalar_mv_type
 
ViewType::size_type size_type
 
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
 
Kokkos::View< MeanScalar *, P... > MeanViewType
 
MeanViewType getMeanValues() const 
 
Kokkos::View< Scalar *, P... > ViewType
 
GetMeanValsFunc(const ViewType &vals)
 
virtual void apply(const Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const 
 
ViewType::execution_space execution_space
 
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
 
MeanViewType getMeanValues() const 
 
ViewType::size_type size_type
 
MeanViewType getMeanValues() const 
 
Get mean values matrix for mean-based preconditioning. 
 
MeanBasedTpetraOperator(const Teuchos::RCP< const scalar_op_type > &mb_op_)
 
ViewType::size_type size_type
 
GetScalarMeanValsFunc(const ViewType &vals_)
 
Kokkos::View< Scalar *, P... > ViewType
 
GetMeanValsFunc(const ViewType &vals_)
 
Get mean values matrix for mean-based preconditioning. 
 
GlobalOrdinal global_ordinal_type
 
GetScalarMeanValsFunc(const ViewType &vals)
 
void copy_scalar_to_pce(const DstView &dst, const SrcView &src)
 
Kokkos::View< MeanScalar *, P... > MeanViewType
 
Sacado::UQ::PCE< Storage > Scalar
 
CopyScalar2PCE(const DstView &dst, const SrcView &src)
 
Tpetra::KokkosClassic::DefaultNode::DefaultNodeType Node
 
LocalOrdinal local_ordinal_type
 
GetScalarMeanValsFunc(const ViewType &vals_)
 
ViewType::execution_space execution_space
 
Scalar::value_type MeanScalar
 
ViewType::size_type size_type
 
virtual Teuchos::RCP< const Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > getDomainMap() const 
 
Tpetra::Operator< base_scalar_type, LocalOrdinal, GlobalOrdinal, Node > scalar_op_type
 
Kokkos::View< Scalar *, P... > ViewType
 
Teuchos::RCP< scalar_mv_type > Y_s