10 #ifndef TPETRA_DETAILS_RESIDUAL_HPP
11 #define TPETRA_DETAILS_RESIDUAL_HPP
13 #include "TpetraCore_config.h"
14 #include "Tpetra_CrsMatrix.hpp"
15 #include "Tpetra_LocalCrsMatrixOperator.hpp"
16 #include "Tpetra_MultiVector.hpp"
17 #include "Teuchos_RCP.hpp"
18 #include "Teuchos_ScalarTraits.hpp"
21 #include "KokkosSparse_spmv_impl.hpp"
// Team-parallel functor that stores R(row, v) = B(row, v) - sum over the row
// entries of A(row, col) * X(col, v), one matrix row per team thread.
// NOTE: this listing omits several original lines (struct header, most member
// declarations, the branch on is_MV, closing braces).
// Template parameters, in the order used at the call sites:
//   AMatrix        - local sparse matrix type; supplies execution_space,
//                    ordinal and value types.
//   MV / ConstMV   - writable / read-only 2-D view types (indexed as (row, col)).
//   Offsets        - type of the per-row offsets view.
//   is_MV          - presumably selects the multi-column loop instead of the
//                    column-0 path; the selecting branch is not visible here.
//   restrictedMode - when true, entries below offsets(lclRow) read X from
//                    X_domainmap_lcl and the remaining entries from X_colmap_lcl.
//   skipOffRank    - when true, entries at or past offsets(lclRow) are not
//                    added to the sum.
38 template <
class AMatrix,
class MV,
class ConstMV,
class Offsets,
bool is_MV,
bool restrictedMode,
bool skipOffRank>
// Convenience aliases derived from the matrix type.
40 using execution_space =
typename AMatrix::execution_space;
41 using LO =
typename AMatrix::non_const_ordinal_type;
42 using value_type =
typename AMatrix::non_const_value_type;
43 using team_policy =
typename Kokkos::TeamPolicy<execution_space>;
44 using team_member =
typename team_policy::member_type;
// ATV comes from KokkosKernels or from Kokkos depending on KOKKOS_VERSION.
45 #if KOKKOS_VERSION >= 40799
46 using ATV = KokkosKernels::ArithTraits<value_type>;
48 using ATV = Kokkos::ArithTraits<value_type>;
// X values read for entries below the per-row offset in restricted mode.
57 ConstMV X_domainmap_lcl;
// Constructor: copies the view handles and launch parameter into the members.
60 const ConstMV& X_colmap_lcl_,
61 const ConstMV& B_lcl_,
63 const int rows_per_team_,
65 const ConstMV& X_domainmap_lcl_)
67 , X_colmap_lcl(X_colmap_lcl_)
70 , rows_per_team(rows_per_team_)
72 , X_domainmap_lcl(X_domainmap_lcl_) {}
// Team entry point: the team with league rank k covers rows
// [k * rows_per_team, (k + 1) * rows_per_team), split across its threads.
74 KOKKOS_INLINE_FUNCTION
75 void operator()(
const team_member& dev)
const {
76 Kokkos::parallel_for(Kokkos::TeamThreadRange(dev, 0, rows_per_team), [&](
const LO& loop) {
77 const LO lclRow =
static_cast<LO
>(dev.league_rank()) * rows_per_team + loop;
// Bounds guard against the local row count (branch body not shown in this listing).
79 if (lclRow >= A_lcl.numRows()) {
// ---- Column-0 path ----
// A_x starts at zero; presumably it is the reduction target of the
// parallel_reduce calls below (their trailing argument is not visible).
85 value_type A_x = ATV::zero();
// Unrestricted: every entry of the row multiplies X_colmap_lcl.
87 if (!restrictedMode) {
88 const auto A_row = A_lcl.rowConst(lclRow);
89 const LO row_length =
static_cast<LO
>(A_row.length);
// Vector lanes iterate over the entries of this row.
91 Kokkos::parallel_reduce(
92 Kokkos::ThreadVectorRange(dev, row_length), [&](
const LO iEntry, value_type& lsum) {
93 const auto A_val = A_row.value(iEntry);
94 lsum += A_val * X_colmap_lcl(A_row.colidx(iEntry), 0);
// Restricted: offsets(lclRow) is compared against absolute entry indices in
// [row_map(lclRow), row_map(lclRow + 1)) and splits the row in two parts.
99 const LO offRankOffset = offsets(lclRow);
100 const size_t start = A_lcl.graph.row_map(lclRow);
101 const size_t end = A_lcl.graph.row_map(lclRow + 1);
103 Kokkos::parallel_reduce(
104 Kokkos::ThreadVectorRange(dev, start, end), [&](
const LO iEntry, value_type& lsum) {
105 const auto A_val = A_lcl.values(iEntry);
106 const auto lclCol = A_lcl.graph.entries(iEntry);
// Entries below the offset read X_domainmap_lcl; the rest read X_colmap_lcl
// unless skipOffRank drops them.
107 if (iEntry < offRankOffset)
108 lsum += A_val * X_domainmap_lcl(lclCol, 0);
109 else if (!skipOffRank)
110 lsum += A_val * X_colmap_lcl(lclCol, 0);
// Single store per thread so the vector lanes do not all write the same entry.
115 Kokkos::single(Kokkos::PerThread(dev), [&]() {
116 R_lcl(lclRow, 0) = B_lcl(lclRow, 0) - A_x;
// ---- Multi-column path: same computation for every column v ----
120 const LO numVectors =
static_cast<LO
>(X_colmap_lcl.extent(1));
122 for (LO v = 0; v < numVectors; v++) {
123 value_type A_x = ATV::zero();
125 if (!restrictedMode) {
126 const auto A_row = A_lcl.rowConst(lclRow);
127 const LO row_length =
static_cast<LO
>(A_row.length);
129 Kokkos::parallel_reduce(
130 Kokkos::ThreadVectorRange(dev, row_length), [&](
const LO iEntry, value_type& lsum) {
131 const auto A_val = A_row.value(iEntry);
132 lsum += A_val * X_colmap_lcl(A_row.colidx(iEntry), v);
// Restricted variant for column v (same split as in the column-0 path).
136 const LO offRankOffset = offsets(lclRow);
137 const size_t start = A_lcl.graph.row_map(lclRow);
138 const size_t end = A_lcl.graph.row_map(lclRow + 1);
140 Kokkos::parallel_reduce(
141 Kokkos::ThreadVectorRange(dev, start, end), [&](
const LO iEntry, value_type& lsum) {
142 const auto A_val = A_lcl.values(iEntry);
143 const auto lclCol = A_lcl.graph.entries(iEntry);
144 if (iEntry < offRankOffset)
145 lsum += A_val * X_domainmap_lcl(lclCol, v);
146 else if (!skipOffRank)
147 lsum += A_val * X_colmap_lcl(lclCol, v);
// Store the residual entry for (lclRow, v).
152 Kokkos::single(Kokkos::PerThread(dev), [&]() {
153 R_lcl(lclRow, v) = B_lcl(lclRow, v) - A_x;
// Team-parallel functor that subtracts from R the contribution of the row
// entries located at or after offsets(lclRow):
//   R(row, v) -= sum over those entries of A(row, col) * X_colmap(col, v).
// NOTE: this listing omits several original lines (struct header, most member
// declarations, the branch on is_MV, closing braces).
// Template parameters:
//   AMatrix      - local sparse matrix type.
//   MV / ConstMV - writable / read-only 2-D view types.
//   Offsets      - type of the per-row offsets view.
//   is_MV        - presumably selects the multi-column loop instead of the
//                  column-0 path; the selecting branch is not visible here.
163 template <
class AMatrix,
class MV,
class ConstMV,
class Offsets,
bool is_MV>
// Convenience aliases derived from the matrix type.
165 using execution_space =
typename AMatrix::execution_space;
166 using LO =
typename AMatrix::non_const_ordinal_type;
167 using value_type =
typename AMatrix::non_const_value_type;
168 using team_policy =
typename Kokkos::TeamPolicy<execution_space>;
169 using team_member =
typename team_policy::member_type;
// ATV comes from KokkosKernels or from Kokkos depending on KOKKOS_VERSION.
170 #if KOKKOS_VERSION >= 40799
171 using ATV = KokkosKernels::ArithTraits<value_type>;
173 using ATV = Kokkos::ArithTraits<value_type>;
// X values indexed by the column indices stored in the matrix graph.
177 ConstMV X_colmap_lcl;
// Constructor: copies the view handles, launch parameter and offsets into the members.
183 const ConstMV& X_colmap_lcl_,
185 const int rows_per_team_,
188 , X_colmap_lcl(X_colmap_lcl_)
190 , rows_per_team(rows_per_team_)
191 , offsets(offsets_) {}
// Team entry point: the team with league rank k covers rows
// [k * rows_per_team, (k + 1) * rows_per_team), split across its threads.
193 KOKKOS_INLINE_FUNCTION
194 void operator()(
const team_member& dev)
const {
195 Kokkos::parallel_for(Kokkos::TeamThreadRange(dev, 0, rows_per_team), [&](
const LO& loop) {
196 const LO lclRow =
static_cast<LO
>(dev.league_rank()) * rows_per_team + loop;
// Bounds guard against the local row count (branch body not shown in this listing).
198 if (lclRow >= A_lcl.numRows()) {
// Only the entry range [offsets(lclRow), row_map(lclRow + 1)) is visited.
202 const LO offRankOffset = offsets(lclRow);
203 const size_t end = A_lcl.graph.row_map(lclRow + 1);
// ---- Column-0 path ----
// A_x starts at zero; presumably it is the reduction target of the
// parallel_reduce below (its trailing argument is not visible).
207 value_type A_x = ATV::zero();
209 Kokkos::parallel_reduce(
210 Kokkos::ThreadVectorRange(dev, offRankOffset, end), [&](
const LO iEntry, value_type& lsum) {
211 const auto A_val = A_lcl.values(iEntry);
212 const auto lclCol = A_lcl.graph.entries(iEntry);
213 lsum += A_val * X_colmap_lcl(lclCol, 0);
// In-place update of R (note -=, not =), done once per thread.
217 Kokkos::single(Kokkos::PerThread(dev), [&]() {
218 R_lcl(lclRow, 0) -= A_x;
// ---- Multi-column path: same update for every column v ----
222 const LO numVectors =
static_cast<LO
>(X_colmap_lcl.extent(1));
224 for (LO v = 0; v < numVectors; v++) {
225 value_type A_x = ATV::zero();
227 Kokkos::parallel_reduce(
228 Kokkos::ThreadVectorRange(dev, offRankOffset, end), [&](
const LO iEntry, value_type& lsum) {
229 const auto A_val = A_lcl.values(iEntry);
230 const auto lclCol = A_lcl.graph.entries(iEntry);
231 lsum += A_val * X_colmap_lcl(lclCol, v);
// In-place update of R for column v.
235 Kokkos::single(Kokkos::PerThread(dev), [&]() {
236 R_lcl(lclRow, v) -= A_x;
// Local (on-process) residual driver. The signature line is missing from this
// listing; presumably this is the localResidual called further down in the
// file with an optional trailing X_domainmap pointer - TODO confirm.
// Visible behaviour: validates the arguments, then either goes through
// A.localApply (when fusedResidual is false) or launches LocalResidualFunctor
// with a team policy.
245 template <
class SC,
class LO,
class GO,
class NO>
250 const Kokkos::View<const size_t*, typename NO::device_type>& offsets,
252 using Teuchos::NO_TRANS;
259 using offset_type = Kokkos::View<const size_t*, typename NO::device_type>;
// Read-only device view of X_colmap.
262 const_local_view_device_type X_colmap_lcl = X_colmap.
getLocalViewDevice(Access::ReadOnly);
// Left default-constructed here; assigned below only when X_domainmap is non-null.
265 const_local_view_device_type X_domainmap_lcl;
// Argument checks. Only fragments of the exception macros survive in this
// listing; the message text indicates checks on the column count of R and on
// the local lengths of X, R and B.
271 "R.getNumVectors() = "
276 "X has the wrong number of local rows. "
277 "X.getLocalLength() = "
279 "A.getColMap()->getLocalNumElements() = "
280 << A.
getColMap()->getLocalNumElements() <<
".");
284 "R has the wrong number of local rows. "
285 "R.getLocalLength() = "
287 "A.getRowMap()->getLocalNumElements() = "
288 << A.
getRowMap()->getLocalNumElements() <<
".");
292 "B has the wrong number of local rows. "
293 "B.getLocalLength() = "
295 "A.getRowMap()->getLocalNumElements() = "
296 << A.
getRowMap()->getLocalNumElements() <<
".");
// The matrix must be fill complete.
298 TEUCHOS_TEST_FOR_EXCEPTION(!A.
isFillComplete(), std::runtime_error,
299 "The matrix A is not "
300 "fill complete. You must call fillComplete() (possibly with "
301 "domain and range Map arguments) without an intervening "
302 "resumeFill() call before you may call this method.");
304 std::runtime_error,
"X, Y and B must be constant stride.");
// Aliasing check: X must not share its data pointer with R or with B.
// NOTE(review): the message names Y, but the pointers compared are X against
// R and X against B - confirm whether the wording is intended.
307 TEUCHOS_TEST_FOR_EXCEPTION((X_colmap_lcl.data() == R_lcl.data() && X_colmap_lcl.data() !=
nullptr) ||
308 (X_colmap_lcl.data() == B_lcl.data() && X_colmap_lcl.data() !=
nullptr),
309 std::runtime_error,
"X, Y and R may not alias one another.");
// Unfused path: R receives one * A * X_colmap + zero * R through localApply;
// the statements that follow in the original are not visible in this listing.
313 if (!fusedResidual) {
314 SC one = Teuchos::ScalarTraits<SC>::one();
316 SC zero = Teuchos::ScalarTraits<SC>::zero();
319 A.
localApply(X_colmap, R, Teuchos::NO_TRANS, one, zero);
// Special case for a matrix with no local rows (branch body not shown).
324 if (A_lcl.numRows() == 0) {
// Launch configuration: the -1 values are passed to spmv_launch_parameters by
// name together with team_size (whose declaration is missing from this listing).
328 int64_t numLocalRows = A_lcl.numRows();
329 int64_t myNnz = A_lcl.nnz();
332 int vector_length = -1;
333 int64_t rows_per_thread = -1;
335 using execution_space =
typename CrsMatrix<SC, LO, GO, NO>::execution_space;
336 using policy_type =
typename Kokkos::TeamPolicy<execution_space>;
338 int64_t rows_per_team = KokkosSparse::Impl::spmv_launch_parameters<execution_space>(numLocalRows, myNnz, rows_per_thread, team_size, vector_length);
// Number of teams: ceiling of (rows of B) / rows_per_team.
339 int64_t worksets = (B_lcl.extent(0) + rows_per_team - 1) / rows_per_team;
// Placeholder policy, then replaced by one using either Kokkos::AUTO or the
// computed team_size (the selecting condition is not visible here).
341 policy_type policy(1, 1);
343 policy = policy_type(worksets, Kokkos::AUTO, vector_length);
345 policy = policy_type(worksets, team_size, vector_length);
// A single column is treated as the vector case.
348 bool is_vector = (X_colmap_lcl.extent(1) == 1);
// Kernel launches. Functor flags are <is_MV, restrictedMode, skipOffRank>:
// a null X_domainmap uses restrictedMode = false, otherwise the domain-map
// view is fetched and restrictedMode = true. skipOffRank is always false here.
351 if (X_domainmap ==
nullptr) {
352 using functor_type = LocalResidualFunctor<local_matrix_device_type, local_view_device_type, const_local_view_device_type, offset_type, false, false, false>;
353 functor_type func(A_lcl, X_colmap_lcl, B_lcl, R_lcl, rows_per_team, offsets, X_domainmap_lcl);
354 Kokkos::parallel_for(
"residual-vector", policy, func);
357 X_domainmap_lcl = X_domainmap->getLocalViewDevice(Access::ReadOnly);
358 using functor_type = LocalResidualFunctor<local_matrix_device_type, local_view_device_type, const_local_view_device_type, offset_type, false, true, false>;
359 functor_type func(A_lcl, X_colmap_lcl, B_lcl, R_lcl, rows_per_team, offsets, X_domainmap_lcl);
360 Kokkos::parallel_for(
"residual-vector", policy, func);
// Same two cases with is_MV = true.
363 if (X_domainmap ==
nullptr) {
364 using functor_type = LocalResidualFunctor<local_matrix_device_type, local_view_device_type, const_local_view_device_type, offset_type, true, false, false>;
365 functor_type func(A_lcl, X_colmap_lcl, B_lcl, R_lcl, rows_per_team, offsets, X_domainmap_lcl);
366 Kokkos::parallel_for(
"residual-multivector", policy, func);
369 X_domainmap_lcl = X_domainmap->getLocalViewDevice(Access::ReadOnly);
370 using functor_type = LocalResidualFunctor<local_matrix_device_type, local_view_device_type, const_local_view_device_type, offset_type, true, true, false>;
371 functor_type func(A_lcl, X_colmap_lcl, B_lcl, R_lcl, rows_per_team, offsets, X_domainmap_lcl);
372 Kokkos::parallel_for(
"residual-multivector", policy, func);
// Residual computed in two kernels around X_colmap.endImport: a first kernel
// that skips the entries at or past offsets(row), then, after endImport and a
// fence, a second kernel that subtracts the contribution of those entries.
// Parameters:
//   A          - matrix; must be fill complete.
//   X_colmap   - column-map X; non-const because endImport is called on it.
//   B          - right-hand side, read only.
//   R          - output, accessed with Access::OverwriteAll.
//   offsets    - per-row entry offsets forwarded to both functors.
//   X_domainmap- X passed as the source of endImport and read by the first kernel.
// Throws std::runtime_error through TEUCHOS_TEST_FOR_EXCEPTION when a check fails.
377 template <
class SC,
class LO,
class GO,
class NO>
378 void localResidualWithCommCompOverlap(
const CrsMatrix<SC, LO, GO, NO>& A,
379 MultiVector<SC, LO, GO, NO>& X_colmap,
380 const MultiVector<SC, LO, GO, NO>& B,
381 MultiVector<SC, LO, GO, NO>& R,
382 const Kokkos::View<const size_t*, typename NO::device_type>& offsets,
383 const MultiVector<SC, LO, GO, NO>& X_domainmap) {
384 using Teuchos::NO_TRANS;
387 using import_type =
typename CrsMatrix<SC, LO, GO, NO>::import_type;
// Scope guard naming this region for profiling.
389 ProfilingRegion regionLocalApply(
"Tpetra::CrsMatrix::localResidualWithCommCompOverlap");
391 using local_matrix_device_type =
typename CrsMatrix<SC, LO, GO, NO>::local_matrix_device_type;
392 using local_view_device_type =
typename MultiVector<SC, LO, GO, NO>::dual_view_type::t_dev::non_const_type;
393 using const_local_view_device_type =
typename MultiVector<SC, LO, GO, NO>::dual_view_type::t_dev::const_type;
394 using offset_type = Kokkos::View<const size_t*, typename NO::device_type>;
// Device-side handles: inputs are read only, R is fetched as OverwriteAll.
396 local_matrix_device_type A_lcl = A.getLocalMatrixDevice();
397 const_local_view_device_type X_colmap_lcl = X_colmap.getLocalViewDevice(Access::ReadOnly);
398 const_local_view_device_type B_lcl = B.getLocalViewDevice(Access::ReadOnly);
399 local_view_device_type R_lcl = R.getLocalViewDevice(Access::OverwriteAll);
400 const_local_view_device_type X_domainmap_lcl = X_domainmap.getLocalViewDevice(Access::ReadOnly);
// X and R must have the same number of columns.
405 TEUCHOS_TEST_FOR_EXCEPTION(X_colmap.getNumVectors() != R.getNumVectors(), std::runtime_error,
406 "X.getNumVectors() = " << X_colmap.getNumVectors() <<
" != "
407 "R.getNumVectors() = "
408 << R.getNumVectors() <<
".");
// X must match the local size of the column map.
409 TEUCHOS_TEST_FOR_EXCEPTION(X_colmap.getLocalLength() !=
410 A.getColMap()->getLocalNumElements(),
412 "X has the wrong number of local rows. "
413 "X.getLocalLength() = "
414 << X_colmap.getLocalLength() <<
" != "
415 "A.getColMap()->getLocalNumElements() = "
416 << A.getColMap()->getLocalNumElements() <<
".");
// R must match the local size of the row map.
417 TEUCHOS_TEST_FOR_EXCEPTION(R.getLocalLength() !=
418 A.getRowMap()->getLocalNumElements(),
420 "R has the wrong number of local rows. "
421 "R.getLocalLength() = "
422 << R.getLocalLength() <<
" != "
423 "A.getRowMap()->getLocalNumElements() = "
424 << A.getRowMap()->getLocalNumElements() <<
".");
// B must match the local size of the row map.
425 TEUCHOS_TEST_FOR_EXCEPTION(B.getLocalLength() !=
426 A.getRowMap()->getLocalNumElements(),
428 "B has the wrong number of local rows. "
429 "B.getLocalLength() = "
430 << B.getLocalLength() <<
" != "
431 "A.getRowMap()->getLocalNumElements() = "
432 << A.getRowMap()->getLocalNumElements() <<
".");
// The matrix must be fill complete.
434 TEUCHOS_TEST_FOR_EXCEPTION(!A.isFillComplete(), std::runtime_error,
435 "The matrix A is not "
436 "fill complete. You must call fillComplete() (possibly with "
437 "domain and range Map arguments) without an intervening "
438 "resumeFill() call before you may call this method.");
// Constant stride is required of X, R and B.
// NOTE(review): the message names Y although the condition tests X, R and B.
439 TEUCHOS_TEST_FOR_EXCEPTION(!X_colmap.isConstantStride() || !R.isConstantStride() || !B.isConstantStride(),
440 std::runtime_error,
"X, Y and B must be constant stride.");
// Aliasing check: X must not share its data pointer with R or with B.
// NOTE(review): the message names Y, but the pointers compared are X against
// R and X against B - confirm whether the wording is intended.
443 TEUCHOS_TEST_FOR_EXCEPTION((X_colmap_lcl.data() == R_lcl.data() && X_colmap_lcl.data() !=
nullptr) ||
444 (X_colmap_lcl.data() == B_lcl.data() && X_colmap_lcl.data() !=
nullptr),
445 std::runtime_error,
"X, Y and R may not alias one another.");
// Special case for a matrix with no local rows (branch body not shown).
448 if (A_lcl.numRows() == 0) {
// Launch configuration: the -1 values are passed to spmv_launch_parameters by
// name together with team_size (whose declaration is missing from this listing).
452 int64_t numLocalRows = A_lcl.numRows();
453 int64_t myNnz = A_lcl.nnz();
456 int vector_length = -1;
457 int64_t rows_per_thread = -1;
459 using execution_space =
typename CrsMatrix<SC, LO, GO, NO>::execution_space;
460 using policy_type =
typename Kokkos::TeamPolicy<execution_space>;
462 int64_t rows_per_team = KokkosSparse::Impl::spmv_launch_parameters<execution_space>(numLocalRows, myNnz, rows_per_thread, team_size, vector_length);
// Number of teams: ceiling of (rows of B) / rows_per_team.
463 int64_t worksets = (B_lcl.extent(0) + rows_per_team - 1) / rows_per_team;
// Placeholder policy, then replaced by one using either Kokkos::AUTO or the
// computed team_size (the selecting condition is not visible here).
465 policy_type policy(1, 1);
467 policy = policy_type(worksets, Kokkos::AUTO, vector_length);
469 policy = policy_type(worksets, team_size, vector_length);
// A single column is treated as the vector case.
472 bool is_vector = (X_colmap_lcl.extent(1) == 1);
// Vector case, step 1: functor flags <is_MV=false, restrictedMode=true,
// skipOffRank=true>, so entries at or past the offset are left out.
475 using functor_type = LocalResidualFunctor<local_matrix_device_type, local_view_device_type, const_local_view_device_type, offset_type, false, true, true>;
476 functor_type func(A_lcl, X_colmap_lcl, B_lcl, R_lcl, rows_per_team, offsets, X_domainmap_lcl);
477 Kokkos::parallel_for(
"residual-vector", policy, func);
// Step 2: endImport on X_colmap, then a global fence before the second kernel
// reads X_colmap_lcl and updates R_lcl.
479 RCP<const import_type> importer = A.getGraph()->getImporter();
480 X_colmap.endImport(X_domainmap, *importer,
INSERT,
true);
482 Kokkos::fence(
"Tpetra::localResidualWithCommCompOverlap-1");
// Step 3: subtract the contribution of the entries skipped in step 1.
484 using functor_type2 = OffRankUpdateFunctor<local_matrix_device_type, local_view_device_type, const_local_view_device_type, offset_type, false>;
485 functor_type2 func2(A_lcl, X_colmap_lcl, R_lcl, rows_per_team, offsets);
486 Kokkos::parallel_for(
"residual-vector-offrank", policy, func2);
// Multivector case: same three steps with is_MV = true.
489 using functor_type = LocalResidualFunctor<local_matrix_device_type, local_view_device_type, const_local_view_device_type, offset_type, true, true, true>;
490 functor_type func(A_lcl, X_colmap_lcl, B_lcl, R_lcl, rows_per_team, offsets, X_domainmap_lcl);
491 Kokkos::parallel_for(
"residual-multivector", policy, func);
493 RCP<const import_type> importer = A.getGraph()->getImporter();
494 X_colmap.endImport(X_domainmap, *importer,
INSERT,
true);
496 Kokkos::fence(
"Tpetra::localResidualWithCommCompOverlap-2");
// NOTE(review): the kernel label below says vector although this functor is
// instantiated with is_MV = true - possibly a copy-paste leftover.
498 using functor_type2 = OffRankUpdateFunctor<local_matrix_device_type, local_view_device_type, const_local_view_device_type, offset_type, true>;
499 functor_type2 func2(A_lcl, X_colmap_lcl, R_lcl, rows_per_team, offsets);
500 Kokkos::parallel_for(
"residual-vector-offrank", policy, func2);
// Top-level residual driver (the error text below refers to Tpetra::residual).
// NOTE: the signature and many statements are missing from this listing; the
// comments describe only the fragments that are visible.
505 template <
class SC,
class LO,
class GO,
class NO>
512 using Teuchos::rcp_const_cast;
513 using Teuchos::rcpFromRef;
// Overlapping communication and computation is asserted to imply
// skipCopyAndPermuteIfPossible.
519 if (overlapCommunicationAndComputation)
520 TEUCHOS_ASSERT(skipCopyAndPermuteIfPossible);
// Starts false; may be switched on below when an importer is present.
527 bool restrictedMode =
false;
// Generic path: R = A * X through apply, then update(one, B, negone), which
// yields R = B - A * X. The condition selecting this path is not visible
// here (presumably A is not a CrsMatrix - TODO confirm).
532 SC one = Teuchos::ScalarTraits<SC>::one(), negone = -one, zero = Teuchos::ScalarTraits<SC>::zero();
533 Aop.
apply(X_in, R_in, Teuchos::NO_TRANS, one, zero);
534 R_in.
update(one, B_in, negone);
543 using offset_type =
typename graph_type::offset_device_view_type;
546 const bool R_is_replicated =
// Communication plans taken from the matrix graph; either may be null.
555 RCP<const import_type> importer = A.
getGraph()->getImporter();
556 RCP<const export_type> exporter = A.
getGraph()->getExporter();
// Without an importer, X_colMap is a non-owning, const-cast reference to X_in
// (intermediate lines of this branch are missing from the listing).
562 if (importer.is_null()) {
576 X_colMap = rcp_const_cast<MV>(rcpFromRef(X_in));
// With an importer: restricted mode needs both the behaviour flag and a
// locally fitted importer.
587 restrictedMode = skipCopyAndPermuteIfPossible && importer->isLocallyFitted();
// Debug builds cross-check the fitted claim against the maps themselves.
589 if (debug && restrictedMode) {
590 TEUCHOS_TEST_FOR_EXCEPTION(!importer->getTargetMap()->isLocallyFitted(*importer->getSourceMap()), std::runtime_error,
591 "Source map and target map are not locally fitted, but Tpetra::residual thinks they are.");
// Begin the import of X here; the matching endImport calls appear further down.
595 X_colMap->beginImport(X_in, *importer,
INSERT, restrictedMode);
// Without an exporter, R_rowMap refers directly to R_in.
606 if (exporter.is_null()) {
610 R_rowMap = rcpFromRef(R_in);
// B_rowMap refers either to B_in or to a row-map temporary
// (B_rowMapNonConst); the conditions choosing between the assignments below
// are only partly visible.
618 RCP<const MV> B_rowMap;
619 if (exporter.is_null()) {
625 B_rowMap = rcp_const_cast<
const MV>(B_rowMapNonConst);
627 B_rowMap = rcpFromRef(B_in);
// With an exporter, B is brought into the row-map temporary using the
// exporter and the ADD combine mode.
634 B_rowMapNonConst->doImport(B_in, *exporter,
ADD);
635 B_rowMap = rcp_const_cast<
const MV>(B_rowMapNonConst);
// Exporter present: call endImport on X when an importer exists, then run
// localResidual, passing &X_in only when restricted mode is active with an
// importer.
643 if (!exporter.is_null()) {
644 if (!importer.is_null())
645 X_colMap->endImport(X_in, *importer,
INSERT, restrictedMode);
646 if (restrictedMode && !importer.is_null())
647 localResidual(A, *X_colMap, *B_rowMap, *R_rowMap, offsets, &X_in);
649 localResidual(A, *X_colMap, *B_rowMap, *R_rowMap, offsets);
// Other path: the overlap variant calls endImport itself; otherwise endImport
// is called here before localResidual (the enclosing conditions are only
// partly visible).
660 if (overlapCommunicationAndComputation) {
661 localResidualWithCommCompOverlap(A, *X_colMap, *B_rowMap, *R_rowMap, offsets, X_in);
663 X_colMap->endImport(X_in, *importer,
INSERT, restrictedMode);
664 localResidual(A, *X_colMap, *B_rowMap, *R_rowMap, offsets, &X_in);
667 if (!importer.is_null())
668 X_colMap->endImport(X_in, *importer,
INSERT, restrictedMode);
669 localResidual(A, *X_colMap, *B_rowMap, *R_rowMap, offsets);
// Extra handling when R is replicated (branch body not shown).
686 if (R_is_replicated) {
695 #endif // TPETRA_DETAILS_RESIDUAL_HPP
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distribution.
static bool overlapCommunicationAndComputation()
Overlap communication and computation.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
size_t getNumVectors() const
Number of columns in the multivector.
size_t getLocalLength() const
Local number of rows on the calling process.
bool isConstantStride() const
Whether this multivector has constant stride between columns.
One or more distributed dense vectors.
virtual void apply(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const =0
Computes the operator-multivector application.
bool isDistributed() const
Whether this is a globally distributed object.
Teuchos::RCP< const crs_graph_type > getCrsGraph() const
This matrix's graph, as a CrsGraph.
static bool debug()
Whether Tpetra is in debug mode.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI process.
static bool fusedResidual()
Fusing SpMV and update in residual instead of using 2 kernel launches. Fusing kernels implies that no...
Functor for computing R -= A_offRank*X_colmap.
offsets_view_type offsets_
Offsets (output argument)
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Insert new values that don't currently exist.
bool isFillComplete() const override
Whether the matrix is fill complete.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
The communicator over which the matrix is distributed.
Abstract interface for operators (e.g., matrices and preconditioners).
void update(const Scalar &alpha, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Scalar &beta)
Update: this = beta*this + alpha*A.
TPETRA_DETAILS_ALWAYS_INLINE local_matrix_device_type getLocalMatrixDevice() const
The local sparse matrix.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distribution.
Functor for computing the residual.
Teuchos::RCP< MV > getColumnMapMultiVector(const MV &X_domainMap, const bool force=false) const
Create a (or fetch a cached) column Map MultiVector.
Teuchos::RCP< const RowGraph< LocalOrdinal, GlobalOrdinal, Node > > getGraph() const override
This matrix's graph, as a RowGraph.
Teuchos::RCP< MV > getRowMapMultiVector(const MV &Y_rangeMap, const bool force=false) const
Create a (or fetch a cached) row Map MultiVector.
dual_view_type::t_dev::const_type getLocalViewDevice(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on device. This requires that th...
void start()
Start the deep_copy counter.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
void doExport(const SrcDistObject &source, const Export< LocalOrdinal, GlobalOrdinal, Node > &exporter, const CombineMode CM, const bool restrictedMode=false)
Export data into this object using an Export object ("forward mode").
void reduce()
Sum values of a locally replicated multivector across all processes.
static bool skipCopyAndPermuteIfPossible()
Skip copyAndPermute if possible.
void residual(const Operator< SC, LO, GO, NO > &A, const MultiVector< SC, LO, GO, NO > &X, const MultiVector< SC, LO, GO, NO > &B, MultiVector< SC, LO, GO, NO > &R)
Computes R = B - A * X.
Teuchos::RCP< const map_type > getColMap() const override
The Map that describes the column distribution in this matrix.
void localApply(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, const Teuchos::ETransp mode=Teuchos::NO_TRANS, const Scalar &alpha=Teuchos::ScalarTraits< Scalar >::one(), const Scalar &beta=Teuchos::ScalarTraits< Scalar >::zero()) const
Compute the local part of a sparse matrix-(Multi)Vector multiply.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Teuchos::RCP< const map_type > getRowMap() const override
The Map that describes the row distribution in this matrix.