10 #ifndef TPETRA_DETAILS_RESIDUAL_HPP
11 #define TPETRA_DETAILS_RESIDUAL_HPP
13 #include "TpetraCore_config.h"
14 #include "Tpetra_CrsMatrix.hpp"
15 #include "Tpetra_LocalCrsMatrixOperator.hpp"
16 #include "Tpetra_MultiVector.hpp"
17 #include "Teuchos_RCP.hpp"
18 #include "Teuchos_ScalarTraits.hpp"
21 #include "KokkosSparse_spmv_impl.hpp"
// Kokkos team functor: for each local row it stores
//   B_lcl(row, col) - sum_k A(row, k) * X(k, col)
// into R_lcl(row, col).
// NOTE(review): this listing omits parts of the definition (the struct
// header, some members, branch headers and closing braces); the comments
// below describe the visible lines only.
39 template<
class AMatrix,
class MV,
class ConstMV,
class Offsets,
bool is_MV,
bool restrictedMode,
bool skipOffRank>
// Execution space, ordinal type and scalar type are all taken from the matrix type.
42 using execution_space =
typename AMatrix::execution_space;
43 using LO =
typename AMatrix::non_const_ordinal_type;
44 using value_type =
typename AMatrix::non_const_value_type;
45 using team_policy =
typename Kokkos::TeamPolicy<execution_space>;
46 using team_member =
typename team_policy::member_type;
47 using ATV = Kokkos::ArithTraits<value_type>;
// X values read for the row entries located before offsets(row) on the
// restricted path further down.
55 ConstMV X_domainmap_lcl;
// Constructor (partially visible): the visible initializers copy each
// argument into the member whose name lacks the trailing underscore.
59 const ConstMV& X_colmap_lcl_,
60 const ConstMV& B_lcl_,
62 const int rows_per_team_,
64 const ConstMV& X_domainmap_lcl_) :
66 X_colmap_lcl(X_colmap_lcl_),
69 rows_per_team(rows_per_team_),
71 X_domainmap_lcl(X_domainmap_lcl_)
// Team entry point: the threads of a team share `rows_per_team` consecutive rows.
74 KOKKOS_INLINE_FUNCTION
75 void operator() (
const team_member& dev)
const
78 Kokkos::parallel_for(Kokkos::TeamThreadRange (dev, 0, rows_per_team),[&] (
const LO& loop) {
// Local row handled by this iteration: first row of the team plus the loop index.
79 const LO lclRow =
static_cast<LO
> (dev.league_rank ()) * rows_per_team + loop;
// Rows at or past numRows() are filtered here (e.g. when the row count is
// not a multiple of rows_per_team); the body of this test is not visible.
81 if (lclRow >= A_lcl.numRows ()) {
// ---- Column 0 only ----
// NOTE(review): presumably the single-vector (is_MV == false) path; the
// branch that selects it is not visible.
87 value_type A_x = ATV::zero ();
// Unrestricted path: every entry of the row is multiplied with X_colmap_lcl.
89 if (!restrictedMode) {
90 const auto A_row = A_lcl.rowConst(lclRow);
91 const LO row_length =
static_cast<LO
> (A_row.length);
// Vector lanes split the row's entries; lsum is the per-lane partial sum.
93 Kokkos::parallel_reduce(Kokkos::ThreadVectorRange (dev, row_length), [&] (
const LO iEntry, value_type& lsum) {
94 const auto A_val = A_row.value(iEntry);
95 lsum += A_val * X_colmap_lcl(A_row.colidx(iEntry),0);
// Second path (its branch header is not visible): the row is walked by raw
// entry index from row_map(lclRow) to row_map(lclRow+1). Entries whose index
// is below offsets(lclRow) read X_domainmap_lcl; the remaining entries read
// X_colmap_lcl, or are left out of the sum when skipOffRank is set.
// NOTE(review): start/end are size_t while the lambda index and
// offRankOffset are LO -- confirm LO cannot overflow for the local entry count.
101 const LO offRankOffset = offsets(lclRow);
102 const size_t start = A_lcl.graph.row_map(lclRow);
103 const size_t end = A_lcl.graph.row_map(lclRow+1);
105 Kokkos::parallel_reduce(Kokkos::ThreadVectorRange (dev, start, end), [&] (
const LO iEntry, value_type& lsum) {
106 const auto A_val = A_lcl.values(iEntry);
107 const auto lclCol = A_lcl.graph.entries(iEntry);
108 if (iEntry < offRankOffset)
109 lsum += A_val * X_domainmap_lcl(lclCol,0);
110 else if (!skipOffRank)
111 lsum += A_val * X_colmap_lcl(lclCol,0);
// Kokkos::single(PerThread) performs the store once per thread instead of
// once per vector lane.
115 Kokkos::single(Kokkos::PerThread(dev),[&] () {
116 R_lcl(lclRow,0) = B_lcl(lclRow,0) - A_x;
// ---- All columns ----
// Same computation as above, repeated for every column v of X_colmap_lcl.
121 const LO numVectors =
static_cast<LO
>(X_colmap_lcl.extent(1));
123 for(LO v=0; v<numVectors; v++) {
// The accumulator is reset for each column.
125 value_type A_x = ATV::zero ();
127 if (!restrictedMode) {
129 const auto A_row = A_lcl.rowConst(lclRow);
130 const LO row_length =
static_cast<LO
> (A_row.length);
132 Kokkos::parallel_reduce(Kokkos::ThreadVectorRange (dev, row_length), [&] (
const LO iEntry, value_type& lsum) {
133 const auto A_val = A_row.value(iEntry);
134 lsum += A_val * X_colmap_lcl(A_row.colidx(iEntry),v);
// Split-row path for column v; same entry-index rule as in the column-0 code.
138 const LO offRankOffset = offsets(lclRow);
139 const size_t start = A_lcl.graph.row_map(lclRow);
140 const size_t end = A_lcl.graph.row_map(lclRow+1);
142 Kokkos::parallel_reduce(Kokkos::ThreadVectorRange (dev, start, end), [&] (
const LO iEntry, value_type& lsum) {
143 const auto A_val = A_lcl.values(iEntry);
144 const auto lclCol = A_lcl.graph.entries(iEntry);
145 if (iEntry < offRankOffset)
146 lsum += A_val * X_domainmap_lcl(lclCol,v);
147 else if (!skipOffRank)
148 lsum += A_val * X_colmap_lcl(lclCol,v);
152 Kokkos::single(Kokkos::PerThread(dev),[&] () {
153 R_lcl(lclRow,v) = B_lcl(lclRow,v) - A_x;
// Kokkos team functor that subtracts, from an already stored R_lcl, the
// contribution of the row entries located at or after offsets(row):
//   R_lcl(row, col) -= sum_{k >= offsets(row)} A(k) * X_colmap_lcl(col_k, col).
// NOTE(review): this listing omits parts of the definition (the struct
// header, some members, branch headers and closing braces); the comments
// below describe the visible lines only.
164 template<
class AMatrix,
class MV,
class ConstMV,
class Offsets,
bool is_MV>
// Execution space, ordinal type and scalar type are all taken from the matrix type.
167 using execution_space =
typename AMatrix::execution_space;
168 using LO =
typename AMatrix::non_const_ordinal_type;
169 using value_type =
typename AMatrix::non_const_value_type;
170 using team_policy =
typename Kokkos::TeamPolicy<execution_space>;
171 using team_member =
typename team_policy::member_type;
172 using ATV = Kokkos::ArithTraits<value_type>;
// X values indexed by the local column index of each matrix entry.
175 ConstMV X_colmap_lcl;
// Constructor (partially visible): the visible initializers copy each
// argument into the member whose name lacks the trailing underscore.
182 const ConstMV& X_colmap_lcl_,
184 const int rows_per_team_,
187 X_colmap_lcl(X_colmap_lcl_),
189 rows_per_team(rows_per_team_),
// Team entry point: the threads of a team share `rows_per_team` consecutive rows.
193 KOKKOS_INLINE_FUNCTION
194 void operator() (
const team_member& dev)
const
197 Kokkos::parallel_for(Kokkos::TeamThreadRange (dev, 0, rows_per_team),[&] (
const LO& loop) {
// Local row handled by this iteration: first row of the team plus the loop index.
198 const LO lclRow =
static_cast<LO
> (dev.league_rank ()) * rows_per_team + loop;
// Rows at or past numRows() are filtered here; the body of this test is not visible.
200 if (lclRow >= A_lcl.numRows ()) {
// Entry range processed for this row: [offsets(lclRow), row_map(lclRow+1)).
// NOTE(review): the range mixes LO (offRankOffset) with size_t (end) and the
// lambda index is LO -- confirm LO is wide enough for the local entry count.
204 const LO offRankOffset = offsets(lclRow);
205 const size_t end = A_lcl.graph.row_map(lclRow+1);
// ---- Column 0 only ----
// NOTE(review): presumably the single-vector (is_MV == false) path; the
// branch that selects it is not visible.
209 value_type A_x = ATV::zero ();
211 Kokkos::parallel_reduce(Kokkos::ThreadVectorRange (dev, offRankOffset, end), [&] (
const LO iEntry, value_type& lsum) {
212 const auto A_val = A_lcl.values(iEntry);
213 const auto lclCol = A_lcl.graph.entries(iEntry);
214 lsum += A_val * X_colmap_lcl(lclCol,0);
// In-place update (-=): R_lcl must already hold the first part of the result.
217 Kokkos::single(Kokkos::PerThread(dev),[&] () {
218 R_lcl(lclRow,0) -= A_x;
// ---- All columns ----
// Same update repeated for every column v of X_colmap_lcl.
223 const LO numVectors =
static_cast<LO
>(X_colmap_lcl.extent(1));
225 for(LO v=0; v<numVectors; v++) {
// The accumulator is reset for each column.
227 value_type A_x = ATV::zero ();
229 Kokkos::parallel_reduce(Kokkos::ThreadVectorRange (dev, offRankOffset, end), [&] (
const LO iEntry, value_type& lsum) {
230 const auto A_val = A_lcl.values(iEntry);
231 const auto lclCol = A_lcl.graph.entries(iEntry);
232 lsum += A_val * X_colmap_lcl(lclCol,v);
235 Kokkos::single(Kokkos::PerThread(dev),[&] () {
236 R_lcl(lclRow,v) -= A_x;
// Host-side driver of the on-rank residual: validates the arguments, picks a
// team policy and launches LocalResidualFunctor.
// NOTE(review): the function name and most of its signature are not visible
// in this listing; call sites in this file invoke it as
// localResidual(A, X_colmap, B, R, offsets[, &X_domainmap]). Several other
// lines (declarations, branch headers, closing braces) are missing as well;
// the comments below describe the visible lines only.
245 template<
class SC,
class LO,
class GO,
class NO>
250 const Kokkos::View<const size_t*, typename NO::device_type>& offsets,
253 using Teuchos::NO_TRANS;
259 using offset_type = Kokkos::View<const size_t*, typename NO::device_type>;
// Read-only device view of X in column-Map layout.
262 const_local_view_device_type X_colmap_lcl = X_colmap.
getLocalViewDevice(Access::ReadOnly);
// Stays default-constructed unless X_domainmap is non-null (assigned further down).
265 const_local_view_device_type X_domainmap_lcl;
// ---- Argument validation (the visible tests throw std::runtime_error) ----
269 TEUCHOS_TEST_FOR_EXCEPTION
271 "X.getNumVectors() = " << X_colmap.
getNumVectors () <<
" != "
// Local length tests against the matrix's column Map (X) and row Map (R, B).
273 TEUCHOS_TEST_FOR_EXCEPTION
275 A.
getColMap ()->getLocalNumElements (), std::runtime_error,
276 "X has the wrong number of local rows. "
278 "A.getColMap()->getLocalNumElements() = " <<
279 A.
getColMap ()->getLocalNumElements () <<
".");
280 TEUCHOS_TEST_FOR_EXCEPTION
282 A.
getRowMap ()->getLocalNumElements (), std::runtime_error,
283 "R has the wrong number of local rows. "
285 "A.getRowMap()->getLocalNumElements() = " <<
286 A.
getRowMap ()->getLocalNumElements () <<
".");
287 TEUCHOS_TEST_FOR_EXCEPTION
289 A.
getRowMap ()->getLocalNumElements (), std::runtime_error,
290 "B has the wrong number of local rows. "
292 "A.getRowMap()->getLocalNumElements() = " <<
293 A.
getRowMap ()->getLocalNumElements () <<
".");
// The matrix must be fill complete.
295 TEUCHOS_TEST_FOR_EXCEPTION
296 (! A.
isFillComplete (), std::runtime_error,
"The matrix A is not "
297 "fill complete. You must call fillComplete() (possibly with "
298 "domain and range Map arguments) without an intervening "
299 "resumeFill() call before you may call this method.");
// NOTE(review): the message names "Y", but the multivectors visible in this
// function are X, B and R -- the text looks inherited from an apply()-style
// check; confirm and reword.
300 TEUCHOS_TEST_FOR_EXCEPTION
302 std::runtime_error,
"X, Y and B must be constant stride.");
// Aliasing test: X's device pointer must differ from R's and from B's; a
// null X pointer is exempt.
// NOTE(review): same "Y" naming mismatch in the message below.
305 TEUCHOS_TEST_FOR_EXCEPTION
306 ((X_colmap_lcl.data () == R_lcl.data () && X_colmap_lcl.data () !=
nullptr) ||
307 (X_colmap_lcl.data () == B_lcl.data () && X_colmap_lcl.data () !=
nullptr),
308 std::runtime_error,
"X, Y and R may not alias one another.");
// Unfused path: A*X is written into R through localApply (alpha = one,
// beta = zero); the remainder of this branch is not visible.
312 if (!fusedResidual) {
313 SC one = Teuchos::ScalarTraits<SC>::one();
315 SC zero = Teuchos::ScalarTraits<SC>::zero();
318 A.
localApply(X_colmap,R,Teuchos::NO_TRANS, one, zero);
// Test for a rank without local rows (its body is not visible).
323 if (A_lcl.numRows() == 0) {
// ---- Launch configuration ----
327 int64_t numLocalRows = A_lcl.numRows ();
328 int64_t myNnz = A_lcl.nnz();
// -1 marks "not chosen yet" for the values handed to the SpMV launch heuristic.
331 int vector_length = -1;
332 int64_t rows_per_thread = -1;
334 using execution_space =
typename CrsMatrix<SC,LO,GO,NO>::execution_space;
335 using policy_type =
typename Kokkos::TeamPolicy<execution_space>;
// rows_per_team comes from KokkosSparse's SpMV launch heuristic; worksets is
// the number of teams, ceil(B_lcl.extent(0) / rows_per_team).
337 int64_t rows_per_team = KokkosSparse::Impl::spmv_launch_parameters<execution_space>(numLocalRows, myNnz, rows_per_thread, team_size, vector_length);
338 int64_t worksets = (B_lcl.extent (0) + rows_per_team - 1) / rows_per_team;
// Placeholder policy, replaced by one of the two assignments below (automatic
// or explicit team size; the selecting condition is not visible).
340 policy_type policy (1, 1);
342 policy = policy_type (worksets, Kokkos::AUTO, vector_length);
345 policy = policy_type (worksets, team_size, vector_length);
// True when X has exactly one column. NOTE(review): presumably selects the
// "residual-vector" kernels over the "residual-multivector" ones; the branch
// itself is not visible.
348 bool is_vector = (X_colmap_lcl.extent(1) == 1);
// The three trailing functor flags are <is_MV, restrictedMode, skipOffRank>.
// Without X_domainmap the unrestricted functor is used.
352 if (X_domainmap ==
nullptr) {
354 using functor_type = LocalResidualFunctor<local_matrix_device_type,local_view_device_type,const_local_view_device_type,offset_type,false,false,false>;
355 functor_type func (A_lcl, X_colmap_lcl, B_lcl, R_lcl, rows_per_team, offsets, X_domainmap_lcl);
356 Kokkos::parallel_for(
"residual-vector",policy,func);
// With X_domainmap: restricted functor, which reads the entries located
// before offsets(row) from X_domainmap_lcl.
361 X_domainmap_lcl = X_domainmap->getLocalViewDevice(Access::ReadOnly);
362 using functor_type = LocalResidualFunctor<local_matrix_device_type,local_view_device_type,const_local_view_device_type,offset_type,false,true,false>;
363 functor_type func (A_lcl, X_colmap_lcl, B_lcl, R_lcl, rows_per_team, offsets, X_domainmap_lcl);
364 Kokkos::parallel_for(
"residual-vector",policy,func);
// Same two choices with is_MV = true (functor loops over all columns).
370 if (X_domainmap ==
nullptr) {
372 using functor_type = LocalResidualFunctor<local_matrix_device_type,local_view_device_type,const_local_view_device_type,offset_type,true,false,false>;
373 functor_type func (A_lcl, X_colmap_lcl, B_lcl, R_lcl, rows_per_team, offsets, X_domainmap_lcl);
374 Kokkos::parallel_for(
"residual-multivector",policy,func);
379 X_domainmap_lcl = X_domainmap->getLocalViewDevice(Access::ReadOnly);
380 using functor_type = LocalResidualFunctor<local_matrix_device_type,local_view_device_type,const_local_view_device_type,offset_type,true,true,false>;
381 functor_type func (A_lcl, X_colmap_lcl, B_lcl, R_lcl, rows_per_team, offsets, X_domainmap_lcl);
382 Kokkos::parallel_for(
"residual-multivector",policy,func);
389 template<
class SC,
class LO,
class GO,
class NO>
390 void localResidualWithCommCompOverlap(
const CrsMatrix<SC,LO,GO,NO> & A,
391 MultiVector<SC,LO,GO,NO> & X_colmap,
392 const MultiVector<SC,LO,GO,NO> & B,
393 MultiVector<SC,LO,GO,NO> & R,
394 const Kokkos::View<const size_t*, typename NO::device_type>& offsets,
395 const MultiVector<SC,LO,GO,NO> & X_domainmap) {
397 using Teuchos::NO_TRANS;
399 using import_type =
typename CrsMatrix<SC,LO,GO,NO>::import_type;
401 ProfilingRegion regionLocalApply (
"Tpetra::CrsMatrix::localResidualWithCommCompOverlap");
403 using local_matrix_device_type =
typename CrsMatrix<SC,LO,GO,NO>::local_matrix_device_type;
404 using local_view_device_type =
typename MultiVector<SC,LO,GO,NO>::dual_view_type::t_dev::non_const_type;
405 using const_local_view_device_type =
typename MultiVector<SC,LO,GO,NO>::dual_view_type::t_dev::const_type;
406 using offset_type = Kokkos::View<const size_t*, typename NO::device_type>;
408 local_matrix_device_type A_lcl = A.getLocalMatrixDevice ();
409 const_local_view_device_type X_colmap_lcl = X_colmap.getLocalViewDevice(Access::ReadOnly);
410 const_local_view_device_type B_lcl = B.getLocalViewDevice(Access::ReadOnly);
411 local_view_device_type R_lcl = R.getLocalViewDevice(Access::OverwriteAll);
412 const_local_view_device_type X_domainmap_lcl = X_domainmap.getLocalViewDevice(Access::ReadOnly);;
416 TEUCHOS_TEST_FOR_EXCEPTION
417 (X_colmap.getNumVectors () != R.getNumVectors (), std::runtime_error,
418 "X.getNumVectors() = " << X_colmap.getNumVectors () <<
" != "
419 "R.getNumVectors() = " << R.getNumVectors () <<
".");
420 TEUCHOS_TEST_FOR_EXCEPTION
421 (X_colmap.getLocalLength () !=
422 A.getColMap ()->getLocalNumElements (), std::runtime_error,
423 "X has the wrong number of local rows. "
424 "X.getLocalLength() = " << X_colmap.getLocalLength () <<
" != "
425 "A.getColMap()->getLocalNumElements() = " <<
426 A.getColMap ()->getLocalNumElements () <<
".");
427 TEUCHOS_TEST_FOR_EXCEPTION
428 (R.getLocalLength () !=
429 A.getRowMap ()->getLocalNumElements (), std::runtime_error,
430 "R has the wrong number of local rows. "
431 "R.getLocalLength() = " << R.getLocalLength () <<
" != "
432 "A.getRowMap()->getLocalNumElements() = " <<
433 A.getRowMap ()->getLocalNumElements () <<
".");
434 TEUCHOS_TEST_FOR_EXCEPTION
435 (B.getLocalLength () !=
436 A.getRowMap ()->getLocalNumElements (), std::runtime_error,
437 "B has the wrong number of local rows. "
438 "B.getLocalLength() = " << B.getLocalLength () <<
" != "
439 "A.getRowMap()->getLocalNumElements() = " <<
440 A.getRowMap ()->getLocalNumElements () <<
".");
442 TEUCHOS_TEST_FOR_EXCEPTION
443 (! A.isFillComplete (), std::runtime_error,
"The matrix A is not "
444 "fill complete. You must call fillComplete() (possibly with "
445 "domain and range Map arguments) without an intervening "
446 "resumeFill() call before you may call this method.");
447 TEUCHOS_TEST_FOR_EXCEPTION
448 (! X_colmap.isConstantStride () || ! R.isConstantStride () || ! B.isConstantStride (),
449 std::runtime_error,
"X, Y and B must be constant stride.");
452 TEUCHOS_TEST_FOR_EXCEPTION
453 ((X_colmap_lcl.data () == R_lcl.data () && X_colmap_lcl.data () !=
nullptr) ||
454 (X_colmap_lcl.data () == B_lcl.data () && X_colmap_lcl.data () !=
nullptr),
455 std::runtime_error,
"X, Y and R may not alias one another.");
458 if (A_lcl.numRows() == 0) {
462 int64_t numLocalRows = A_lcl.numRows ();
463 int64_t myNnz = A_lcl.nnz();
466 int vector_length = -1;
467 int64_t rows_per_thread = -1;
469 using execution_space =
typename CrsMatrix<SC,LO,GO,NO>::execution_space;
470 using policy_type =
typename Kokkos::TeamPolicy<execution_space>;
472 int64_t rows_per_team = KokkosSparse::Impl::spmv_launch_parameters<execution_space>(numLocalRows, myNnz, rows_per_thread, team_size, vector_length);
473 int64_t worksets = (B_lcl.extent (0) + rows_per_team - 1) / rows_per_team;
475 policy_type policy (1, 1);
477 policy = policy_type (worksets, Kokkos::AUTO, vector_length);
480 policy = policy_type (worksets, team_size, vector_length);
483 bool is_vector = (X_colmap_lcl.extent(1) == 1);
487 using functor_type = LocalResidualFunctor<local_matrix_device_type,local_view_device_type,const_local_view_device_type,offset_type,false,true,true>;
488 functor_type func (A_lcl, X_colmap_lcl, B_lcl, R_lcl, rows_per_team, offsets, X_domainmap_lcl);
489 Kokkos::parallel_for(
"residual-vector",policy,func);
491 RCP<const import_type> importer = A.getGraph ()->getImporter ();
492 X_colmap.endImport (X_domainmap, *importer,
INSERT,
true);
494 Kokkos::fence(
"Tpetra::localResidualWithCommCompOverlap-1");
496 using functor_type2 = OffRankUpdateFunctor<local_matrix_device_type,local_view_device_type,const_local_view_device_type,offset_type,false>;
497 functor_type2 func2 (A_lcl, X_colmap_lcl, R_lcl, rows_per_team, offsets);
498 Kokkos::parallel_for(
"residual-vector-offrank",policy,func2);
503 using functor_type = LocalResidualFunctor<local_matrix_device_type,local_view_device_type,const_local_view_device_type,offset_type,true,true,true>;
504 functor_type func (A_lcl, X_colmap_lcl, B_lcl, R_lcl, rows_per_team, offsets, X_domainmap_lcl);
505 Kokkos::parallel_for(
"residual-multivector",policy,func);
507 RCP<const import_type> importer = A.getGraph ()->getImporter ();
508 X_colmap.endImport (X_domainmap, *importer,
INSERT,
true);
510 Kokkos::fence(
"Tpetra::localResidualWithCommCompOverlap-2");
512 using functor_type2 = OffRankUpdateFunctor<local_matrix_device_type,local_view_device_type,const_local_view_device_type,offset_type,true>;
513 functor_type2 func2 (A_lcl, X_colmap_lcl, R_lcl, rows_per_team, offsets);
514 Kokkos::parallel_for(
"residual-vector-offrank",policy,func2);
// Top-level driver: per the reference text accompanying this listing,
// residual(A, X, B, R) computes R = B - A * X. The visible lines handle the
// import of X, the row-Map temporaries for B and R, and the choice of local
// kernel.
// NOTE(review): the function name, its signature and many other lines
// (declarations, branch headers, else-branches, closing braces) are not
// visible; the comments below describe the visible lines only.
521 template<
class SC,
class LO,
class GO,
class NO>
529 using Teuchos::rcp_const_cast;
530 using Teuchos::rcpFromRef;
// Overlapping communication and computation is only accepted together with
// skipCopyAndPermuteIfPossible.
535 if (overlapCommunicationAndComputation)
536 TEUCHOS_ASSERT(skipCopyAndPermuteIfPossible);
// Off until the importer is known to allow it (see below).
543 bool restrictedMode =
false;
// Generic fallback (its guarding condition is not visible): R = A*X through
// apply(), then update(one, B, negone) turns R into B - R.
548 SC one = Teuchos::ScalarTraits<SC>::one(), negone = -one, zero = Teuchos::ScalarTraits<SC>::zero();
549 Aop.
apply(X_in,R_in,Teuchos::NO_TRANS, one, zero);
550 R_in.
update(one,B_in,negone);
559 using offset_type =
typename graph_type::offset_device_view_type;
562 const bool R_is_replicated =
// Communication plans of the graph; either one may be null (tested below).
571 RCP<const import_type> importer = A.
getGraph ()->getImporter ();
572 RCP<const export_type> exporter = A.
getGraph ()->getExporter ();
578 if (importer.is_null ()) {
// X_in is wrapped in a non-owning RCP with its constness cast away so it can
// stand in for the column-Map multivector.
593 X_colMap = rcp_const_cast<MV> (rcpFromRef (X_in) );
// Restricted mode needs both the behavior flag and a locally fitted importer.
605 restrictedMode = skipCopyAndPermuteIfPossible && importer->isLocallyFitted();
// Debug-only cross-check of the importer's claim against the maps themselves.
607 if (debug && restrictedMode) {
608 TEUCHOS_TEST_FOR_EXCEPTION
609 (!importer->getTargetMap()->isLocallyFitted(*importer->getSourceMap()), std::runtime_error,
610 "Source map and target map are not locally fitted, but Tpetra::residual thinks they are.");
// Start the import of X; it is completed later by a matching endImport,
// either below or inside localResidualWithCommCompOverlap.
614 X_colMap->beginImport (X_in, *importer,
INSERT, restrictedMode);
// Row-Map target for R; in the visible case it simply wraps R_in (non-owning).
625 if(exporter.is_null()) {
630 R_rowMap = rcpFromRef (R_in);
// Row-Map version of B: the visible paths either wrap an existing vector or
// import B_in through the exporter.
639 RCP<const MV> B_rowMap;
640 if(exporter.is_null()) {
646 B_rowMap = rcp_const_cast<
const MV> (B_rowMapNonConst);
649 B_rowMap = rcpFromRef (B_in);
// NOTE(review): doImport is called with the Export plan and combine mode ADD,
// which accumulates into the target -- confirm the target is zeroed or
// freshly created beforehand (not visible here).
655 ProfilingRegion regionExport (
"Tpetra::CrsMatrix::residual: B Import");
657 B_rowMapNonConst->doImport(B_in, *exporter,
ADD);
658 B_rowMap = rcp_const_cast<
const MV> (B_rowMapNonConst);
// With an exporter: finish the import of X first, then run the local kernel.
// X_in is additionally handed to localResidual only in restricted mode.
666 if (! exporter.is_null ()) {
667 if ( ! importer.is_null ())
668 X_colMap->endImport (X_in, *importer,
INSERT, restrictedMode);
669 if (restrictedMode && !importer.is_null ())
670 localResidual (A, *X_colMap, *B_rowMap, *R_rowMap, offsets, &X_in);
672 localResidual (A, *X_colMap, *B_rowMap, *R_rowMap, offsets);
675 ProfilingRegion regionExport (
"Tpetra::CrsMatrix::residual: R Export");
// Overlap requested: the callee completes the import itself between its two
// kernels. Otherwise the import is completed here before the kernel runs.
684 if (overlapCommunicationAndComputation) {
685 localResidualWithCommCompOverlap (A, *X_colMap, *B_rowMap, *R_rowMap, offsets, X_in);
687 X_colMap->endImport (X_in, *importer,
INSERT, restrictedMode);
688 localResidual (A, *X_colMap, *B_rowMap, *R_rowMap, offsets, &X_in);
// Remaining case: complete the import if there is one, then run the kernel
// without the domain-Map vector.
691 if ( ! importer.is_null ())
692 X_colMap->endImport (X_in, *importer,
INSERT, restrictedMode);
693 localResidual (A, *X_colMap, *B_rowMap, *R_rowMap, offsets);
// NOTE(review): the region label says "Reduce Y" although the vector this
// function produces is named R -- confirm and align the label.
710 if (R_is_replicated) {
711 ProfilingRegion regionReduce (
"Tpetra::CrsMatrix::residual: Reduce Y");
723 #endif // TPETRA_DETAILS_RESIDUAL_HPP
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
static bool overlapCommunicationAndComputation()
Overlap communication and computation.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
size_t getNumVectors() const
Number of columns in the multivector.
size_t getLocalLength() const
Local number of rows on the calling process.
bool isConstantStride() const
Whether this multivector has constant stride between columns.
One or more distributed dense vectors.
virtual void apply(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const =0
Computes the operator-multivector application.
bool isDistributed() const
Whether this is a globally distributed object.
Teuchos::RCP< const crs_graph_type > getCrsGraph() const
This matrix's graph, as a CrsGraph.
static bool debug()
Whether Tpetra is in debug mode.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
static bool fusedResidual()
Fusing SpMV and update in residual instead of using 2 kernel launches. Fusing kernels implies that no...
Functor for computing R -= A_offRank*X_colmap.
offsets_view_type offsets_
Offsets (output argument)
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Insert new values that don't currently exist.
bool isFillComplete() const override
Whether the matrix is fill complete.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
The communicator over which the matrix is distributed.
Abstract interface for operators (e.g., matrices and preconditioners).
void update(const Scalar &alpha, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Scalar &beta)
Update: this = beta*this + alpha*A.
TPETRA_DETAILS_ALWAYS_INLINE local_matrix_device_type getLocalMatrixDevice() const
The local sparse matrix.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
Functor for computing the residual.
Teuchos::RCP< MV > getColumnMapMultiVector(const MV &X_domainMap, const bool force=false) const
Create a (or fetch a cached) column Map MultiVector.
Teuchos::RCP< const RowGraph< LocalOrdinal, GlobalOrdinal, Node > > getGraph() const override
This matrix's graph, as a RowGraph.
Teuchos::RCP< MV > getRowMapMultiVector(const MV &Y_rangeMap, const bool force=false) const
Create a (or fetch a cached) row Map MultiVector.
dual_view_type::t_dev::const_type getLocalViewDevice(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on device. This requires that th...
void start()
Start the deep_copy counter.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
void doExport(const SrcDistObject &source, const Export< LocalOrdinal, GlobalOrdinal, Node > &exporter, const CombineMode CM, const bool restrictedMode=false)
Export data into this object using an Export object ("forward mode").
void reduce()
Sum values of a locally replicated multivector across all processes.
static bool skipCopyAndPermuteIfPossible()
Skip copyAndPermute if possible.
void residual(const Operator< SC, LO, GO, NO > &A, const MultiVector< SC, LO, GO, NO > &X, const MultiVector< SC, LO, GO, NO > &B, MultiVector< SC, LO, GO, NO > &R)
Computes R = B - A * X.
Teuchos::RCP< const map_type > getColMap() const override
The Map that describes the column distribution in this matrix.
void localApply(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, const Teuchos::ETransp mode=Teuchos::NO_TRANS, const Scalar &alpha=Teuchos::ScalarTraits< Scalar >::one(), const Scalar &beta=Teuchos::ScalarTraits< Scalar >::zero()) const
Compute the local part of a sparse matrix-(Multi)Vector multiply.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Teuchos::RCP< const map_type > getRowMap() const override
The Map that describes the row distribution in this matrix.