41 #ifndef TPETRA_DETAILS_RESIDUAL_HPP
42 #define TPETRA_DETAILS_RESIDUAL_HPP
44 #include "TpetraCore_config.h"
45 #include "Tpetra_CrsMatrix.hpp"
46 #include "Tpetra_LocalCrsMatrixOperator.hpp"
47 #include "Tpetra_MultiVector.hpp"
48 #include "Teuchos_RCP.hpp"
49 #include "Teuchos_ScalarTraits.hpp"
52 #include "KokkosSparse_spmv_impl.hpp"
70 template<
class AMatrix,
class MV,
class ConstMV,
class Offsets,
bool is_MV,
bool restrictedMode,
bool skipOffRank>
73 using execution_space =
typename AMatrix::execution_space;
74 using LO =
typename AMatrix::non_const_ordinal_type;
75 using value_type =
typename AMatrix::non_const_value_type;
76 using team_policy =
typename Kokkos::TeamPolicy<execution_space>;
77 using team_member =
typename team_policy::member_type;
78 using ATV = Kokkos::ArithTraits<value_type>;
86 ConstMV X_domainmap_lcl;
90 const ConstMV& X_colmap_lcl_,
91 const ConstMV& B_lcl_,
93 const int rows_per_team_,
95 const ConstMV& X_domainmap_lcl_) :
97 X_colmap_lcl(X_colmap_lcl_),
100 rows_per_team(rows_per_team_),
102 X_domainmap_lcl(X_domainmap_lcl_)
105 KOKKOS_INLINE_FUNCTION
106 void operator() (
const team_member& dev)
const
109 Kokkos::parallel_for(Kokkos::TeamThreadRange (dev, 0, rows_per_team),[&] (
const LO& loop) {
110 const LO lclRow =
static_cast<LO
> (dev.league_rank ()) * rows_per_team + loop;
112 if (lclRow >= A_lcl.numRows ()) {
118 value_type A_x = ATV::zero ();
120 if (!restrictedMode) {
121 const auto A_row = A_lcl.rowConst(lclRow);
122 const LO row_length =
static_cast<LO
> (A_row.length);
124 Kokkos::parallel_reduce(Kokkos::ThreadVectorRange (dev, row_length), [&] (
const LO iEntry, value_type& lsum) {
125 const auto A_val = A_row.value(iEntry);
126 lsum += A_val * X_colmap_lcl(A_row.colidx(iEntry),0);
132 const LO offRankOffset = offsets(lclRow);
133 const size_t start = A_lcl.graph.row_map(lclRow);
134 const size_t end = A_lcl.graph.row_map(lclRow+1);
136 Kokkos::parallel_reduce(Kokkos::ThreadVectorRange (dev, start, end), [&] (
const LO iEntry, value_type& lsum) {
137 const auto A_val = A_lcl.values(iEntry);
138 const auto lclCol = A_lcl.graph.entries(iEntry);
139 if (iEntry < offRankOffset)
140 lsum += A_val * X_domainmap_lcl(lclCol,0);
141 else if (!skipOffRank)
142 lsum += A_val * X_colmap_lcl(lclCol,0);
146 Kokkos::single(Kokkos::PerThread(dev),[&] () {
147 R_lcl(lclRow,0) = B_lcl(lclRow,0) - A_x;
152 const LO numVectors =
static_cast<LO
>(X_colmap_lcl.extent(1));
154 for(LO v=0; v<numVectors; v++) {
156 value_type A_x = ATV::zero ();
158 if (!restrictedMode) {
160 const auto A_row = A_lcl.rowConst(lclRow);
161 const LO row_length =
static_cast<LO
> (A_row.length);
163 Kokkos::parallel_reduce(Kokkos::ThreadVectorRange (dev, row_length), [&] (
const LO iEntry, value_type& lsum) {
164 const auto A_val = A_row.value(iEntry);
165 lsum += A_val * X_colmap_lcl(A_row.colidx(iEntry),v);
169 const LO offRankOffset = offsets(lclRow);
170 const size_t start = A_lcl.graph.row_map(lclRow);
171 const size_t end = A_lcl.graph.row_map(lclRow+1);
173 Kokkos::parallel_reduce(Kokkos::ThreadVectorRange (dev, start, end), [&] (
const LO iEntry, value_type& lsum) {
174 const auto A_val = A_lcl.values(iEntry);
175 const auto lclCol = A_lcl.graph.entries(iEntry);
176 if (iEntry < offRankOffset)
177 lsum += A_val * X_domainmap_lcl(lclCol,v);
178 else if (!skipOffRank)
179 lsum += A_val * X_colmap_lcl(lclCol,v);
183 Kokkos::single(Kokkos::PerThread(dev),[&] () {
184 R_lcl(lclRow,v) = B_lcl(lclRow,v) - A_x;
195 template<
class AMatrix,
class MV,
class ConstMV,
class Offsets,
bool is_MV>
198 using execution_space =
typename AMatrix::execution_space;
199 using LO =
typename AMatrix::non_const_ordinal_type;
200 using value_type =
typename AMatrix::non_const_value_type;
201 using team_policy =
typename Kokkos::TeamPolicy<execution_space>;
202 using team_member =
typename team_policy::member_type;
203 using ATV = Kokkos::ArithTraits<value_type>;
206 ConstMV X_colmap_lcl;
213 const ConstMV& X_colmap_lcl_,
215 const int rows_per_team_,
218 X_colmap_lcl(X_colmap_lcl_),
220 rows_per_team(rows_per_team_),
224 KOKKOS_INLINE_FUNCTION
225 void operator() (
const team_member& dev)
const
228 Kokkos::parallel_for(Kokkos::TeamThreadRange (dev, 0, rows_per_team),[&] (
const LO& loop) {
229 const LO lclRow =
static_cast<LO
> (dev.league_rank ()) * rows_per_team + loop;
231 if (lclRow >= A_lcl.numRows ()) {
235 const LO offRankOffset = offsets(lclRow);
236 const size_t end = A_lcl.graph.row_map(lclRow+1);
240 value_type A_x = ATV::zero ();
242 Kokkos::parallel_reduce(Kokkos::ThreadVectorRange (dev, offRankOffset, end), [&] (
const LO iEntry, value_type& lsum) {
243 const auto A_val = A_lcl.values(iEntry);
244 const auto lclCol = A_lcl.graph.entries(iEntry);
245 lsum += A_val * X_colmap_lcl(lclCol,0);
248 Kokkos::single(Kokkos::PerThread(dev),[&] () {
249 R_lcl(lclRow,0) -= A_x;
254 const LO numVectors =
static_cast<LO
>(X_colmap_lcl.extent(1));
256 for(LO v=0; v<numVectors; v++) {
258 value_type A_x = ATV::zero ();
260 Kokkos::parallel_reduce(Kokkos::ThreadVectorRange (dev, offRankOffset, end), [&] (
const LO iEntry, value_type& lsum) {
261 const auto A_val = A_lcl.values(iEntry);
262 const auto lclCol = A_lcl.graph.entries(iEntry);
263 lsum += A_val * X_colmap_lcl(lclCol,v);
266 Kokkos::single(Kokkos::PerThread(dev),[&] () {
267 R_lcl(lclRow,v) -= A_x;
276 template<
class SC,
class LO,
class GO,
class NO>
281 const Kokkos::View<const size_t*, typename NO::device_type>& offsets,
284 using Teuchos::NO_TRANS;
290 using offset_type = Kokkos::View<const size_t*, typename NO::device_type>;
293 const_local_view_device_type X_colmap_lcl = X_colmap.
getLocalViewDevice(Access::ReadOnly);
296 const_local_view_device_type X_domainmap_lcl;
300 TEUCHOS_TEST_FOR_EXCEPTION
302 "X.getNumVectors() = " << X_colmap.
getNumVectors () <<
" != "
304 TEUCHOS_TEST_FOR_EXCEPTION
306 A.
getColMap ()->getLocalNumElements (), std::runtime_error,
307 "X has the wrong number of local rows. "
309 "A.getColMap()->getLocalNumElements() = " <<
310 A.
getColMap ()->getLocalNumElements () <<
".");
311 TEUCHOS_TEST_FOR_EXCEPTION
313 A.
getRowMap ()->getLocalNumElements (), std::runtime_error,
314 "R has the wrong number of local rows. "
316 "A.getRowMap()->getLocalNumElements() = " <<
317 A.
getRowMap ()->getLocalNumElements () <<
".");
318 TEUCHOS_TEST_FOR_EXCEPTION
320 A.
getRowMap ()->getLocalNumElements (), std::runtime_error,
321 "B has the wrong number of local rows. "
323 "A.getRowMap()->getLocalNumElements() = " <<
324 A.
getRowMap ()->getLocalNumElements () <<
".");
326 TEUCHOS_TEST_FOR_EXCEPTION
327 (! A.
isFillComplete (), std::runtime_error,
"The matrix A is not "
328 "fill complete. You must call fillComplete() (possibly with "
329 "domain and range Map arguments) without an intervening "
330 "resumeFill() call before you may call this method.");
331 TEUCHOS_TEST_FOR_EXCEPTION
333 std::runtime_error,
"X, Y and B must be constant stride.");
336 TEUCHOS_TEST_FOR_EXCEPTION
337 ((X_colmap_lcl.data () == R_lcl.data () && X_colmap_lcl.data () !=
nullptr) ||
338 (X_colmap_lcl.data () == B_lcl.data () && X_colmap_lcl.data () !=
nullptr),
339 std::runtime_error,
"X, Y and R may not alias one another.");
342 SC one = Teuchos::ScalarTraits<SC>::one(), negone = -one, zero = Teuchos::ScalarTraits<SC>::zero();
343 #ifdef TPETRA_DETAILS_USE_REFERENCE_RESIDUAL
346 A.
localApply(X_colmap,R,Teuchos::NO_TRANS, one, zero);
350 if (A_lcl.numRows() == 0) {
354 int64_t numLocalRows = A_lcl.numRows ();
355 int64_t myNnz = A_lcl.nnz();
359 LO maxRowImbalance = 0;
360 if(numLocalRows != 0)
367 lclOp->applyImbalancedRows (X_colmap_lcl, R_lcl, Teuchos::NO_TRANS, one, zero);
373 int vector_length = -1;
374 int64_t rows_per_thread = -1;
376 using execution_space =
typename CrsMatrix<SC,LO,GO,NO>::execution_space;
377 using policy_type =
typename Kokkos::TeamPolicy<execution_space>;
379 int64_t rows_per_team = KokkosSparse::Impl::spmv_launch_parameters<execution_space>(numLocalRows, myNnz, rows_per_thread, team_size, vector_length);
380 int64_t worksets = (B_lcl.extent (0) + rows_per_team - 1) / rows_per_team;
382 policy_type policy (1, 1);
384 policy = policy_type (worksets, Kokkos::AUTO, vector_length);
387 policy = policy_type (worksets, team_size, vector_length);
390 bool is_vector = (X_colmap_lcl.extent(1) == 1);
394 if (X_domainmap ==
nullptr) {
396 using functor_type = LocalResidualFunctor<local_matrix_device_type,local_view_device_type,const_local_view_device_type,offset_type,false,false,false>;
397 functor_type func (A_lcl, X_colmap_lcl, B_lcl, R_lcl, rows_per_team, offsets, X_domainmap_lcl);
398 Kokkos::parallel_for(
"residual-vector",policy,func);
403 X_domainmap_lcl = X_domainmap->getLocalViewDevice(Access::ReadOnly);
404 using functor_type = LocalResidualFunctor<local_matrix_device_type,local_view_device_type,const_local_view_device_type,offset_type,false,true,false>;
405 functor_type func (A_lcl, X_colmap_lcl, B_lcl, R_lcl, rows_per_team, offsets, X_domainmap_lcl);
406 Kokkos::parallel_for(
"residual-vector",policy,func);
412 if (X_domainmap ==
nullptr) {
414 using functor_type = LocalResidualFunctor<local_matrix_device_type,local_view_device_type,const_local_view_device_type,offset_type,true,false,false>;
415 functor_type func (A_lcl, X_colmap_lcl, B_lcl, R_lcl, rows_per_team, offsets, X_domainmap_lcl);
416 Kokkos::parallel_for(
"residual-multivector",policy,func);
421 X_domainmap_lcl = X_domainmap->getLocalViewDevice(Access::ReadOnly);
422 using functor_type = LocalResidualFunctor<local_matrix_device_type,local_view_device_type,const_local_view_device_type,offset_type,true,true,false>;
423 functor_type func (A_lcl, X_colmap_lcl, B_lcl, R_lcl, rows_per_team, offsets, X_domainmap_lcl);
424 Kokkos::parallel_for(
"residual-multivector",policy,func);
432 template<
class SC,
class LO,
class GO,
class NO>
433 void localResidualWithCommCompOverlap(
const CrsMatrix<SC,LO,GO,NO> & A,
434 MultiVector<SC,LO,GO,NO> & X_colmap,
435 const MultiVector<SC,LO,GO,NO> & B,
436 MultiVector<SC,LO,GO,NO> & R,
437 const Kokkos::View<const size_t*, typename NO::device_type>& offsets,
438 const MultiVector<SC,LO,GO,NO> & X_domainmap) {
440 using Teuchos::NO_TRANS;
442 using import_type =
typename CrsMatrix<SC,LO,GO,NO>::import_type;
444 ProfilingRegion regionLocalApply (
"Tpetra::CrsMatrix::localResidualWithCommCompOverlap");
446 using local_matrix_device_type =
typename CrsMatrix<SC,LO,GO,NO>::local_matrix_device_type;
447 using local_view_device_type =
typename MultiVector<SC,LO,GO,NO>::dual_view_type::t_dev::non_const_type;
448 using const_local_view_device_type =
typename MultiVector<SC,LO,GO,NO>::dual_view_type::t_dev::const_type;
449 using offset_type = Kokkos::View<const size_t*, typename NO::device_type>;
451 local_matrix_device_type A_lcl = A.getLocalMatrixDevice ();
452 const_local_view_device_type X_colmap_lcl = X_colmap.getLocalViewDevice(Access::ReadOnly);
453 const_local_view_device_type B_lcl = B.getLocalViewDevice(Access::ReadOnly);
454 local_view_device_type R_lcl = R.getLocalViewDevice(Access::OverwriteAll);
455 const_local_view_device_type X_domainmap_lcl = X_domainmap.getLocalViewDevice(Access::ReadOnly);;
459 TEUCHOS_TEST_FOR_EXCEPTION
460 (X_colmap.getNumVectors () != R.getNumVectors (), std::runtime_error,
461 "X.getNumVectors() = " << X_colmap.getNumVectors () <<
" != "
462 "R.getNumVectors() = " << R.getNumVectors () <<
".");
463 TEUCHOS_TEST_FOR_EXCEPTION
464 (X_colmap.getLocalLength () !=
465 A.getColMap ()->getLocalNumElements (), std::runtime_error,
466 "X has the wrong number of local rows. "
467 "X.getLocalLength() = " << X_colmap.getLocalLength () <<
" != "
468 "A.getColMap()->getLocalNumElements() = " <<
469 A.getColMap ()->getLocalNumElements () <<
".");
470 TEUCHOS_TEST_FOR_EXCEPTION
471 (R.getLocalLength () !=
472 A.getRowMap ()->getLocalNumElements (), std::runtime_error,
473 "R has the wrong number of local rows. "
474 "R.getLocalLength() = " << R.getLocalLength () <<
" != "
475 "A.getRowMap()->getLocalNumElements() = " <<
476 A.getRowMap ()->getLocalNumElements () <<
".");
477 TEUCHOS_TEST_FOR_EXCEPTION
478 (B.getLocalLength () !=
479 A.getRowMap ()->getLocalNumElements (), std::runtime_error,
480 "B has the wrong number of local rows. "
481 "B.getLocalLength() = " << B.getLocalLength () <<
" != "
482 "A.getRowMap()->getLocalNumElements() = " <<
483 A.getRowMap ()->getLocalNumElements () <<
".");
485 TEUCHOS_TEST_FOR_EXCEPTION
486 (! A.isFillComplete (), std::runtime_error,
"The matrix A is not "
487 "fill complete. You must call fillComplete() (possibly with "
488 "domain and range Map arguments) without an intervening "
489 "resumeFill() call before you may call this method.");
490 TEUCHOS_TEST_FOR_EXCEPTION
491 (! X_colmap.isConstantStride () || ! R.isConstantStride () || ! B.isConstantStride (),
492 std::runtime_error,
"X, Y and B must be constant stride.");
495 TEUCHOS_TEST_FOR_EXCEPTION
496 ((X_colmap_lcl.data () == R_lcl.data () && X_colmap_lcl.data () !=
nullptr) ||
497 (X_colmap_lcl.data () == B_lcl.data () && X_colmap_lcl.data () !=
nullptr),
498 std::runtime_error,
"X, Y and R may not alias one another.");
501 if (A_lcl.numRows() == 0) {
505 int64_t numLocalRows = A_lcl.numRows ();
506 int64_t myNnz = A_lcl.nnz();
524 int vector_length = -1;
525 int64_t rows_per_thread = -1;
527 using execution_space =
typename CrsMatrix<SC,LO,GO,NO>::execution_space;
528 using policy_type =
typename Kokkos::TeamPolicy<execution_space>;
530 int64_t rows_per_team = KokkosSparse::Impl::spmv_launch_parameters<execution_space>(numLocalRows, myNnz, rows_per_thread, team_size, vector_length);
531 int64_t worksets = (B_lcl.extent (0) + rows_per_team - 1) / rows_per_team;
533 policy_type policy (1, 1);
535 policy = policy_type (worksets, Kokkos::AUTO, vector_length);
538 policy = policy_type (worksets, team_size, vector_length);
541 bool is_vector = (X_colmap_lcl.extent(1) == 1);
545 using functor_type = LocalResidualFunctor<local_matrix_device_type,local_view_device_type,const_local_view_device_type,offset_type,false,true,true>;
546 functor_type func (A_lcl, X_colmap_lcl, B_lcl, R_lcl, rows_per_team, offsets, X_domainmap_lcl);
547 Kokkos::parallel_for(
"residual-vector",policy,func);
549 RCP<const import_type> importer = A.getGraph ()->getImporter ();
550 X_colmap.endImport (X_domainmap, *importer,
INSERT,
true);
552 Kokkos::fence(
"Tpetra::localResidualWithCommCompOverlap-1");
554 using functor_type2 = OffRankUpdateFunctor<local_matrix_device_type,local_view_device_type,const_local_view_device_type,offset_type,false>;
555 functor_type2 func2 (A_lcl, X_colmap_lcl, R_lcl, rows_per_team, offsets);
556 Kokkos::parallel_for(
"residual-vector-offrank",policy,func2);
561 using functor_type = LocalResidualFunctor<local_matrix_device_type,local_view_device_type,const_local_view_device_type,offset_type,true,true,true>;
562 functor_type func (A_lcl, X_colmap_lcl, B_lcl, R_lcl, rows_per_team, offsets, X_domainmap_lcl);
563 Kokkos::parallel_for(
"residual-multivector",policy,func);
565 RCP<const import_type> importer = A.getGraph ()->getImporter ();
566 X_colmap.endImport (X_domainmap, *importer,
INSERT,
true);
568 Kokkos::fence(
"Tpetra::localResidualWithCommCompOverlap-2");
570 using functor_type2 = OffRankUpdateFunctor<local_matrix_device_type,local_view_device_type,const_local_view_device_type,offset_type,true>;
571 functor_type2 func2 (A_lcl, X_colmap_lcl, R_lcl, rows_per_team, offsets);
572 Kokkos::parallel_for(
"residual-vector-offrank",policy,func2);
579 template<
class SC,
class LO,
class GO,
class NO>
587 using Teuchos::rcp_const_cast;
588 using Teuchos::rcpFromRef;
593 if (overlapCommunicationAndComputation)
594 TEUCHOS_ASSERT(skipCopyAndPermuteIfPossible);
601 bool restrictedMode =
false;
606 SC one = Teuchos::ScalarTraits<SC>::one(), negone = -one, zero = Teuchos::ScalarTraits<SC>::zero();
607 Aop.
apply(X_in,R_in,Teuchos::NO_TRANS, one, zero);
608 R_in.
update(one,B_in,negone);
617 using offset_type =
typename graph_type::offset_device_view_type;
620 const bool R_is_replicated =
629 RCP<const import_type> importer = A.
getGraph ()->getImporter ();
630 RCP<const export_type> exporter = A.
getGraph ()->getExporter ();
636 if (importer.is_null ()) {
651 X_colMap = rcp_const_cast<MV> (rcpFromRef (X_in) );
663 restrictedMode = skipCopyAndPermuteIfPossible && importer->isLocallyFitted();
665 if (debug && restrictedMode) {
666 TEUCHOS_TEST_FOR_EXCEPTION
667 (!importer->getTargetMap()->isLocallyFitted(*importer->getSourceMap()), std::runtime_error,
668 "Source map and target map are not locally fitted, but Tpetra::residual thinks they are.");
672 X_colMap->beginImport (X_in, *importer,
INSERT, restrictedMode);
683 if(exporter.is_null()) {
688 R_rowMap = rcpFromRef (R_in);
697 RCP<const MV> B_rowMap;
698 if(exporter.is_null()) {
704 B_rowMap = rcp_const_cast<
const MV> (B_rowMapNonConst);
707 B_rowMap = rcpFromRef (B_in);
713 ProfilingRegion regionExport (
"Tpetra::CrsMatrix::residual: B Import");
715 B_rowMapNonConst->doImport(B_in, *exporter,
ADD);
716 B_rowMap = rcp_const_cast<
const MV> (B_rowMapNonConst);
724 if (! exporter.is_null ()) {
725 if ( ! importer.is_null ())
726 X_colMap->endImport (X_in, *importer,
INSERT, restrictedMode);
727 if (restrictedMode && !importer.is_null ())
728 localResidual (A, *X_colMap, *B_rowMap, *R_rowMap, offsets, &X_in);
730 localResidual (A, *X_colMap, *B_rowMap, *R_rowMap, offsets);
733 ProfilingRegion regionExport (
"Tpetra::CrsMatrix::residual: R Export");
742 if (overlapCommunicationAndComputation) {
743 localResidualWithCommCompOverlap (A, *X_colMap, *B_rowMap, *R_rowMap, offsets, X_in);
745 X_colMap->endImport (X_in, *importer,
INSERT, restrictedMode);
746 localResidual (A, *X_colMap, *B_rowMap, *R_rowMap, offsets, &X_in);
749 if ( ! importer.is_null ())
750 X_colMap->endImport (X_in, *importer,
INSERT, restrictedMode);
751 localResidual (A, *X_colMap, *B_rowMap, *R_rowMap, offsets);
768 if (R_is_replicated) {
769 ProfilingRegion regionReduce (
"Tpetra::CrsMatrix::residual: Reduce Y");
781 #endif // TPETRA_DETAILS_RESIDUAL_HPP
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
static bool overlapCommunicationAndComputation()
Overlap communication and computation.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
size_t getNumVectors() const
Number of columns in the multivector.
size_t getLocalLength() const
Local number of rows on the calling process.
bool isConstantStride() const
Whether this multivector has constant stride between columns.
One or more distributed dense vectors.
virtual void apply(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const =0
Computes the operator-multivector application.
bool isDistributed() const
Whether this is a globally distributed object.
Teuchos::RCP< const crs_graph_type > getCrsGraph() const
This matrix's graph, as a CrsGraph.
static bool debug()
Whether Tpetra is in debug mode.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
Functor for computing R -= A_offRank*X_colmap.
offsets_view_type offsets_
Offsets (output argument)
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
size_t getLocalMaxNumRowEntries() const override
Maximum number of entries in any row of the matrix, on this process.
Insert new values that don't currently exist.
bool isFillComplete() const override
Whether the matrix is fill complete.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
The communicator over which the matrix is distributed.
Abstract interface for operators (e.g., matrices and preconditioners).
void update(const Scalar &alpha, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Scalar &beta)
Update: this = beta*this + alpha*A.
local_matrix_device_type getLocalMatrixDevice() const
The local sparse matrix.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
Functor for computing the residual.
Teuchos::RCP< MV > getColumnMapMultiVector(const MV &X_domainMap, const bool force=false) const
Create a (or fetch a cached) column Map MultiVector.
Teuchos::RCP< const RowGraph< LocalOrdinal, GlobalOrdinal, Node > > getGraph() const override
This matrix's graph, as a RowGraph.
Teuchos::RCP< MV > getRowMapMultiVector(const MV &Y_rangeMap, const bool force=false) const
Create a (or fetch a cached) row Map MultiVector.
dual_view_type::t_dev::const_type getLocalViewDevice(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on device. This requires that th...
static size_t rowImbalanceThreshold()
Threshold for deciding if a local matrix is "imbalanced" in the number of entries per row...
void start()
Start the deep_copy counter.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
void doExport(const SrcDistObject &source, const Export< LocalOrdinal, GlobalOrdinal, Node > &exporter, const CombineMode CM, const bool restrictedMode=false)
Export data into this object using an Export object ("forward mode").
void reduce()
Sum values of a locally replicated multivector across all processes.
static bool skipCopyAndPermuteIfPossible()
Skip copyAndPermute if possible.
void residual(const Operator< SC, LO, GO, NO > &A, const MultiVector< SC, LO, GO, NO > &X, const MultiVector< SC, LO, GO, NO > &B, MultiVector< SC, LO, GO, NO > &R)
Computes R = B - A * X.
Teuchos::RCP< const map_type > getColMap() const override
The Map that describes the column distribution in this matrix.
std::shared_ptr< local_multiply_op_type > getLocalMultiplyOperator() const
The local sparse matrix operator (a wrapper of getLocalMatrixDevice() that supports local matrix-vect...
void localApply(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, const Teuchos::ETransp mode=Teuchos::NO_TRANS, const Scalar &alpha=Teuchos::ScalarTraits< Scalar >::one(), const Scalar &beta=Teuchos::ScalarTraits< Scalar >::zero()) const
Compute the local part of a sparse matrix-(Multi)Vector multiply.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Teuchos::RCP< const map_type > getRowMap() const override
The Map that describes the row distribution in this matrix.