41 #ifndef TPETRA_BLOCKCRSMATRIX_DEF_HPP
42 #define TPETRA_BLOCKCRSMATRIX_DEF_HPP
50 #include "Tpetra_BlockMultiVector.hpp"
53 #include "Teuchos_TimeMonitor.hpp"
54 #ifdef HAVE_TPETRA_DEBUG
56 #endif // HAVE_TPETRA_DEBUG
58 #include "KokkosSparse_spmv.hpp"
65 struct BlockCrsRowStruct {
66 T totalNumEntries, totalNumBytes, maxRowLength;
67 KOKKOS_DEFAULTED_FUNCTION BlockCrsRowStruct() =
default;
68 KOKKOS_DEFAULTED_FUNCTION BlockCrsRowStruct(
const BlockCrsRowStruct &b) =
default;
69 KOKKOS_INLINE_FUNCTION BlockCrsRowStruct(
const T& numEnt,
const T& numBytes,
const T& rowLength)
70 : totalNumEntries(numEnt), totalNumBytes(numBytes), maxRowLength(rowLength) {}
75 KOKKOS_INLINE_FUNCTION
76 void operator+=(BlockCrsRowStruct<T> &a,
const BlockCrsRowStruct<T> &b) {
77 a.totalNumEntries += b.totalNumEntries;
78 a.totalNumBytes += b.totalNumBytes;
79 a.maxRowLength = a.maxRowLength > b.maxRowLength ? a.maxRowLength : b.maxRowLength;
82 template<
typename T,
typename ExecSpace>
83 struct BlockCrsReducer {
84 typedef BlockCrsReducer reducer;
86 typedef Kokkos::View<value_type,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type;
89 KOKKOS_INLINE_FUNCTION
90 BlockCrsReducer(value_type &val) : value(&val) {}
92 KOKKOS_INLINE_FUNCTION
void join(value_type &dst, value_type &src)
const { dst += src; }
93 KOKKOS_INLINE_FUNCTION
void join(value_type &dst,
const value_type &src)
const { dst += src; }
94 KOKKOS_INLINE_FUNCTION
void init(value_type &val)
const { val = value_type(); }
95 KOKKOS_INLINE_FUNCTION value_type& reference() {
return *value; }
96 KOKKOS_INLINE_FUNCTION result_view_type view()
const {
return result_view_type(value); }
105 template<
class Scalar,
class LO,
class GO,
class Node>
106 class GetLocalDiagCopy {
108 typedef typename Node::device_type device_type;
109 typedef size_t diag_offset_type;
110 typedef Kokkos::View<
const size_t*, device_type,
111 Kokkos::MemoryUnmanaged> diag_offsets_type;
112 typedef typename ::Tpetra::CrsGraph<LO, GO, Node> global_graph_type;
114 typedef typename local_graph_device_type::row_map_type row_offsets_type;
115 typedef typename ::Tpetra::BlockMultiVector<Scalar, LO, GO, Node>::impl_scalar_type IST;
116 typedef Kokkos::View<IST***, device_type, Kokkos::MemoryUnmanaged> diag_type;
117 typedef Kokkos::View<const IST*, device_type, Kokkos::MemoryUnmanaged> values_type;
120 GetLocalDiagCopy (
const diag_type& diag,
121 const values_type& val,
122 const diag_offsets_type& diagOffsets,
123 const row_offsets_type& ptr,
124 const LO blockSize) :
126 diagOffsets_ (diagOffsets),
128 blockSize_ (blockSize),
129 offsetPerBlock_ (blockSize_*blockSize_),
133 KOKKOS_INLINE_FUNCTION
void
134 operator() (
const LO& lclRowInd)
const
139 const size_t absOffset = ptr_[lclRowInd];
142 const size_t relOffset = diagOffsets_[lclRowInd];
145 const size_t pointOffset = (absOffset+relOffset)*offsetPerBlock_;
150 device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
151 const_little_block_type;
152 const_little_block_type D_in (val_.data () + pointOffset,
153 blockSize_, blockSize_);
154 auto D_out = Kokkos::subview (diag_, lclRowInd, ALL (), ALL ());
160 diag_offsets_type diagOffsets_;
161 row_offsets_type ptr_;
168 template<
class Scalar,
class LO,
class GO,
class Node>
170 BlockCrsMatrix<Scalar, LO, GO, Node>::
171 markLocalErrorAndGetStream ()
173 * (this->localError_) =
true;
174 if ((*errs_).is_null ()) {
175 *errs_ = Teuchos::rcp (
new std::ostringstream ());
180 template<
class Scalar,
class LO,
class GO,
class Node>
184 graph_ (Teuchos::rcp (new
map_type ()), 0),
185 blockSize_ (static_cast<LO> (0)),
186 X_colMap_ (new Teuchos::RCP<
BMV> ()),
187 Y_rowMap_ (new Teuchos::RCP<
BMV> ()),
188 pointImporter_ (new Teuchos::RCP<typename
crs_graph_type::import_type> ()),
190 localError_ (new bool (false)),
191 errs_ (new Teuchos::RCP<std::ostringstream> ())
195 template<
class Scalar,
class LO,
class GO,
class Node>
198 const LO blockSize) :
201 rowMeshMap_ (* (graph.getRowMap ())),
202 blockSize_ (blockSize),
203 X_colMap_ (new Teuchos::RCP<
BMV> ()),
204 Y_rowMap_ (new Teuchos::RCP<
BMV> ()),
205 pointImporter_ (new Teuchos::RCP<typename
crs_graph_type::import_type> ()),
206 offsetPerBlock_ (blockSize * blockSize),
207 localError_ (new bool (false)),
208 errs_ (new Teuchos::RCP<std::ostringstream> ())
212 TEUCHOS_TEST_FOR_EXCEPTION(
213 ! graph_.
isSorted (), std::invalid_argument,
"Tpetra::"
214 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted "
215 "rows (isSorted() is false). This class assumes sorted rows.");
217 graphRCP_ = Teuchos::rcpFromRef(graph_);
223 const bool blockSizeIsNonpositive = (blockSize + 1 <= 1);
224 TEUCHOS_TEST_FOR_EXCEPTION(
225 blockSizeIsNonpositive, std::invalid_argument,
"Tpetra::"
226 "BlockCrsMatrix constructor: The input blockSize = " << blockSize <<
227 " <= 0. The block size must be positive.");
235 const auto colPointMap = Teuchos::rcp
237 *pointImporter_ = Teuchos::rcp
241 auto local_graph_h = graph.getLocalGraphHost ();
242 auto ptr_h = local_graph_h.row_map;
243 ptrHost_ = decltype(ptrHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph row offset"), ptr_h.extent(0));
246 auto ind_h = local_graph_h.entries;
247 indHost_ = decltype(indHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph column indices"), ind_h.extent(0));
251 const auto numValEnt = ind_h.extent(0) * offsetPerBlock ();
252 val_ = decltype (val_) (impl_scalar_type_dualview(
"val", numValEnt));
256 template<
class Scalar,
class LO,
class GO,
class Node>
259 const typename local_matrix_device_type::values_type& values,
260 const LO blockSize) :
263 rowMeshMap_ (* (graph.getRowMap ())),
264 blockSize_ (blockSize),
265 X_colMap_ (new Teuchos::RCP<
BMV> ()),
266 Y_rowMap_ (new Teuchos::RCP<
BMV> ()),
267 pointImporter_ (new Teuchos::RCP<typename
crs_graph_type::import_type> ()),
268 offsetPerBlock_ (blockSize * blockSize),
269 localError_ (new bool (false)),
270 errs_ (new Teuchos::RCP<std::ostringstream> ())
273 TEUCHOS_TEST_FOR_EXCEPTION(
274 ! graph_.
isSorted (), std::invalid_argument,
"Tpetra::"
275 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted "
276 "rows (isSorted() is false). This class assumes sorted rows.");
278 graphRCP_ = Teuchos::rcpFromRef(graph_);
284 const bool blockSizeIsNonpositive = (blockSize + 1 <= 1);
285 TEUCHOS_TEST_FOR_EXCEPTION(
286 blockSizeIsNonpositive, std::invalid_argument,
"Tpetra::"
287 "BlockCrsMatrix constructor: The input blockSize = " << blockSize <<
288 " <= 0. The block size must be positive.");
296 const auto colPointMap = Teuchos::rcp
298 *pointImporter_ = Teuchos::rcp
302 auto local_graph_h = graph.getLocalGraphHost ();
303 auto ptr_h = local_graph_h.row_map;
304 ptrHost_ = decltype(ptrHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph row offset"), ptr_h.extent(0));
307 auto ind_h = local_graph_h.entries;
308 indHost_ = decltype(indHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph column indices"), ind_h.extent(0));
311 const auto numValEnt = ind_h.extent(0) * offsetPerBlock ();
312 TEUCHOS_ASSERT_EQUALITY(numValEnt, values.size());
313 val_ = decltype (val_) (values);
317 template<
class Scalar,
class LO,
class GO,
class Node>
322 const LO blockSize) :
325 rowMeshMap_ (* (graph.getRowMap ())),
326 domainPointMap_ (domainPointMap),
327 rangePointMap_ (rangePointMap),
328 blockSize_ (blockSize),
329 X_colMap_ (new Teuchos::RCP<
BMV> ()),
330 Y_rowMap_ (new Teuchos::RCP<
BMV> ()),
331 pointImporter_ (new Teuchos::RCP<typename
crs_graph_type::import_type> ()),
332 offsetPerBlock_ (blockSize * blockSize),
333 localError_ (new bool (false)),
334 errs_ (new Teuchos::RCP<std::ostringstream> ())
336 TEUCHOS_TEST_FOR_EXCEPTION(
337 ! graph_.
isSorted (), std::invalid_argument,
"Tpetra::"
338 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted "
339 "rows (isSorted() is false). This class assumes sorted rows.");
341 graphRCP_ = Teuchos::rcpFromRef(graph_);
347 const bool blockSizeIsNonpositive = (blockSize + 1 <= 1);
348 TEUCHOS_TEST_FOR_EXCEPTION(
349 blockSizeIsNonpositive, std::invalid_argument,
"Tpetra::"
350 "BlockCrsMatrix constructor: The input blockSize = " << blockSize <<
351 " <= 0. The block size must be positive.");
355 const auto colPointMap = Teuchos::rcp
357 *pointImporter_ = Teuchos::rcp
361 auto local_graph_h = graph.getLocalGraphHost ();
362 auto ptr_h = local_graph_h.row_map;
363 ptrHost_ = decltype(ptrHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph row offset"), ptr_h.extent(0));
367 auto ind_h = local_graph_h.entries;
368 indHost_ = decltype(indHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph column indices"), ind_h.extent(0));
372 const auto numValEnt = ind_h.extent(0) * offsetPerBlock ();
373 val_ = decltype (val_) (impl_scalar_type_dualview(
"val", numValEnt));
378 template<
class Scalar,
class LO,
class GO,
class Node>
379 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
384 return Teuchos::rcp (
new map_type (domainPointMap_));
387 template<
class Scalar,
class LO,
class GO,
class Node>
388 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
393 return Teuchos::rcp (
new map_type (rangePointMap_));
396 template<
class Scalar,
class LO,
class GO,
class Node>
397 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
401 return graph_.getRowMap();
404 template<
class Scalar,
class LO,
class GO,
class Node>
405 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
409 return graph_.getColMap();
412 template<
class Scalar,
class LO,
class GO,
class Node>
417 return graph_.getGlobalNumRows();
420 template<
class Scalar,
class LO,
class GO,
class Node>
425 return graph_.getLocalNumRows();
428 template<
class Scalar,
class LO,
class GO,
class Node>
433 return graph_.getLocalMaxNumRowEntries();
436 template<
class Scalar,
class LO,
class GO,
class Node>
441 Teuchos::ETransp mode,
446 TEUCHOS_TEST_FOR_EXCEPTION(
447 mode != Teuchos::NO_TRANS && mode != Teuchos::TRANS && mode != Teuchos::CONJ_TRANS,
448 std::invalid_argument,
"Tpetra::BlockCrsMatrix::apply: "
449 "Invalid 'mode' argument. Valid values are Teuchos::NO_TRANS, "
450 "Teuchos::TRANS, and Teuchos::CONJ_TRANS.");
452 TEUCHOS_TEST_FOR_EXCEPTION(
454 std::invalid_argument,
"Tpetra::BlockCrsMatrix::apply: "
455 "X and Y must both be constant stride");
459 const LO blockSize = getBlockSize ();
461 X_view =
BMV (X, * (graph_.getDomainMap ()), blockSize);
462 Y_view =
BMV (Y, * (graph_.getRangeMap ()), blockSize);
464 catch (std::invalid_argument& e) {
465 TEUCHOS_TEST_FOR_EXCEPTION(
466 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
467 "apply: Either the input MultiVector X or the output MultiVector Y "
468 "cannot be viewed as a BlockMultiVector, given this BlockCrsMatrix's "
469 "graph. BlockMultiVector's constructor threw the following exception: "
478 const_cast<this_BCRS_type&
> (*this).applyBlock (X_view, Y_view, mode, alpha, beta);
479 }
catch (std::invalid_argument& e) {
480 TEUCHOS_TEST_FOR_EXCEPTION(
481 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
482 "apply: The implementation method applyBlock complained about having "
483 "an invalid argument. It reported the following: " << e.what ());
484 }
catch (std::logic_error& e) {
485 TEUCHOS_TEST_FOR_EXCEPTION(
486 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
487 "apply: The implementation method applyBlock complained about a "
488 "possible bug in its implementation. It reported the following: "
490 }
catch (std::exception& e) {
491 TEUCHOS_TEST_FOR_EXCEPTION(
492 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
493 "apply: The implementation method applyBlock threw an exception which "
494 "is neither std::invalid_argument nor std::logic_error, but is a "
495 "subclass of std::exception. It reported the following: "
498 TEUCHOS_TEST_FOR_EXCEPTION(
499 true, std::logic_error,
"Tpetra::BlockCrsMatrix::"
500 "apply: The implementation method applyBlock threw an exception which "
501 "is not an instance of a subclass of std::exception. This probably "
502 "indicates a bug. Please report this to the Tpetra developers.");
506 template<
class Scalar,
class LO,
class GO,
class Node>
511 Teuchos::ETransp mode,
515 TEUCHOS_TEST_FOR_EXCEPTION(
517 "Tpetra::BlockCrsMatrix::applyBlock: "
518 "X and Y have different block sizes. X.getBlockSize() = "
522 if (mode == Teuchos::NO_TRANS) {
523 applyBlockNoTrans (X, Y, alpha, beta);
524 }
else if (mode == Teuchos::TRANS || mode == Teuchos::CONJ_TRANS) {
525 applyBlockTrans (X, Y, mode, alpha, beta);
527 TEUCHOS_TEST_FOR_EXCEPTION(
528 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
529 "applyBlock: Invalid 'mode' argument. Valid values are "
530 "Teuchos::NO_TRANS, Teuchos::TRANS, and Teuchos::CONJ_TRANS.");
534 template<
class Scalar,
class LO,
class GO,
class Node>
545 TEUCHOS_TEST_FOR_EXCEPTION(!destMatrix.is_null(), std::invalid_argument,
546 "destMatrix is required to be null.");
550 RCP<crs_graph_type> srcGraph = rcp (
new crs_graph_type(this->getCrsGraph()));
551 RCP<crs_graph_type> destGraph = importAndFillCompleteCrsGraph<crs_graph_type>(srcGraph, importer,
552 srcGraph->getDomainMap(),
553 srcGraph->getRangeMap());
557 destMatrix = rcp (
new this_BCRS_type (*destGraph, getBlockSize()));
562 template<
class Scalar,
class LO,
class GO,
class Node>
573 TEUCHOS_TEST_FOR_EXCEPTION(!destMatrix.is_null(), std::invalid_argument,
574 "destMatrix is required to be null.");
578 RCP<crs_graph_type> srcGraph = rcp (
new crs_graph_type(this->getCrsGraph()));
579 RCP<crs_graph_type> destGraph = exportAndFillCompleteCrsGraph<crs_graph_type>(srcGraph, exporter,
580 srcGraph->getDomainMap(),
581 srcGraph->getRangeMap());
585 destMatrix = rcp (
new this_BCRS_type (*destGraph, getBlockSize()));
590 template<
class Scalar,
class LO,
class GO,
class Node>
595 auto val_d = val_.getDeviceView(Access::OverwriteAll);
599 template<
class Scalar,
class LO,
class GO,
class Node>
605 const LO numColInds)
const
607 std::vector<ptrdiff_t> offsets(numColInds);
608 const LO numOffsets = this->getLocalRowOffsets(localRowInd, offsets.data(), colInds, numColInds);
609 const LO validCount = this->replaceLocalValuesByOffsets(localRowInd, offsets.data(), vals, numOffsets);
613 template <
class Scalar,
class LO,
class GO,
class Node>
617 Kokkos::MemoryUnmanaged>& offsets)
const
619 graph_.getLocalDiagOffsets (offsets);
622 template <
class Scalar,
class LO,
class GO,
class Node>
626 Kokkos::MemoryUnmanaged>& diag,
627 const Kokkos::View<
const size_t*, device_type,
628 Kokkos::MemoryUnmanaged>& offsets)
const
630 using Kokkos::parallel_for;
631 const char prefix[] =
"Tpetra::BlockCrsMatrix::getLocalDiagCopy (2-arg): ";
633 const LO lclNumMeshRows =
static_cast<LO
> (rowMeshMap_.getLocalNumElements ());
634 const LO blockSize = this->getBlockSize ();
635 TEUCHOS_TEST_FOR_EXCEPTION
636 (static_cast<LO> (diag.extent (0)) < lclNumMeshRows ||
637 static_cast<LO> (diag.extent (1)) < blockSize ||
638 static_cast<LO> (diag.extent (2)) < blockSize,
639 std::invalid_argument, prefix <<
640 "The input Kokkos::View is not big enough to hold all the data.");
641 TEUCHOS_TEST_FOR_EXCEPTION
642 (static_cast<LO> (offsets.size ()) < lclNumMeshRows, std::invalid_argument,
643 prefix <<
"offsets.size() = " << offsets.size () <<
" < local number of "
644 "diagonal blocks " << lclNumMeshRows <<
".");
646 typedef Kokkos::RangePolicy<execution_space, LO> policy_type;
647 typedef GetLocalDiagCopy<Scalar, LO, GO, Node> functor_type;
653 auto val_d = val_.getDeviceView(Access::ReadOnly);
654 parallel_for (policy_type (0, lclNumMeshRows),
655 functor_type (diag, val_d, offsets,
656 graph_.getLocalGraphDevice ().row_map, blockSize_));
659 template<
class Scalar,
class LO,
class GO,
class Node>
665 const LO numColInds)
const
667 std::vector<ptrdiff_t> offsets(numColInds);
668 const LO numOffsets = this->getLocalRowOffsets(localRowInd, offsets.data(), colInds, numColInds);
669 const LO validCount = this->absMaxLocalValuesByOffsets(localRowInd, offsets.data(), vals, numOffsets);
674 template<
class Scalar,
class LO,
class GO,
class Node>
680 const LO numColInds)
const
682 std::vector<ptrdiff_t> offsets(numColInds);
683 const LO numOffsets = this->getLocalRowOffsets(localRowInd, offsets.data(), colInds, numColInds);
684 const LO validCount = this->sumIntoLocalValuesByOffsets(localRowInd, offsets.data(), vals, numOffsets);
687 template<
class Scalar,
class LO,
class GO,
class Node>
691 nonconst_local_inds_host_view_type &Indices,
692 nonconst_values_host_view_type &Values,
693 size_t& NumEntries)
const
695 auto vals = getValuesHost(LocalRow);
696 const LO numInds = ptrHost_(LocalRow+1) - ptrHost_(LocalRow);
697 if (numInds > (LO)Indices.extent(0) || numInds*blockSize_*blockSize_ > (LO)Values.extent(0)) {
698 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error,
699 "Tpetra::BlockCrsMatrix::getLocalRowCopy : Column and/or values array is not large enough to hold "
700 << numInds <<
" row entries");
702 const LO * colInds = indHost_.data() + ptrHost_(LocalRow);
703 for (LO i=0; i<numInds; ++i) {
704 Indices[i] = colInds[i];
706 for (LO i=0; i<numInds*blockSize_*blockSize_; ++i) {
709 NumEntries = numInds;
712 template<
class Scalar,
class LO,
class GO,
class Node>
718 const LO numColInds)
const
720 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
725 return static_cast<LO
> (0);
728 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
732 for (LO k = 0; k < numColInds; ++k) {
733 const LO relBlockOffset =
734 this->findRelOffsetOfColumnIndex (localRowInd, colInds[k], hint);
735 if (relBlockOffset != LINV) {
736 offsets[k] =
static_cast<ptrdiff_t
> (relBlockOffset);
737 hint = relBlockOffset + 1;
745 template<
class Scalar,
class LO,
class GO,
class Node>
749 const ptrdiff_t offsets[],
751 const LO numOffsets)
const
753 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
758 return static_cast<LO
> (0);
765 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
766 const ptrdiff_t STINV = Teuchos::OrdinalTraits<ptrdiff_t>::invalid ();
767 size_t pointOffset = 0;
770 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
771 const size_t blockOffset = offsets[k]*perBlockSize;
772 if (offsets[k] != STINV) {
774 const_little_block_type A_new = getConstLocalBlockFromInput (vIn, pointOffset);
783 template<
class Scalar,
class LO,
class GO,
class Node>
787 const ptrdiff_t offsets[],
789 const LO numOffsets)
const
791 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
796 return static_cast<LO
> (0);
798 const impl_scalar_type*
const vIn =
reinterpret_cast<const impl_scalar_type*
> (vals);
799 auto val_out = getValuesHost(localRowInd);
800 impl_scalar_type* vOut =
const_cast<impl_scalar_type*
>(val_out.data());
802 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
803 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
804 size_t pointOffset = 0;
807 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
808 const size_t blockOffset = offsets[k]*perBlockSize;
809 if (blockOffset != STINV) {
810 little_block_type A_old = getNonConstLocalBlockFromInput (vOut, blockOffset);
811 const_little_block_type A_new = getConstLocalBlockFromInput (vIn, pointOffset);
820 template<
class Scalar,
class LO,
class GO,
class Node>
824 const ptrdiff_t offsets[],
826 const LO numOffsets)
const
828 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
833 return static_cast<LO
> (0);
841 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
842 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
843 size_t pointOffset = 0;
846 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
847 const size_t blockOffset = offsets[k]*perBlockSize;
848 if (blockOffset != STINV) {
850 const_little_block_type A_new = getConstLocalBlockFromInput (vIn, pointOffset);
851 AXPY (ONE, A_new, A_old);
858 template<
class Scalar,
class LO,
class GO,
class Node>
860 impl_scalar_type_dualview::t_host::const_type
864 return val_.getHostView(Access::ReadOnly);
867 template<
class Scalar,
class LO,
class GO,
class Node>
868 typename BlockCrsMatrix<Scalar, LO, GO, Node>::
869 impl_scalar_type_dualview::t_dev::const_type
870 BlockCrsMatrix<Scalar, LO, GO, Node>::
871 getValuesDevice ()
const
873 return val_.getDeviceView(Access::ReadOnly);
876 template<
class Scalar,
class LO,
class GO,
class Node>
877 typename BlockCrsMatrix<Scalar, LO, GO, Node>::
878 impl_scalar_type_dualview::t_host
882 return val_.getHostView(Access::ReadWrite);
885 template<
class Scalar,
class LO,
class GO,
class Node>
887 impl_scalar_type_dualview::t_dev
891 return val_.getDeviceView(Access::ReadWrite);
894 template<
class Scalar,
class LO,
class GO,
class Node>
895 typename BlockCrsMatrix<Scalar, LO, GO, Node>::
896 impl_scalar_type_dualview::t_host::const_type
897 BlockCrsMatrix<Scalar, LO, GO, Node>::
898 getValuesHost (
const LO& lclRow)
const
900 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
901 auto val = val_.getHostView(Access::ReadOnly);
902 auto r_val = Kokkos::subview(val, Kokkos::pair<LO,LO>(ptrHost_(lclRow)*perBlockSize, ptrHost_(lclRow+1)*perBlockSize));
906 template<
class Scalar,
class LO,
class GO,
class Node>
908 impl_scalar_type_dualview::t_dev::const_type
912 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
913 auto val = val_.getDeviceView(Access::ReadOnly);
914 auto r_val = Kokkos::subview(val, Kokkos::pair<LO,LO>(ptrHost_(lclRow)*perBlockSize, ptrHost_(lclRow+1)*perBlockSize));
918 template<
class Scalar,
class LO,
class GO,
class Node>
923 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
924 auto val = val_.getHostView(Access::ReadWrite);
925 auto r_val = Kokkos::subview(val, Kokkos::pair<LO,LO>(ptrHost_(lclRow)*perBlockSize, ptrHost_(lclRow+1)*perBlockSize));
929 template<
class Scalar,
class LO,
class GO,
class Node>
934 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
935 auto val = val_.getDeviceView(Access::ReadWrite);
936 auto r_val = Kokkos::subview(val, Kokkos::pair<LO,LO>(ptrHost_(lclRow)*perBlockSize, ptrHost_(lclRow+1)*perBlockSize));
940 template<
class Scalar,
class LO,
class GO,
class Node>
945 const size_t numEntInGraph = graph_.getNumEntriesInLocalRow (localRowInd);
946 if (numEntInGraph == Teuchos::OrdinalTraits<size_t>::invalid ()) {
947 return static_cast<LO
> (0);
949 return static_cast<LO
> (numEntInGraph);
953 template<
class Scalar,
class LO,
class GO,
class Node>
954 typename BlockCrsMatrix<Scalar, LO, GO, Node>::local_matrix_device_type
958 auto numCols = this->graph_.getColMap()->getLocalNumElements();
959 auto val = val_.getDeviceView(Access::ReadWrite);
960 const LO blockSize = this->getBlockSize ();
961 const auto graph = this->graph_.getLocalGraphDevice ();
963 return local_matrix_device_type(
"Tpetra::BlockCrsMatrix::lclMatrixDevice",
970 template<
class Scalar,
class LO,
class GO,
class Node>
975 const Teuchos::ETransp mode,
985 TEUCHOS_TEST_FOR_EXCEPTION(
986 true, std::logic_error,
"Tpetra::BlockCrsMatrix::apply: "
987 "transpose and conjugate transpose modes are not yet implemented.");
990 template<
class Scalar,
class LO,
class GO,
class Node>
992 BlockCrsMatrix<Scalar, LO, GO, Node>::
993 applyBlockNoTrans (
const BlockMultiVector<Scalar, LO, GO, Node>& X,
994 BlockMultiVector<Scalar, LO, GO, Node>& Y,
1000 typedef ::Tpetra::Import<LO, GO, Node> import_type;
1001 typedef ::Tpetra::Export<LO, GO, Node> export_type;
1002 const Scalar zero = STS::zero ();
1003 const Scalar one = STS::one ();
1004 RCP<const import_type>
import = graph_.getImporter ();
1006 RCP<const export_type> theExport = graph_.getExporter ();
1007 const char prefix[] =
"Tpetra::BlockCrsMatrix::applyBlockNoTrans: ";
1009 if (alpha == zero) {
1013 else if (beta != one) {
1018 const BMV* X_colMap = NULL;
1019 if (
import.is_null ()) {
1023 catch (std::exception& e) {
1024 TEUCHOS_TEST_FOR_EXCEPTION
1025 (
true, std::logic_error, prefix <<
"Tpetra::MultiVector::"
1026 "operator= threw an exception: " << std::endl << e.what ());
1036 if ((*X_colMap_).is_null () ||
1037 (**X_colMap_).getNumVectors () != X.getNumVectors () ||
1038 (**X_colMap_).getBlockSize () != X.getBlockSize ()) {
1039 *X_colMap_ = rcp (
new BMV (* (graph_.getColMap ()), getBlockSize (),
1040 static_cast<LO
> (X.getNumVectors ())));
1042 (*X_colMap_)->getMultiVectorView().doImport (X.getMultiVectorView (),
1046 X_colMap = &(**X_colMap_);
1048 catch (std::exception& e) {
1049 TEUCHOS_TEST_FOR_EXCEPTION
1050 (
true, std::logic_error, prefix <<
"Tpetra::MultiVector::"
1051 "operator= threw an exception: " << std::endl << e.what ());
1055 BMV* Y_rowMap = NULL;
1056 if (theExport.is_null ()) {
1059 else if ((*Y_rowMap_).is_null () ||
1060 (**Y_rowMap_).getNumVectors () != Y.getNumVectors () ||
1061 (**Y_rowMap_).getBlockSize () != Y.getBlockSize ()) {
1062 *Y_rowMap_ = rcp (
new BMV (* (graph_.getRowMap ()), getBlockSize (),
1063 static_cast<LO
> (X.getNumVectors ())));
1065 Y_rowMap = &(**Y_rowMap_);
1067 catch (std::exception& e) {
1068 TEUCHOS_TEST_FOR_EXCEPTION(
1069 true, std::logic_error, prefix <<
"Tpetra::MultiVector::"
1070 "operator= threw an exception: " << std::endl << e.what ());
1075 localApplyBlockNoTrans (*X_colMap, *Y_rowMap, alpha, beta);
1077 catch (std::exception& e) {
1078 TEUCHOS_TEST_FOR_EXCEPTION
1079 (
true, std::runtime_error, prefix <<
"localApplyBlockNoTrans threw "
1080 "an exception: " << e.what ());
1083 TEUCHOS_TEST_FOR_EXCEPTION
1084 (
true, std::runtime_error, prefix <<
"localApplyBlockNoTrans threw "
1085 "an exception not a subclass of std::exception.");
1088 if (! theExport.is_null ()) {
1095 template <
class Scalar,
class LO,
class GO,
class Node>
1096 void BlockCrsMatrix<Scalar, LO, GO, Node>::localApplyBlockNoTrans(
1097 const BlockMultiVector<Scalar, LO, GO, Node> &X,
1098 BlockMultiVector<Scalar, LO, GO, Node> &Y,
const Scalar alpha,
1099 const Scalar beta) {
1101 "Tpetra::BlockCrsMatrix::localApplyBlockNoTrans");
1102 const impl_scalar_type alpha_impl = alpha;
1103 const auto graph = this->graph_.getLocalGraphDevice();
1105 mv_type X_mv = X.getMultiVectorView();
1106 mv_type Y_mv = Y.getMultiVectorView();
1107 auto X_lcl = X_mv.getLocalViewDevice(Access::ReadOnly);
1108 auto Y_lcl = Y_mv.getLocalViewDevice(Access::ReadWrite);
1110 #if KOKKOSKERNELS_VERSION >= 40299
1111 auto A_lcl = getLocalMatrixDevice();
1112 if(!applyHelper.get()) {
1114 applyHelper = std::make_shared<ApplyHelper>(A_lcl.nnz(), A_lcl.graph.row_map);
1116 if(applyHelper->shouldUseIntRowptrs())
1118 auto A_lcl_int_rowptrs = applyHelper->getIntRowptrMatrix(A_lcl);
1120 &applyHelper->handle_int, KokkosSparse::NoTranspose,
1121 alpha_impl, A_lcl_int_rowptrs, X_lcl, beta, Y_lcl);
1126 &applyHelper->handle, KokkosSparse::NoTranspose,
1127 alpha_impl, A_lcl, X_lcl, beta, Y_lcl);
1130 auto A_lcl = getLocalMatrixDevice();
1131 KokkosSparse::spmv(KokkosSparse::NoTranspose, alpha_impl, A_lcl, X_lcl, beta,
1137 template<
class Scalar,
class LO,
class GO,
class Node>
1139 BlockCrsMatrix<Scalar, LO, GO, Node>::
1140 findRelOffsetOfColumnIndex (
const LO localRowIndex,
1141 const LO colIndexToFind,
1142 const LO hint)
const
1144 const size_t absStartOffset = ptrHost_[localRowIndex];
1145 const size_t absEndOffset = ptrHost_[localRowIndex+1];
1146 const LO numEntriesInRow =
static_cast<LO
> (absEndOffset - absStartOffset);
1148 const LO*
const curInd = indHost_.data () + absStartOffset;
1151 if (hint < numEntriesInRow && curInd[hint] == colIndexToFind) {
1158 LO relOffset = Teuchos::OrdinalTraits<LO>::invalid ();
1163 const LO maxNumEntriesForLinearSearch = 32;
1164 if (numEntriesInRow > maxNumEntriesForLinearSearch) {
1169 const LO* beg = curInd;
1170 const LO* end = curInd + numEntriesInRow;
1171 std::pair<const LO*, const LO*> p =
1172 std::equal_range (beg, end, colIndexToFind);
1173 if (p.first != p.second) {
1175 relOffset =
static_cast<LO
> (p.first - beg);
1179 for (LO k = 0; k < numEntriesInRow; ++k) {
1180 if (colIndexToFind == curInd[k]) {
1190 template<
class Scalar,
class LO,
class GO,
class Node>
1192 BlockCrsMatrix<Scalar, LO, GO, Node>::
1193 offsetPerBlock ()
const
1195 return offsetPerBlock_;
1198 template<
class Scalar,
class LO,
class GO,
class Node>
1199 typename BlockCrsMatrix<Scalar, LO, GO, Node>::const_little_block_type
1200 BlockCrsMatrix<Scalar, LO, GO, Node>::
1201 getConstLocalBlockFromInput (
const impl_scalar_type* val,
1202 const size_t pointOffset)
const
1205 const LO rowStride = blockSize_;
1206 return const_little_block_type (val + pointOffset, blockSize_, rowStride);
1209 template<
class Scalar,
class LO,
class GO,
class Node>
1210 typename BlockCrsMatrix<Scalar, LO, GO, Node>::little_block_type
1211 BlockCrsMatrix<Scalar, LO, GO, Node>::
1212 getNonConstLocalBlockFromInput (impl_scalar_type* val,
1213 const size_t pointOffset)
const
1216 const LO rowStride = blockSize_;
1217 return little_block_type (val + pointOffset, blockSize_, rowStride);
1220 template<
class Scalar,
class LO,
class GO,
class Node>
1221 typename BlockCrsMatrix<Scalar, LO, GO, Node>::little_block_host_type
1222 BlockCrsMatrix<Scalar, LO, GO, Node>::
1223 getNonConstLocalBlockFromInputHost (impl_scalar_type* val,
1224 const size_t pointOffset)
const
1227 const LO rowStride = blockSize_;
1228 const size_t bs2 = blockSize_ * blockSize_;
1229 return little_block_host_type (val + bs2 * pointOffset, blockSize_, rowStride);
1232 template<
class Scalar,
class LO,
class GO,
class Node>
1233 typename BlockCrsMatrix<Scalar, LO, GO, Node>::little_block_type
1234 BlockCrsMatrix<Scalar, LO, GO, Node>::
1235 getLocalBlockDeviceNonConst (
const LO localRowInd,
const LO localColInd)
const
1237 using this_BCRS_type = BlockCrsMatrix<Scalar, LO, GO, Node>;
1239 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1240 const LO relBlockOffset = this->findRelOffsetOfColumnIndex (localRowInd, localColInd);
1241 if (relBlockOffset != Teuchos::OrdinalTraits<LO>::invalid ()) {
1242 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1243 auto vals =
const_cast<this_BCRS_type&
>(*this).getValuesDeviceNonConst();
1244 auto r_val = getNonConstLocalBlockFromInput (vals.data(), absBlockOffset);
1248 return little_block_type ();
1252 template<
class Scalar,
class LO,
class GO,
class Node>
1253 typename BlockCrsMatrix<Scalar, LO, GO, Node>::little_block_host_type
1254 BlockCrsMatrix<Scalar, LO, GO, Node>::
1255 getLocalBlockHostNonConst (
const LO localRowInd,
const LO localColInd)
const
1257 using this_BCRS_type = BlockCrsMatrix<Scalar, LO, GO, Node>;
1259 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1260 const LO relBlockOffset = this->findRelOffsetOfColumnIndex (localRowInd, localColInd);
1261 if (relBlockOffset != Teuchos::OrdinalTraits<LO>::invalid ()) {
1262 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1263 auto vals =
const_cast<this_BCRS_type&
>(*this).getValuesHostNonConst();
1264 auto r_val = getNonConstLocalBlockFromInputHost (vals.data(), absBlockOffset);
1268 return little_block_host_type ();
1273 template<
class Scalar,
class LO,
class GO,
class Node>
1275 BlockCrsMatrix<Scalar, LO, GO, Node>::
1276 checkSizes (const ::Tpetra::SrcDistObject& source)
1279 typedef BlockCrsMatrix<Scalar, LO, GO, Node> this_BCRS_type;
1280 const this_BCRS_type* src =
dynamic_cast<const this_BCRS_type*
> (&source);
1283 std::ostream& err = markLocalErrorAndGetStream ();
1284 err <<
"checkSizes: The source object of the Import or Export "
1285 "must be a BlockCrsMatrix with the same template parameters as the "
1286 "target object." << endl;
1291 if (src->getBlockSize () != this->getBlockSize ()) {
1292 std::ostream& err = markLocalErrorAndGetStream ();
1293 err <<
"checkSizes: The source and target objects of the Import or "
1294 <<
"Export must have the same block sizes. The source's block "
1295 <<
"size = " << src->getBlockSize () <<
" != the target's block "
1296 <<
"size = " << this->getBlockSize () <<
"." << endl;
1298 if (! src->graph_.isFillComplete ()) {
1299 std::ostream& err = markLocalErrorAndGetStream ();
1300 err <<
"checkSizes: The source object of the Import or Export is "
1301 "not fill complete. Both source and target objects must be fill "
1302 "complete." << endl;
1304 if (! this->graph_.isFillComplete ()) {
1305 std::ostream& err = markLocalErrorAndGetStream ();
1306 err <<
"checkSizes: The target object of the Import or Export is "
1307 "not fill complete. Both source and target objects must be fill "
1308 "complete." << endl;
1310 if (src->graph_.getColMap ().is_null ()) {
1311 std::ostream& err = markLocalErrorAndGetStream ();
1312 err <<
"checkSizes: The source object of the Import or Export does "
1313 "not have a column Map. Both source and target objects must have "
1314 "column Maps." << endl;
1316 if (this->graph_.getColMap ().is_null ()) {
1317 std::ostream& err = markLocalErrorAndGetStream ();
1318 err <<
"checkSizes: The target object of the Import or Export does "
1319 "not have a column Map. Both source and target objects must have "
1320 "column Maps." << endl;
1324 return ! (* (this->localError_));
1327 template<
class Scalar,
class LO,
class GO,
class Node>
1331 (const ::Tpetra::SrcDistObject& source,
1332 const size_t numSameIDs,
1339 using ::Tpetra::Details::Behavior;
1341 using ::Tpetra::Details::ProfilingRegion;
1345 ProfilingRegion profile_region(
"Tpetra::BlockCrsMatrix::copyAndPermute");
1346 const bool debug = Behavior::debug();
1347 const bool verbose = Behavior::verbose();
1352 std::ostringstream os;
1353 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1354 os <<
"Proc " << myRank <<
": BlockCrsMatrix::copyAndPermute : " << endl;
1359 if (* (this->localError_)) {
1360 std::ostream& err = this->markLocalErrorAndGetStream ();
1362 <<
"The target object of the Import or Export is already in an error state."
1371 std::ostringstream os;
1372 os << prefix << endl
1375 std::cerr << os.str ();
1381 if (permuteToLIDs.extent (0) != permuteFromLIDs.extent (0)) {
1382 std::ostream& err = this->markLocalErrorAndGetStream ();
1384 <<
"permuteToLIDs.extent(0) = " << permuteToLIDs.extent (0)
1385 <<
" != permuteFromLIDs.extent(0) = " << permuteFromLIDs.extent(0)
1389 if (permuteToLIDs.need_sync_host () || permuteFromLIDs.need_sync_host ()) {
1390 std::ostream& err = this->markLocalErrorAndGetStream ();
1392 <<
"Both permuteToLIDs and permuteFromLIDs must be sync'd to host."
1397 const this_BCRS_type* src =
dynamic_cast<const this_BCRS_type*
> (&source);
1399 std::ostream& err = this->markLocalErrorAndGetStream ();
1401 <<
"The source (input) object of the Import or "
1402 "Export is either not a BlockCrsMatrix, or does not have the right "
1403 "template parameters. checkSizes() should have caught this. "
1404 "Please report this bug to the Tpetra developers." << endl;
1408 bool lclErr =
false;
1409 #ifdef HAVE_TPETRA_DEBUG
1410 std::set<LO> invalidSrcCopyRows;
1411 std::set<LO> invalidDstCopyRows;
1412 std::set<LO> invalidDstCopyCols;
1413 std::set<LO> invalidDstPermuteCols;
1414 std::set<LO> invalidPermuteFromRows;
1415 #endif // HAVE_TPETRA_DEBUG
1424 #ifdef HAVE_TPETRA_DEBUG
1425 const map_type& srcRowMap = * (src->graph_.getRowMap ());
1426 #endif // HAVE_TPETRA_DEBUG
1427 const map_type& dstRowMap = * (this->graph_.getRowMap ());
1428 const map_type& srcColMap = * (src->graph_.getColMap ());
1429 const map_type& dstColMap = * (this->graph_.getColMap ());
1430 const bool canUseLocalColumnIndices = srcColMap.
locallySameAs (dstColMap);
1432 const size_t numPermute =
static_cast<size_t> (permuteFromLIDs.extent(0));
1434 std::ostringstream os;
1436 <<
"canUseLocalColumnIndices: "
1437 << (canUseLocalColumnIndices ?
"true" :
"false")
1438 <<
", numPermute: " << numPermute
1440 std::cerr << os.str ();
1443 const auto permuteToLIDsHost = permuteToLIDs.view_host();
1444 const auto permuteFromLIDsHost = permuteFromLIDs.view_host();
1446 if (canUseLocalColumnIndices) {
1448 for (LO localRow = 0; localRow < static_cast<LO> (numSameIDs); ++localRow) {
1449 #ifdef HAVE_TPETRA_DEBUG
1452 invalidSrcCopyRows.insert (localRow);
1455 #endif // HAVE_TPETRA_DEBUG
1457 local_inds_host_view_type lclSrcCols;
1458 values_host_view_type vals;
1462 src->getLocalRowView (localRow, lclSrcCols, vals); numEntries = lclSrcCols.extent(0);
1463 if (numEntries > 0) {
1464 LO err = this->replaceLocalValues (localRow, lclSrcCols.data(),
reinterpret_cast<const scalar_type*
>(vals.data()), numEntries);
1465 if (err != numEntries) {
1468 #ifdef HAVE_TPETRA_DEBUG
1469 invalidDstCopyRows.insert (localRow);
1470 #endif // HAVE_TPETRA_DEBUG
1478 for (LO k = 0; k < numEntries; ++k) {
1481 #ifdef HAVE_TPETRA_DEBUG
1482 (void) invalidDstCopyCols.insert (lclSrcCols[k]);
1483 #endif // HAVE_TPETRA_DEBUG
1492 for (
size_t k = 0; k < numPermute; ++k) {
1493 const LO srcLclRow =
static_cast<LO
> (permuteFromLIDsHost(k));
1494 const LO dstLclRow =
static_cast<LO
> (permuteToLIDsHost(k));
1496 local_inds_host_view_type lclSrcCols;
1497 values_host_view_type vals;
1499 src->getLocalRowView (srcLclRow, lclSrcCols, vals); numEntries = lclSrcCols.extent(0);
1500 if (numEntries > 0) {
1501 LO err = this->replaceLocalValues (dstLclRow, lclSrcCols.data(),
reinterpret_cast<const scalar_type*
>(vals.data()), numEntries);
1502 if (err != numEntries) {
1504 #ifdef HAVE_TPETRA_DEBUG
1505 for (LO c = 0; c < numEntries; ++c) {
1507 invalidDstPermuteCols.insert (lclSrcCols[c]);
1510 #endif // HAVE_TPETRA_DEBUG
1517 const size_t maxNumEnt = src->graph_.getLocalMaxNumRowEntries ();
1518 Teuchos::Array<LO> lclDstCols (maxNumEnt);
1521 for (LO localRow = 0; localRow < static_cast<LO> (numSameIDs); ++localRow) {
1522 local_inds_host_view_type lclSrcCols;
1523 values_host_view_type vals;
1529 src->getLocalRowView (localRow, lclSrcCols, vals); numEntries = lclSrcCols.extent(0);
1530 }
catch (std::exception& e) {
1532 std::ostringstream os;
1533 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1534 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" localRow "
1535 << localRow <<
", src->getLocalRowView() threw an exception: "
1537 std::cerr << os.str ();
1542 if (numEntries > 0) {
1543 if (static_cast<size_t> (numEntries) > static_cast<size_t> (lclDstCols.size ())) {
1546 std::ostringstream os;
1547 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1548 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" localRow "
1549 << localRow <<
", numEntries = " << numEntries <<
" > maxNumEnt = "
1550 << maxNumEnt << endl;
1551 std::cerr << os.str ();
1557 Teuchos::ArrayView<LO> lclDstColsView = lclDstCols.view (0, numEntries);
1558 for (LO j = 0; j < numEntries; ++j) {
1560 if (lclDstColsView[j] == Teuchos::OrdinalTraits<LO>::invalid ()) {
1562 #ifdef HAVE_TPETRA_DEBUG
1563 invalidDstCopyCols.insert (lclDstColsView[j]);
1564 #endif // HAVE_TPETRA_DEBUG
1569 err = this->replaceLocalValues (localRow, lclDstColsView.getRawPtr (),
reinterpret_cast<const scalar_type*
>(vals.data()), numEntries);
1570 }
catch (std::exception& e) {
1572 std::ostringstream os;
1573 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1574 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" localRow "
1575 << localRow <<
", this->replaceLocalValues() threw an exception: "
1577 std::cerr << os.str ();
1581 if (err != numEntries) {
1584 std::ostringstream os;
1585 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1586 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" "
1587 "localRow " << localRow <<
", this->replaceLocalValues "
1588 "returned " << err <<
" instead of numEntries = "
1589 << numEntries << endl;
1590 std::cerr << os.str ();
1598 for (
size_t k = 0; k < numPermute; ++k) {
1599 const LO srcLclRow =
static_cast<LO
> (permuteFromLIDsHost(k));
1600 const LO dstLclRow =
static_cast<LO
> (permuteToLIDsHost(k));
1602 local_inds_host_view_type lclSrcCols;
1603 values_host_view_type vals;
1607 src->getLocalRowView (srcLclRow, lclSrcCols, vals); numEntries = lclSrcCols.extent(0);
1608 }
catch (std::exception& e) {
1610 std::ostringstream os;
1611 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1612 os <<
"Proc " << myRank <<
": copyAndPermute: At \"permute\" "
1613 "srcLclRow " << srcLclRow <<
" and dstLclRow " << dstLclRow
1614 <<
", src->getLocalRowView() threw an exception: " << e.what ();
1615 std::cerr << os.str ();
1620 if (numEntries > 0) {
1621 if (static_cast<size_t> (numEntries) > static_cast<size_t> (lclDstCols.size ())) {
1627 Teuchos::ArrayView<LO> lclDstColsView = lclDstCols.view (0, numEntries);
1628 for (LO j = 0; j < numEntries; ++j) {
1630 if (lclDstColsView[j] == Teuchos::OrdinalTraits<LO>::invalid ()) {
1632 #ifdef HAVE_TPETRA_DEBUG
1633 invalidDstPermuteCols.insert (lclDstColsView[j]);
1634 #endif // HAVE_TPETRA_DEBUG
1637 LO err = this->replaceLocalValues (dstLclRow, lclDstColsView.getRawPtr (),
reinterpret_cast<const scalar_type*
>(vals.data()), numEntries);
1638 if (err != numEntries) {
1647 std::ostream& err = this->markLocalErrorAndGetStream ();
1648 #ifdef HAVE_TPETRA_DEBUG
1649 err <<
"copyAndPermute: The graph structure of the source of the "
1650 "Import or Export must be a subset of the graph structure of the "
1652 err <<
"invalidSrcCopyRows = [";
1653 for (
typename std::set<LO>::const_iterator it = invalidSrcCopyRows.begin ();
1654 it != invalidSrcCopyRows.end (); ++it) {
1656 typename std::set<LO>::const_iterator itp1 = it;
1658 if (itp1 != invalidSrcCopyRows.end ()) {
1662 err <<
"], invalidDstCopyRows = [";
1663 for (
typename std::set<LO>::const_iterator it = invalidDstCopyRows.begin ();
1664 it != invalidDstCopyRows.end (); ++it) {
1666 typename std::set<LO>::const_iterator itp1 = it;
1668 if (itp1 != invalidDstCopyRows.end ()) {
1672 err <<
"], invalidDstCopyCols = [";
1673 for (
typename std::set<LO>::const_iterator it = invalidDstCopyCols.begin ();
1674 it != invalidDstCopyCols.end (); ++it) {
1676 typename std::set<LO>::const_iterator itp1 = it;
1678 if (itp1 != invalidDstCopyCols.end ()) {
1682 err <<
"], invalidDstPermuteCols = [";
1683 for (
typename std::set<LO>::const_iterator it = invalidDstPermuteCols.begin ();
1684 it != invalidDstPermuteCols.end (); ++it) {
1686 typename std::set<LO>::const_iterator itp1 = it;
1688 if (itp1 != invalidDstPermuteCols.end ()) {
1692 err <<
"], invalidPermuteFromRows = [";
1693 for (
typename std::set<LO>::const_iterator it = invalidPermuteFromRows.begin ();
1694 it != invalidPermuteFromRows.end (); ++it) {
1696 typename std::set<LO>::const_iterator itp1 = it;
1698 if (itp1 != invalidPermuteFromRows.end ()) {
1704 err <<
"copyAndPermute: The graph structure of the source of the "
1705 "Import or Export must be a subset of the graph structure of the "
1707 #endif // HAVE_TPETRA_DEBUG
1711 std::ostringstream os;
1712 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1713 const bool lclSuccess = ! (* (this->localError_));
1714 os <<
"*** Proc " << myRank <<
": copyAndPermute "
1715 << (lclSuccess ?
"succeeded" :
"FAILED");
1719 os <<
": error messages: " << this->errorMessages ();
1721 std::cerr << os.str ();
1745 template<
class LO,
class GO>
1747 packRowCount (
const size_t numEnt,
1748 const size_t numBytesPerValue,
1749 const size_t blkSize)
1751 using ::Tpetra::Details::PackTraits;
1762 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
1763 const size_t valsLen = numEnt * numBytesPerValue * blkSize * blkSize;
1764 return numEntLen + gidsLen + valsLen;
1778 template<
class ST,
class LO,
class GO>
1780 unpackRowCount (
const typename ::Tpetra::Details::PackTraits<LO>::input_buffer_type& imports,
1781 const size_t offset,
1782 const size_t numBytes,
1785 using Kokkos::subview;
1786 using ::Tpetra::Details::PackTraits;
1788 if (numBytes == 0) {
1790 return static_cast<size_t> (0);
1795 TEUCHOS_TEST_FOR_EXCEPTION
1796 (theNumBytes > numBytes, std::logic_error,
"unpackRowCount: "
1797 "theNumBytes = " << theNumBytes <<
" < numBytes = " << numBytes
1799 const char*
const inBuf = imports.data () + offset;
1801 TEUCHOS_TEST_FOR_EXCEPTION
1802 (actualNumBytes > numBytes, std::logic_error,
"unpackRowCount: "
1803 "actualNumBytes = " << actualNumBytes <<
" < numBytes = " << numBytes
1805 return static_cast<size_t> (numEntLO);
1812 template<
class ST,
class LO,
class GO>
1814 packRowForBlockCrs (
const typename ::Tpetra::Details::PackTraits<LO>::output_buffer_type exports,
1815 const size_t offset,
1816 const size_t numEnt,
1817 const typename ::Tpetra::Details::PackTraits<GO>::input_array_type& gidsIn,
1818 const typename ::Tpetra::Details::PackTraits<ST>::input_array_type& valsIn,
1819 const size_t numBytesPerValue,
1820 const size_t blockSize)
1822 using Kokkos::subview;
1823 using ::Tpetra::Details::PackTraits;
1829 const size_t numScalarEnt = numEnt * blockSize * blockSize;
1832 const LO numEntLO =
static_cast<size_t> (numEnt);
1834 const size_t numEntBeg = offset;
1836 const size_t gidsBeg = numEntBeg + numEntLen;
1837 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
1838 const size_t valsBeg = gidsBeg + gidsLen;
1839 const size_t valsLen = numScalarEnt * numBytesPerValue;
1841 char*
const numEntOut = exports.data () + numEntBeg;
1842 char*
const gidsOut = exports.data () + gidsBeg;
1843 char*
const valsOut = exports.data () + valsBeg;
1845 size_t numBytesOut = 0;
1850 Kokkos::pair<int, size_t> p;
1851 p = PackTraits<GO>::packArray (gidsOut, gidsIn.data (), numEnt);
1852 errorCode += p.first;
1853 numBytesOut += p.second;
1855 p = PackTraits<ST>::packArray (valsOut, valsIn.data (), numScalarEnt);
1856 errorCode += p.first;
1857 numBytesOut += p.second;
1860 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
1861 TEUCHOS_TEST_FOR_EXCEPTION
1862 (numBytesOut != expectedNumBytes, std::logic_error,
1863 "packRowForBlockCrs: numBytesOut = " << numBytesOut
1864 <<
" != expectedNumBytes = " << expectedNumBytes <<
".");
1866 TEUCHOS_TEST_FOR_EXCEPTION
1867 (errorCode != 0, std::runtime_error,
"packRowForBlockCrs: "
1868 "PackTraits::packArray returned a nonzero error code");
1874 template<
class ST,
class LO,
class GO>
1876 unpackRowForBlockCrs (
const typename ::Tpetra::Details::PackTraits<GO>::output_array_type& gidsOut,
1877 const typename ::Tpetra::Details::PackTraits<ST>::output_array_type& valsOut,
1878 const typename ::Tpetra::Details::PackTraits<int>::input_buffer_type& imports,
1879 const size_t offset,
1880 const size_t numBytes,
1881 const size_t numEnt,
1882 const size_t numBytesPerValue,
1883 const size_t blockSize)
1885 using ::Tpetra::Details::PackTraits;
1887 if (numBytes == 0) {
1891 const size_t numScalarEnt = numEnt * blockSize * blockSize;
1892 TEUCHOS_TEST_FOR_EXCEPTION
1893 (static_cast<size_t> (imports.extent (0)) <= offset,
1894 std::logic_error,
"unpackRowForBlockCrs: imports.extent(0) = "
1895 << imports.extent (0) <<
" <= offset = " << offset <<
".");
1896 TEUCHOS_TEST_FOR_EXCEPTION
1897 (static_cast<size_t> (imports.extent (0)) < offset + numBytes,
1898 std::logic_error,
"unpackRowForBlockCrs: imports.extent(0) = "
1899 << imports.extent (0) <<
" < offset + numBytes = "
1900 << (offset + numBytes) <<
".");
1905 const size_t numEntBeg = offset;
1907 const size_t gidsBeg = numEntBeg + numEntLen;
1908 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
1909 const size_t valsBeg = gidsBeg + gidsLen;
1910 const size_t valsLen = numScalarEnt * numBytesPerValue;
1912 const char*
const numEntIn = imports.data () + numEntBeg;
1913 const char*
const gidsIn = imports.data () + gidsBeg;
1914 const char*
const valsIn = imports.data () + valsBeg;
1916 size_t numBytesOut = 0;
1920 TEUCHOS_TEST_FOR_EXCEPTION
1921 (static_cast<size_t> (numEntOut) != numEnt, std::logic_error,
1922 "unpackRowForBlockCrs: Expected number of entries " << numEnt
1923 <<
" != actual number of entries " << numEntOut <<
".");
1926 Kokkos::pair<int, size_t> p;
1927 p = PackTraits<GO>::unpackArray (gidsOut.data (), gidsIn, numEnt);
1928 errorCode += p.first;
1929 numBytesOut += p.second;
1931 p = PackTraits<ST>::unpackArray (valsOut.data (), valsIn, numScalarEnt);
1932 errorCode += p.first;
1933 numBytesOut += p.second;
1936 TEUCHOS_TEST_FOR_EXCEPTION
1937 (numBytesOut != numBytes, std::logic_error,
1938 "unpackRowForBlockCrs: numBytesOut = " << numBytesOut
1939 <<
" != numBytes = " << numBytes <<
".");
1941 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
1942 TEUCHOS_TEST_FOR_EXCEPTION
1943 (numBytesOut != expectedNumBytes, std::logic_error,
1944 "unpackRowForBlockCrs: numBytesOut = " << numBytesOut
1945 <<
" != expectedNumBytes = " << expectedNumBytes <<
".");
1947 TEUCHOS_TEST_FOR_EXCEPTION
1948 (errorCode != 0, std::runtime_error,
"unpackRowForBlockCrs: "
1949 "PackTraits::unpackArray returned a nonzero error code");
1955 template<
class Scalar,
class LO,
class GO,
class Node>
1959 (const ::Tpetra::SrcDistObject& source,
1964 Kokkos::DualView<
size_t*,
1966 size_t& constantNumPackets)
1968 using ::Tpetra::Details::Behavior;
1970 using ::Tpetra::Details::ProfilingRegion;
1971 using ::Tpetra::Details::PackTraits;
1973 typedef typename Kokkos::View<int*, device_type>::HostMirror::execution_space host_exec;
1977 ProfilingRegion profile_region(
"Tpetra::BlockCrsMatrix::packAndPrepare");
1979 const bool debug = Behavior::debug();
1980 const bool verbose = Behavior::verbose();
1985 std::ostringstream os;
1986 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1987 os <<
"Proc " << myRank <<
": BlockCrsMatrix::packAndPrepare: " << std::endl;
1992 if (* (this->localError_)) {
1993 std::ostream& err = this->markLocalErrorAndGetStream ();
1995 <<
"The target object of the Import or Export is already in an error state."
2004 std::ostringstream os;
2005 os << prefix << std::endl
2009 std::cerr << os.str ();
2015 if (exportLIDs.extent (0) != numPacketsPerLID.extent (0)) {
2016 std::ostream& err = this->markLocalErrorAndGetStream ();
2018 <<
"exportLIDs.extent(0) = " << exportLIDs.extent (0)
2019 <<
" != numPacketsPerLID.extent(0) = " << numPacketsPerLID.extent(0)
2020 <<
"." << std::endl;
2023 if (exportLIDs.need_sync_host ()) {
2024 std::ostream& err = this->markLocalErrorAndGetStream ();
2025 err << prefix <<
"exportLIDs be sync'd to host." << std::endl;
2029 const this_BCRS_type* src =
dynamic_cast<const this_BCRS_type*
> (&source);
2031 std::ostream& err = this->markLocalErrorAndGetStream ();
2033 <<
"The source (input) object of the Import or "
2034 "Export is either not a BlockCrsMatrix, or does not have the right "
2035 "template parameters. checkSizes() should have caught this. "
2036 "Please report this bug to the Tpetra developers." << std::endl;
2048 constantNumPackets = 0;
2052 const size_t blockSize =
static_cast<size_t> (src->getBlockSize ());
2053 const size_t numExportLIDs = exportLIDs.extent (0);
2054 size_t numBytesPerValue(0);
2056 auto val_host = val_.getHostView(Access::ReadOnly);
2058 PackTraits<impl_scalar_type>
2066 Impl::BlockCrsRowStruct<size_t> rowReducerStruct;
2069 auto exportLIDsHost = exportLIDs.view_host();
2070 auto numPacketsPerLIDHost = numPacketsPerLID.view_host();
2071 numPacketsPerLID.modify_host ();
2073 rowReducerStruct = Impl::BlockCrsRowStruct<size_t>();
2074 for (
size_t i = 0; i < numExportLIDs; ++i) {
2075 const LO lclRow = exportLIDsHost(i);
2077 numEnt = (numEnt == Teuchos::OrdinalTraits<size_t>::invalid () ? 0 : numEnt);
2079 const size_t numBytes =
2080 packRowCount<LO, GO> (numEnt, numBytesPerValue, blockSize);
2081 numPacketsPerLIDHost(i) = numBytes;
2082 rowReducerStruct += Impl::BlockCrsRowStruct<size_t>(numEnt, numBytes, numEnt);
2089 const size_t totalNumBytes = rowReducerStruct.totalNumBytes;
2090 const size_t totalNumEntries = rowReducerStruct.totalNumEntries;
2091 const size_t maxRowLength = rowReducerStruct.maxRowLength;
2094 std::ostringstream os;
2096 <<
"totalNumBytes = " << totalNumBytes <<
", totalNumEntries = " << totalNumEntries
2098 std::cerr << os.str ();
2104 if(exports.extent(0) != totalNumBytes)
2106 const std::string oldLabel = exports.d_view.label ();
2107 const std::string newLabel = (oldLabel ==
"") ?
"exports" : oldLabel;
2108 exports = Kokkos::DualView<packet_type*, buffer_device_type>(newLabel, totalNumBytes);
2110 if (totalNumEntries > 0) {
2112 Kokkos::View<size_t*, host_exec> offset(
"offset", numExportLIDs+1);
2114 const auto policy = Kokkos::RangePolicy<host_exec>(size_t(0), numExportLIDs+1);
2115 Kokkos::parallel_scan
2117 [=](
const size_t &i,
size_t &update,
const bool &
final) {
2118 if (
final) offset(i) = update;
2119 update += (i == numExportLIDs ? 0 : numPacketsPerLIDHost(i));
2122 if (offset(numExportLIDs) != totalNumBytes) {
2123 std::ostream& err = this->markLocalErrorAndGetStream ();
2125 <<
"At end of method, the final offset (in bytes) "
2126 << offset(numExportLIDs) <<
" does not equal the total number of bytes packed "
2127 << totalNumBytes <<
". "
2128 <<
"Please report this bug to the Tpetra developers." << std::endl;
2142 exports.modify_host();
2144 typedef Kokkos::TeamPolicy<host_exec> policy_type;
2146 policy_type(numExportLIDs, 1, 1)
2147 .set_scratch_size(0, Kokkos::PerTeam(
sizeof(GO)*maxRowLength));
2152 Kokkos::parallel_for
2154 [=](
const typename policy_type::member_type &member) {
2155 const size_t i = member.league_rank();
2156 Kokkos::View<GO*, typename host_exec::scratch_memory_space>
2157 gblColInds(member.team_scratch(0), maxRowLength);
2159 const LO lclRowInd = exportLIDsHost(i);
2160 local_inds_host_view_type lclColInds;
2161 values_host_view_type vals;
2166 src->getLocalRowView (lclRowInd, lclColInds, vals);
2167 const size_t numEnt = lclColInds.extent(0);
2170 for (
size_t j = 0; j < numEnt; ++j)
2177 const size_t numBytes =
2178 packRowForBlockCrs<impl_scalar_type, LO, GO>
2179 (exports.view_host(),
2182 Kokkos::View<const GO*, host_exec>(gblColInds.data(), maxRowLength),
2189 const size_t offsetDiff = offset(i+1) - offset(i);
2190 if (numBytes != offsetDiff) {
2191 std::ostringstream os;
2193 <<
"numBytes computed from packRowForBlockCrs is different from "
2194 <<
"precomputed offset values, LID = " << i << std::endl;
2195 std::cerr << os.str ();
2204 std::ostringstream os;
2205 const bool lclSuccess = ! (* (this->localError_));
2207 << (lclSuccess ?
"succeeded" :
"FAILED")
2209 std::cerr << os.str ();
2213 template<
class Scalar,
class LO,
class GO,
class Node>
2221 Kokkos::DualView<
size_t*,
2226 using ::Tpetra::Details::Behavior;
2228 using ::Tpetra::Details::ProfilingRegion;
2229 using ::Tpetra::Details::PackTraits;
2232 typename Kokkos::View<int*, device_type>::HostMirror::execution_space;
2234 ProfilingRegion profile_region(
"Tpetra::BlockCrsMatrix::unpackAndCombine");
2235 const bool verbose = Behavior::verbose ();
2240 std::ostringstream os;
2241 auto map = this->graph_.getRowMap ();
2242 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
2243 const int myRank = comm.is_null () ? -1 : comm->getRank ();
2244 os <<
"Proc " << myRank <<
": Tpetra::BlockCrsMatrix::unpackAndCombine: ";
2247 os <<
"Start" << endl;
2248 std::cerr << os.str ();
2253 if (* (this->localError_)) {
2254 std::ostream& err = this->markLocalErrorAndGetStream ();
2255 std::ostringstream os;
2256 os << prefix <<
"{Im/Ex}port target is already in error." << endl;
2258 std::cerr << os.str ();
2267 if (importLIDs.extent (0) != numPacketsPerLID.extent (0)) {
2268 std::ostream& err = this->markLocalErrorAndGetStream ();
2269 std::ostringstream os;
2270 os << prefix <<
"importLIDs.extent(0) = " << importLIDs.extent (0)
2271 <<
" != numPacketsPerLID.extent(0) = " << numPacketsPerLID.extent(0)
2274 std::cerr << os.str ();
2280 if (combineMode !=
ADD && combineMode !=
INSERT &&
2282 combineMode !=
ZERO) {
2283 std::ostream& err = this->markLocalErrorAndGetStream ();
2284 std::ostringstream os;
2286 <<
"Invalid CombineMode value " << combineMode <<
". Valid "
2287 <<
"values include ADD, INSERT, REPLACE, ABSMAX, and ZERO."
2290 std::cerr << os.str ();
2296 if (this->graph_.getColMap ().is_null ()) {
2297 std::ostream& err = this->markLocalErrorAndGetStream ();
2298 std::ostringstream os;
2299 os << prefix <<
"Target matrix's column Map is null." << endl;
2301 std::cerr << os.str ();
2310 const map_type& tgtColMap = * (this->graph_.getColMap ());
2313 const size_t blockSize = this->getBlockSize ();
2314 const size_t numImportLIDs = importLIDs.extent(0);
2321 size_t numBytesPerValue(0);
2323 auto val_host = val_.getHostView(Access::ReadOnly);
2325 PackTraits<impl_scalar_type>::packValueCount
2328 const size_t maxRowNumEnt = graph_.getLocalMaxNumRowEntries ();
2329 const size_t maxRowNumScalarEnt = maxRowNumEnt * blockSize * blockSize;
2332 std::ostringstream os;
2333 os << prefix <<
"combineMode: "
2334 << ::Tpetra::combineModeToString (combineMode)
2335 <<
", blockSize: " << blockSize
2336 <<
", numImportLIDs: " << numImportLIDs
2337 <<
", numBytesPerValue: " << numBytesPerValue
2338 <<
", maxRowNumEnt: " << maxRowNumEnt
2339 <<
", maxRowNumScalarEnt: " << maxRowNumScalarEnt << endl;
2340 std::cerr << os.str ();
2343 if (combineMode ==
ZERO || numImportLIDs == 0) {
2345 std::ostringstream os;
2346 os << prefix <<
"Nothing to unpack. Done!" << std::endl;
2347 std::cerr << os.str ();
2354 if (imports.need_sync_host ()) {
2355 imports.sync_host ();
2365 if (imports.need_sync_host () || numPacketsPerLID.need_sync_host () ||
2366 importLIDs.need_sync_host ()) {
2367 std::ostream& err = this->markLocalErrorAndGetStream ();
2368 std::ostringstream os;
2369 os << prefix <<
"All input DualViews must be sync'd to host by now. "
2370 <<
"imports_nc.need_sync_host()="
2371 << (imports.need_sync_host () ?
"true" :
"false")
2372 <<
", numPacketsPerLID.need_sync_host()="
2373 << (numPacketsPerLID.need_sync_host () ?
"true" :
"false")
2374 <<
", importLIDs.need_sync_host()="
2375 << (importLIDs.need_sync_host () ?
"true" :
"false")
2378 std::cerr << os.str ();
2384 const auto importLIDsHost = importLIDs.view_host ();
2385 const auto numPacketsPerLIDHost = numPacketsPerLID.view_host ();
2391 Kokkos::View<size_t*, host_exec> offset (
"offset", numImportLIDs+1);
2393 const auto policy = Kokkos::RangePolicy<host_exec>(size_t(0), numImportLIDs+1);
2394 Kokkos::parallel_scan
2395 (
"Tpetra::BlockCrsMatrix::unpackAndCombine: offsets", policy,
2396 [=] (
const size_t &i,
size_t &update,
const bool &
final) {
2397 if (
final) offset(i) = update;
2398 update += (i == numImportLIDs ? 0 : numPacketsPerLIDHost(i));
2408 Kokkos::View<int, host_exec, Kokkos::MemoryTraits<Kokkos::Atomic> >
2409 errorDuringUnpack (
"errorDuringUnpack");
2410 errorDuringUnpack () = 0;
2412 using policy_type = Kokkos::TeamPolicy<host_exec>;
2413 size_t scratch_per_row =
sizeof(GO) * maxRowNumEnt +
sizeof (LO) * maxRowNumEnt + numBytesPerValue * maxRowNumScalarEnt
2416 const auto policy = policy_type (numImportLIDs, 1, 1)
2417 .set_scratch_size (0, Kokkos::PerTeam (scratch_per_row));
2418 using host_scratch_space =
typename host_exec::scratch_memory_space;
2420 using pair_type = Kokkos::pair<size_t, size_t>;
2423 getValuesHostNonConst();
2425 Kokkos::parallel_for
2426 (
"Tpetra::BlockCrsMatrix::unpackAndCombine: unpack", policy,
2427 [=] (
const typename policy_type::member_type& member) {
2428 const size_t i = member.league_rank();
2429 Kokkos::View<GO*, host_scratch_space> gblColInds
2430 (member.team_scratch (0), maxRowNumEnt);
2431 Kokkos::View<LO*, host_scratch_space> lclColInds
2432 (member.team_scratch (0), maxRowNumEnt);
2433 Kokkos::View<impl_scalar_type*, host_scratch_space> vals
2434 (member.team_scratch (0), maxRowNumScalarEnt);
2437 const size_t offval = offset(i);
2438 const LO lclRow = importLIDsHost(i);
2439 const size_t numBytes = numPacketsPerLIDHost(i);
2440 const size_t numEnt =
2441 unpackRowCount<impl_scalar_type, LO, GO>
2442 (imports.view_host (), offval, numBytes, numBytesPerValue);
2445 if (numEnt > maxRowNumEnt) {
2446 errorDuringUnpack() = 1;
2448 std::ostringstream os;
2450 <<
"At i = " << i <<
", numEnt = " << numEnt
2451 <<
" > maxRowNumEnt = " << maxRowNumEnt
2453 std::cerr << os.str ();
2458 const size_t numScalarEnt = numEnt*blockSize*blockSize;
2459 auto gidsOut = Kokkos::subview(gblColInds, pair_type(0, numEnt));
2460 auto lidsOut = Kokkos::subview(lclColInds, pair_type(0, numEnt));
2461 auto valsOut = Kokkos::subview(vals, pair_type(0, numScalarEnt));
2466 size_t numBytesOut = 0;
2469 unpackRowForBlockCrs<impl_scalar_type, LO, GO>
2470 (Kokkos::View<GO*,host_exec>(gidsOut.data(), numEnt),
2471 Kokkos::View<impl_scalar_type*,host_exec>(valsOut.data(), numScalarEnt),
2472 imports.view_host(),
2473 offval, numBytes, numEnt,
2474 numBytesPerValue, blockSize);
2476 catch (std::exception& e) {
2477 errorDuringUnpack() = 1;
2479 std::ostringstream os;
2480 os << prefix <<
"At i = " << i <<
", unpackRowForBlockCrs threw: "
2481 << e.what () << endl;
2482 std::cerr << os.str ();
2487 if (numBytes != numBytesOut) {
2488 errorDuringUnpack() = 1;
2490 std::ostringstream os;
2492 <<
"At i = " << i <<
", numBytes = " << numBytes
2493 <<
" != numBytesOut = " << numBytesOut <<
"."
2495 std::cerr << os.str();
2501 for (
size_t k = 0; k < numEnt; ++k) {
2503 if (lidsOut(k) == Teuchos::OrdinalTraits<LO>::invalid ()) {
2505 std::ostringstream os;
2507 <<
"At i = " << i <<
", GID " << gidsOut(k)
2508 <<
" is not owned by the calling process."
2510 std::cerr << os.str();
2518 const LO*
const lidsRaw =
const_cast<const LO*
> (lidsOut.data ());
2519 const Scalar*
const valsRaw =
reinterpret_cast<const Scalar*
>
2521 if (combineMode ==
ADD) {
2522 numCombd = this->sumIntoLocalValues (lclRow, lidsRaw, valsRaw, numEnt);
2523 }
else if (combineMode ==
INSERT || combineMode ==
REPLACE) {
2524 numCombd = this->replaceLocalValues (lclRow, lidsRaw, valsRaw, numEnt);
2525 }
else if (combineMode ==
ABSMAX) {
2526 numCombd = this->absMaxLocalValues (lclRow, lidsRaw, valsRaw, numEnt);
2529 if (static_cast<LO> (numEnt) != numCombd) {
2530 errorDuringUnpack() = 1;
2532 std::ostringstream os;
2533 os << prefix <<
"At i = " << i <<
", numEnt = " << numEnt
2534 <<
" != numCombd = " << numCombd <<
"."
2536 std::cerr << os.str ();
2544 if (errorDuringUnpack () != 0) {
2545 std::ostream& err = this->markLocalErrorAndGetStream ();
2546 err << prefix <<
"Unpacking failed.";
2548 err <<
" Please run again with the environment variable setting "
2549 "TPETRA_VERBOSE=1 to get more verbose diagnostic output.";
2555 std::ostringstream os;
2556 const bool lclSuccess = ! (* (this->localError_));
2558 << (lclSuccess ?
"succeeded" :
"FAILED")
2560 std::cerr << os.str ();
2564 template<
class Scalar,
class LO,
class GO,
class Node>
2568 using Teuchos::TypeNameTraits;
2569 std::ostringstream os;
2570 os <<
"\"Tpetra::BlockCrsMatrix\": { "
2571 <<
"Template parameters: { "
2572 <<
"Scalar: " << TypeNameTraits<Scalar>::name ()
2573 <<
"LO: " << TypeNameTraits<LO>::name ()
2574 <<
"GO: " << TypeNameTraits<GO>::name ()
2575 <<
"Node: " << TypeNameTraits<Node>::name ()
2577 <<
", Label: \"" << this->getObjectLabel () <<
"\""
2578 <<
", Global dimensions: ["
2579 << graph_.getDomainMap ()->getGlobalNumElements () <<
", "
2580 << graph_.getRangeMap ()->getGlobalNumElements () <<
"]"
2581 <<
", Block size: " << getBlockSize ()
2587 template<
class Scalar,
class LO,
class GO,
class Node>
2591 const Teuchos::EVerbosityLevel verbLevel)
const
2593 using Teuchos::ArrayRCP;
2594 using Teuchos::CommRequest;
2595 using Teuchos::FancyOStream;
2596 using Teuchos::getFancyOStream;
2597 using Teuchos::ireceive;
2598 using Teuchos::isend;
2599 using Teuchos::outArg;
2600 using Teuchos::TypeNameTraits;
2601 using Teuchos::VERB_DEFAULT;
2602 using Teuchos::VERB_NONE;
2603 using Teuchos::VERB_LOW;
2604 using Teuchos::VERB_MEDIUM;
2606 using Teuchos::VERB_EXTREME;
2608 using Teuchos::wait;
2612 const Teuchos::EVerbosityLevel vl =
2613 (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
2615 if (vl == VERB_NONE) {
2620 Teuchos::OSTab tab0 (out);
2622 out <<
"\"Tpetra::BlockCrsMatrix\":" << endl;
2623 Teuchos::OSTab tab1 (out);
2625 out <<
"Template parameters:" << endl;
2627 Teuchos::OSTab tab2 (out);
2628 out <<
"Scalar: " << TypeNameTraits<Scalar>::name () << endl
2629 <<
"LO: " << TypeNameTraits<LO>::name () << endl
2630 <<
"GO: " << TypeNameTraits<GO>::name () << endl
2631 <<
"Node: " << TypeNameTraits<Node>::name () << endl;
2633 out <<
"Label: \"" << this->getObjectLabel () <<
"\"" << endl
2634 <<
"Global dimensions: ["
2635 << graph_.getDomainMap ()->getGlobalNumElements () <<
", "
2636 << graph_.getRangeMap ()->getGlobalNumElements () <<
"]" << endl;
2638 const LO blockSize = getBlockSize ();
2639 out <<
"Block size: " << blockSize << endl;
2642 if (vl >= VERB_MEDIUM) {
2643 const Teuchos::Comm<int>& comm = * (graph_.getMap ()->getComm ());
2644 const int myRank = comm.getRank ();
2646 out <<
"Row Map:" << endl;
2648 getRowMap()->describe (out, vl);
2650 if (! getColMap ().is_null ()) {
2651 if (getColMap() == getRowMap()) {
2653 out <<
"Column Map: same as row Map" << endl;
2658 out <<
"Column Map:" << endl;
2660 getColMap ()->describe (out, vl);
2663 if (! getDomainMap ().is_null ()) {
2664 if (getDomainMap () == getRowMap ()) {
2666 out <<
"Domain Map: same as row Map" << endl;
2669 else if (getDomainMap () == getColMap ()) {
2671 out <<
"Domain Map: same as column Map" << endl;
2676 out <<
"Domain Map:" << endl;
2678 getDomainMap ()->describe (out, vl);
2681 if (! getRangeMap ().is_null ()) {
2682 if (getRangeMap () == getDomainMap ()) {
2684 out <<
"Range Map: same as domain Map" << endl;
2687 else if (getRangeMap () == getRowMap ()) {
2689 out <<
"Range Map: same as row Map" << endl;
2694 out <<
"Range Map: " << endl;
2696 getRangeMap ()->describe (out, vl);
2701 if (vl >= VERB_EXTREME) {
2702 const Teuchos::Comm<int>& comm = * (graph_.getMap ()->getComm ());
2703 const int myRank = comm.getRank ();
2704 const int numProcs = comm.getSize ();
2707 RCP<std::ostringstream> lclOutStrPtr (
new std::ostringstream ());
2708 RCP<FancyOStream> osPtr = getFancyOStream (lclOutStrPtr);
2709 FancyOStream& os = *osPtr;
2710 os <<
"Process " << myRank <<
":" << endl;
2711 Teuchos::OSTab tab2 (os);
2713 const map_type& meshRowMap = * (graph_.getRowMap ());
2714 const map_type& meshColMap = * (graph_.getColMap ());
2719 os <<
"Row " << meshGblRow <<
": {";
2721 local_inds_host_view_type lclColInds;
2722 values_host_view_type vals;
2724 this->getLocalRowView (meshLclRow, lclColInds, vals); numInds = lclColInds.extent(0);
2726 for (LO k = 0; k < numInds; ++k) {
2729 os <<
"Col " << gblCol <<
": [";
2730 for (LO i = 0; i < blockSize; ++i) {
2731 for (LO j = 0; j < blockSize; ++j) {
2732 os << vals[blockSize*blockSize*k + i*blockSize + j];
2733 if (j + 1 < blockSize) {
2737 if (i + 1 < blockSize) {
2742 if (k + 1 < numInds) {
2752 out << lclOutStrPtr->str ();
2753 lclOutStrPtr = Teuchos::null;
2756 const int sizeTag = 1337;
2757 const int dataTag = 1338;
2759 ArrayRCP<char> recvDataBuf;
2763 for (
int p = 1; p < numProcs; ++p) {
2766 ArrayRCP<size_t> recvSize (1);
2768 RCP<CommRequest<int> > recvSizeReq =
2769 ireceive<int, size_t> (recvSize, p, sizeTag, comm);
2770 wait<int> (comm, outArg (recvSizeReq));
2771 const size_t numCharsToRecv = recvSize[0];
2778 if (static_cast<size_t>(recvDataBuf.size()) < numCharsToRecv + 1) {
2779 recvDataBuf.resize (numCharsToRecv + 1);
2781 ArrayRCP<char> recvData = recvDataBuf.persistingView (0, numCharsToRecv);
2783 RCP<CommRequest<int> > recvDataReq =
2784 ireceive<int, char> (recvData, p, dataTag, comm);
2785 wait<int> (comm, outArg (recvDataReq));
2790 recvDataBuf[numCharsToRecv] =
'\0';
2791 out << recvDataBuf.getRawPtr ();
2793 else if (myRank == p) {
2797 const std::string stringToSend = lclOutStrPtr->str ();
2798 lclOutStrPtr = Teuchos::null;
2801 const size_t numCharsToSend = stringToSend.size ();
2802 ArrayRCP<size_t> sendSize (1);
2803 sendSize[0] = numCharsToSend;
2804 RCP<CommRequest<int> > sendSizeReq =
2805 isend<int, size_t> (sendSize, 0, sizeTag, comm);
2806 wait<int> (comm, outArg (sendSizeReq));
2814 ArrayRCP<const char> sendData (&stringToSend[0], 0, numCharsToSend,
false);
2815 RCP<CommRequest<int> > sendDataReq =
2816 isend<int, char> (sendData, 0, dataTag, comm);
2817 wait<int> (comm, outArg (sendDataReq));
2823 template<
class Scalar,
class LO,
class GO,
class Node>
2824 Teuchos::RCP<const Teuchos::Comm<int> >
2828 return graph_.getComm();
2832 template<
class Scalar,
class LO,
class GO,
class Node>
2837 return graph_.getGlobalNumCols();
2841 template<
class Scalar,
class LO,
class GO,
class Node>
2846 return graph_.getLocalNumCols();
2849 template<
class Scalar,
class LO,
class GO,
class Node>
2854 return graph_.getIndexBase();
2857 template<
class Scalar,
class LO,
class GO,
class Node>
2862 return graph_.getGlobalNumEntries();
2865 template<
class Scalar,
class LO,
class GO,
class Node>
2870 return graph_.getLocalNumEntries();
2873 template<
class Scalar,
class LO,
class GO,
class Node>
2878 return graph_.getNumEntriesInGlobalRow(globalRow);
2882 template<
class Scalar,
class LO,
class GO,
class Node>
2887 return graph_.getGlobalMaxNumRowEntries();
2890 template<
class Scalar,
class LO,
class GO,
class Node>
2895 return graph_.hasColMap();
2899 template<
class Scalar,
class LO,
class GO,
class Node>
2904 return graph_.isLocallyIndexed();
2907 template<
class Scalar,
class LO,
class GO,
class Node>
2912 return graph_.isGloballyIndexed();
2915 template<
class Scalar,
class LO,
class GO,
class Node>
2920 return graph_.isFillComplete ();
2923 template<
class Scalar,
class LO,
class GO,
class Node>
2931 template<
class Scalar,
class LO,
class GO,
class Node>
2935 nonconst_global_inds_host_view_type &,
2936 nonconst_values_host_view_type &,
2939 TEUCHOS_TEST_FOR_EXCEPTION(
2940 true, std::logic_error,
"Tpetra::BlockCrsMatrix::getGlobalRowCopy: "
2941 "This class doesn't support global matrix indexing.");
2945 template<
class Scalar,
class LO,
class GO,
class Node>
2949 global_inds_host_view_type &,
2950 values_host_view_type &)
const
2952 TEUCHOS_TEST_FOR_EXCEPTION(
2953 true, std::logic_error,
"Tpetra::BlockCrsMatrix::getGlobalRowView: "
2954 "This class doesn't support global matrix indexing.");
2958 template<
class Scalar,
class LO,
class GO,
class Node>
2962 local_inds_host_view_type &colInds,
2963 values_host_view_type &vals)
const
2965 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
2966 colInds = local_inds_host_view_type();
2967 vals = values_host_view_type();
2970 const size_t absBlockOffsetStart = ptrHost_[localRowInd];
2971 const size_t numInds = ptrHost_[localRowInd + 1] - absBlockOffsetStart;
2972 colInds = local_inds_host_view_type(indHost_.data()+absBlockOffsetStart, numInds);
2974 vals = getValuesHost (localRowInd);
2978 template<
class Scalar,
class LO,
class GO,
class Node>
2982 local_inds_host_view_type &colInds,
2983 nonconst_values_host_view_type &vals)
const
2985 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
2986 colInds = nonconst_local_inds_host_view_type();
2987 vals = nonconst_values_host_view_type();
2990 const size_t absBlockOffsetStart = ptrHost_[localRowInd];
2991 const size_t numInds = ptrHost_[localRowInd + 1] - absBlockOffsetStart;
2992 colInds = local_inds_host_view_type(indHost_.data()+absBlockOffsetStart, numInds);
2999 template<
class Scalar,
class LO,
class GO,
class Node>
3004 const size_t lclNumMeshRows = graph_.getLocalNumRows ();
3006 Kokkos::View<size_t*, device_type> diagOffsets (
"diagOffsets", lclNumMeshRows);
3007 graph_.getLocalDiagOffsets (diagOffsets);
3010 auto diagOffsetsHost = Kokkos::create_mirror_view (diagOffsets);
3014 auto vals_host_out = val_.getHostView(Access::ReadOnly);
3018 size_t rowOffset = 0;
3020 LO bs = getBlockSize();
3021 for(
size_t r=0; r<getLocalNumRows(); r++)
3024 offset = rowOffset + diagOffsetsHost(r)*bs*bs;
3025 for(
int b=0; b<bs; b++)
3030 rowOffset += getNumEntriesInLocalRow(r)*bs*bs;
3036 template<
class Scalar,
class LO,
class GO,
class Node>
3041 TEUCHOS_TEST_FOR_EXCEPTION(
3042 true, std::logic_error,
"Tpetra::BlockCrsMatrix::leftScale: "
3043 "not implemented.");
3047 template<
class Scalar,
class LO,
class GO,
class Node>
3052 TEUCHOS_TEST_FOR_EXCEPTION(
3053 true, std::logic_error,
"Tpetra::BlockCrsMatrix::rightScale: "
3054 "not implemented.");
3058 template<
class Scalar,
class LO,
class GO,
class Node>
3059 Teuchos::RCP<const ::Tpetra::RowGraph<LO, GO, Node> >
3066 template<
class Scalar,
class LO,
class GO,
class Node>
3067 typename ::Tpetra::RowMatrix<Scalar, LO, GO, Node>::mag_type
3071 TEUCHOS_TEST_FOR_EXCEPTION(
3072 true, std::logic_error,
"Tpetra::BlockCrsMatrix::getFrobeniusNorm: "
3073 "not implemented.");
3083 #define TPETRA_BLOCKCRSMATRIX_INSTANT(S,LO,GO,NODE) \
3084 template class BlockCrsMatrix< S, LO, GO, NODE >;
3086 #endif // TPETRA_BLOCKCRSMATRIX_DEF_HPP
virtual void getLocalRowCopy(LO LocalRow, nonconst_local_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const override
Not implemented.
size_t getNumEntriesInLocalRow(const LO localRowInd) const override
Return the number of entries in the given row on the calling process.
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
virtual size_t getLocalNumEntries() const override
The local number of stored (structurally nonzero) entries.
virtual global_size_t getGlobalNumCols() const override
The global number of columns of this matrix.
typename BMV::impl_scalar_type impl_scalar_type
The implementation type of entries in the matrix.
void getLocalDiagCopy(const Kokkos::View< impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &diag, const Kokkos::View< const size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Variant of getLocalDiagCopy() that uses precomputed offsets and puts diagonal blocks in a 3-D Kokkos:...
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel) const override
Print a description of this object to the given output stream.
Kokkos::View< impl_scalar_type **, Impl::BlockCrsMatrixLittleBlockArrayLayout, device_type, Kokkos::MemoryTraits< Kokkos::Unmanaged > > little_block_type
The type used to access nonconst matrix blocks.
virtual typename::Tpetra::RowMatrix< Scalar, LO, GO, Node >::mag_type getFrobeniusNorm() const override
The Frobenius norm of the matrix.
LO replaceLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Replace values at the given (mesh, i.e., block) column indices, in the given (mesh, i.e., block) row.
virtual void leftScale(const ::Tpetra::Vector< Scalar, LO, GO, Node > &x) override
Scale the RowMatrix on the left with the given Vector x.
Teuchos::RCP< const map_type > getColMap() const override
Returns the Map that describes the column distribution in this graph.
LO replaceLocalValuesByOffsets(const LO localRowInd, const ptrdiff_t offsets[], const Scalar vals[], const LO numOffsets) const
Like replaceLocalValues, but avoids computing row offsets.
std::string description() const override
One-line description of this object.
Sparse matrix whose entries are small dense square blocks, all of the same dimensions.
typename DistObject< Scalar, LO, GO, Node >::buffer_device_type buffer_device_type
Kokkos::Device specialization for communication buffers.
void exportAndFillComplete(Teuchos::RCP< BlockCrsMatrix< Scalar, LO, GO, Node > > &destMatrix, const Export< LO, GO, Node > &exporter) const
Import from this to the given destination matrix, and make the result fill complete.
virtual bool hasColMap() const override
Whether this matrix has a well-defined column Map.
virtual bool isLocallyIndexed() const override
Whether matrix indices are locally indexed.
LO local_ordinal_type
The type of local indices.
static KOKKOS_INLINE_FUNCTION size_t unpackValue(LO &outVal, const char inBuf[])
Unpack the given value from the given output buffer.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
void getLocalRowViewNonConst(LO LocalRow, local_inds_host_view_type &indices, nonconst_values_host_view_type &values) const
size_t getNumEntriesInLocalRow(local_ordinal_type localRow) const override
Get the number of entries in the given row (local index).
bool isConstantStride() const
Whether this multivector has constant stride between columns.
void setAllToScalar(const Scalar &alpha)
Set all matrix entries equal to alpha.
One or more distributed dense vectors.
Kokkos::StaticCrsGraph< local_ordinal_type, Kokkos::LayoutLeft, device_type, void, size_t > local_graph_device_type
The type of the part of the sparse graph on each MPI process.
Teuchos::RCP< const map_type > getDomainMap() const override
Returns the Map associated with the domain of this graph.
Linear algebra kernels for small dense matrices and vectors.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
Scalar scalar_type
The type of entries in the matrix (that is, of each entry in each block).
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
size_t getLocalNumRows() const override
get the local number of block rows
virtual Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
The communicator over which this matrix is distributed.
Kokkos::LayoutRight BlockCrsMatrixLittleBlockArrayLayout
give an option to use layoutleft
virtual void rightScale(const ::Tpetra::Vector< Scalar, LO, GO, Node > &x) override
Scale the RowMatrix on the right with the given Vector x.
void getLocalRowView(LO LocalRow, local_inds_host_view_type &indices, values_host_view_type &values) const override
Get a view of the (mesh, i.e., block) row, using local (mesh, i.e., block) indices.
static map_type makePointMap(const map_type &meshMap, const LO blockSize)
Create and return the point Map corresponding to the given mesh Map and block size.
local_ordinal_type getMinLocalIndex() const
The minimum local index.
MultiVector for multiple degrees of freedom per mesh point.
virtual void copyAndPermute(const SrcDistObject &sourceObj, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM) override
KOKKOS_INLINE_FUNCTION void absMax(const ViewType2 &Y, const ViewType1 &X)
Implementation of Tpetra's ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix, and the small dense vectors in BlockMultiVector and BlockVector.
Teuchos::RCP< const map_type > getColMap() const override
get the (mesh) map for the columns of this block matrix.
virtual size_t getGlobalMaxNumRowEntries() const override
The maximum number of entries in any row over all processes in the matrix's communicator.
size_t global_size_t
Global size_t object.
virtual void getGlobalRowView(GO GlobalRow, global_inds_host_view_type &indices, values_host_view_type &values) const override
Get a constant, nonpersisting, globally indexed view of the given row of the matrix.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
BlockCrsMatrix()
Default constructor: Makes an empty block matrix.
LO absMaxLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Variant of getLocalDiagCopy() that uses precomputed offsets and puts diagonal blocks in a 3-D Kokkos:...
LO getBlockSize() const
Get the number of degrees of freedom per mesh point.
Teuchos::RCP< const map_type > getDomainMap() const override
Get the (point) domain Map of this matrix.
Teuchos::RCP< const map_type > getRowMap() const override
get the (mesh) map for the rows of this block matrix.
static KOKKOS_INLINE_FUNCTION size_t packValue(char outBuf[], const LO &inVal)
Pack the given value of type value_type into the given output buffer of bytes (char).
Insert new values that don't currently exist.
bool isSorted() const
Whether graph indices in all rows are known to be sorted.
LO sumIntoLocalValuesByOffsets(const LO localRowInd, const ptrdiff_t offsets[], const Scalar vals[], const LO numOffsets) const
Like sumIntoLocalValues, but avoids computing row offsets.
global_size_t getGlobalNumRows() const override
get the global number of block rows
global_ordinal_type getGlobalElement(local_ordinal_type localIndex) const
The global index corresponding to the given local index.
void replaceLocalValue(const LocalOrdinal myRow, const Scalar &value)
Replace current value at the specified location with specified values.
local_ordinal_type getMaxLocalIndex() const
The maximum local index on the calling process.
bool isNodeLocalElement(local_ordinal_type localIndex) const
Whether the given local index is valid for this Map on the calling process.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
virtual Teuchos::RCP< const ::Tpetra::RowGraph< LO, GO, Node > > getGraph() const override
Get the (mesh) graph.
virtual GO getIndexBase() const override
The index base for global indices in this matrix.
void applyBlock(const BlockMultiVector< Scalar, LO, GO, Node > &X, BlockMultiVector< Scalar, LO, GO, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, const Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), const Scalar beta=Teuchos::ScalarTraits< Scalar >::zero())
Version of apply() that takes BlockMultiVector input and output.
CombineMode
Rule for combining data in an Import or Export.
virtual bool isGloballyIndexed() const override
Whether matrix indices are globally indexed.
LO getLocalRowOffsets(const LO localRowInd, ptrdiff_t offsets[], const LO colInds[], const LO numColInds) const
Get relative offsets corresponding to the given rows, given by local row index.
size_t getLocalMaxNumRowEntries() const override
Maximum number of entries in any row of the matrix, on this process.
virtual global_size_t getGlobalNumEntries() const override
The global number of stored (structurally nonzero) entries.
Replace old value with maximum of magnitudes of old and new values.
void apply(const mv_type &X, mv_type &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const override
For this matrix A, compute Y := beta * Y + alpha * Op(A) * X.
Replace existing values with new values.
device_type::execution_space execution_space
The Kokkos execution space that this class uses.
Teuchos::RCP< const map_type > getRangeMap() const override
Returns the Map associated with the domain of this graph.
void enableWDVTracking()
Enable WrappedDualView reference-count tracking and syncing. Call this after exiting a host-parallel ...
Replace old values with zero.
Teuchos::RCP< const map_type > getRangeMap() const override
Get the (point) range Map of this matrix.
KOKKOS_INLINE_FUNCTION void COPY(const ViewType1 &x, const ViewType2 &y)
Deep copy x into y, where x and y are either rank 1 (vectors) or rank 2 (matrices) with the same dime...
std::string dualViewStatusToString(const DualViewType &dv, const char name[])
Return the status of the given Kokkos::DualView, as a human-readable string.
void getLocalDiagOffsets(const Kokkos::View< size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Get offsets of the diagonal entries in the matrix.
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const LO &)
Number of bytes required to pack or unpack the given value of type value_type.
char packet_type
Implementation detail; tells.
local_matrix_device_type getLocalMatrixDevice() const
void disableWDVTracking()
Disable WrappedDualView reference-count tracking and syncing. Call this before entering a host-parall...
local_ordinal_type getLocalElement(global_ordinal_type globalIndex) const
The local index corresponding to the given global index.
A distributed dense vector.
virtual bool supportsRowViews() const override
Whether this object implements getLocalRowView() and getGlobalRowView().
virtual void packAndPrepare(const SrcDistObject &sourceObj, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &exportLIDs, Kokkos::DualView< packet_type *, buffer_device_type > &exports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, size_t &constantNumPackets) override
virtual void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< packet_type *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode) override
virtual void getGlobalRowCopy(GO GlobalRow, nonconst_global_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const override
Get a copy of the given global row's entries.
bool locallySameAs(const Map< local_ordinal_type, global_ordinal_type, node_type > &map) const
Is this Map locally the same as the input Map?
virtual bool isFillComplete() const override
Whether fillComplete() has been called.
impl_scalar_type_dualview::t_host getValuesHostNonConst() const
Get the host or device View of the matrix's values (val_).
Base class for distributed Tpetra objects that support data redistribution.
LO sumIntoLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Sum into values at the given (mesh, i.e., block) column indices, in the given (mesh, i.e., block) row.
virtual size_t getLocalNumCols() const override
The number of columns needed to apply the forward operator on this node.
void importAndFillComplete(Teuchos::RCP< BlockCrsMatrix< Scalar, LO, GO, Node > > &destMatrix, const Import< LO, GO, Node > &importer) const
Import from this to the given destination matrix, and make the result fill complete.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
virtual size_t getNumEntriesInGlobalRow(GO globalRow) const override
The current number of entries on the calling process in the specified global row. ...