41 #ifndef TPETRA_BLOCKCRSMATRIX_DEF_HPP
42 #define TPETRA_BLOCKCRSMATRIX_DEF_HPP
50 #include "Tpetra_BlockMultiVector.hpp"
53 #include "Teuchos_TimeMonitor.hpp"
54 #ifdef HAVE_TPETRA_DEBUG
56 #endif // HAVE_TPETRA_DEBUG
58 #include "KokkosSparse_spmv.hpp"
65 struct BlockCrsRowStruct {
66 T totalNumEntries, totalNumBytes, maxRowLength;
67 KOKKOS_DEFAULTED_FUNCTION BlockCrsRowStruct() =
default;
68 KOKKOS_DEFAULTED_FUNCTION BlockCrsRowStruct(
const BlockCrsRowStruct &b) =
default;
69 KOKKOS_INLINE_FUNCTION BlockCrsRowStruct(
const T& numEnt,
const T& numBytes,
const T& rowLength)
70 : totalNumEntries(numEnt), totalNumBytes(numBytes), maxRowLength(rowLength) {}
75 KOKKOS_INLINE_FUNCTION
76 void operator+=(BlockCrsRowStruct<T> &a,
const BlockCrsRowStruct<T> &b) {
77 a.totalNumEntries += b.totalNumEntries;
78 a.totalNumBytes += b.totalNumBytes;
79 a.maxRowLength = a.maxRowLength > b.maxRowLength ? a.maxRowLength : b.maxRowLength;
82 template<
typename T,
typename ExecSpace>
83 struct BlockCrsReducer {
84 typedef BlockCrsReducer reducer;
86 typedef Kokkos::View<value_type,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type;
89 KOKKOS_INLINE_FUNCTION
90 BlockCrsReducer(value_type &val) : value(&val) {}
92 KOKKOS_INLINE_FUNCTION
void join(value_type &dst, value_type &src)
const { dst += src; }
93 KOKKOS_INLINE_FUNCTION
void join(value_type &dst,
const value_type &src)
const { dst += src; }
94 KOKKOS_INLINE_FUNCTION
void init(value_type &val)
const { val = value_type(); }
95 KOKKOS_INLINE_FUNCTION value_type& reference() {
return *value; }
96 KOKKOS_INLINE_FUNCTION result_view_type view()
const {
return result_view_type(value); }
105 template<
class Scalar,
class LO,
class GO,
class Node>
106 class GetLocalDiagCopy {
108 typedef typename Node::device_type device_type;
109 typedef size_t diag_offset_type;
110 typedef Kokkos::View<
const size_t*, device_type,
111 Kokkos::MemoryUnmanaged> diag_offsets_type;
112 typedef typename ::Tpetra::CrsGraph<LO, GO, Node> global_graph_type;
114 typedef typename local_graph_device_type::row_map_type row_offsets_type;
115 typedef typename ::Tpetra::BlockMultiVector<Scalar, LO, GO, Node>::impl_scalar_type IST;
116 typedef Kokkos::View<IST***, device_type, Kokkos::MemoryUnmanaged> diag_type;
117 typedef Kokkos::View<const IST*, device_type, Kokkos::MemoryUnmanaged> values_type;
120 GetLocalDiagCopy (
const diag_type& diag,
121 const values_type& val,
122 const diag_offsets_type& diagOffsets,
123 const row_offsets_type& ptr,
124 const LO blockSize) :
126 diagOffsets_ (diagOffsets),
128 blockSize_ (blockSize),
129 offsetPerBlock_ (blockSize_*blockSize_),
133 KOKKOS_INLINE_FUNCTION
void
134 operator() (
const LO& lclRowInd)
const
139 const size_t absOffset = ptr_[lclRowInd];
142 const size_t relOffset = diagOffsets_[lclRowInd];
145 const size_t pointOffset = (absOffset+relOffset)*offsetPerBlock_;
150 device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
151 const_little_block_type;
152 const_little_block_type D_in (val_.data () + pointOffset,
153 blockSize_, blockSize_);
154 auto D_out = Kokkos::subview (diag_, lclRowInd, ALL (), ALL ());
160 diag_offsets_type diagOffsets_;
161 row_offsets_type ptr_;
168 template<
class Scalar,
class LO,
class GO,
class Node>
170 BlockCrsMatrix<Scalar, LO, GO, Node>::
171 markLocalErrorAndGetStream ()
173 * (this->localError_) =
true;
174 if ((*errs_).is_null ()) {
175 *errs_ = Teuchos::rcp (
new std::ostringstream ());
180 template<
class Scalar,
class LO,
class GO,
class Node>
184 graph_ (Teuchos::rcp (new
map_type ()), 0),
185 blockSize_ (static_cast<LO> (0)),
186 X_colMap_ (new Teuchos::RCP<
BMV> ()),
187 Y_rowMap_ (new Teuchos::RCP<
BMV> ()),
188 pointImporter_ (new Teuchos::RCP<typename
crs_graph_type::import_type> ()),
190 localError_ (new bool (false)),
191 errs_ (new Teuchos::RCP<std::ostringstream> ())
195 template<
class Scalar,
class LO,
class GO,
class Node>
198 const LO blockSize) :
201 rowMeshMap_ (* (graph.getRowMap ())),
202 blockSize_ (blockSize),
203 X_colMap_ (new Teuchos::RCP<
BMV> ()),
204 Y_rowMap_ (new Teuchos::RCP<
BMV> ()),
205 pointImporter_ (new Teuchos::RCP<typename
crs_graph_type::import_type> ()),
206 offsetPerBlock_ (blockSize * blockSize),
207 localError_ (new bool (false)),
208 errs_ (new Teuchos::RCP<std::ostringstream> ())
212 TEUCHOS_TEST_FOR_EXCEPTION(
213 ! graph_.
isSorted (), std::invalid_argument,
"Tpetra::"
214 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted "
215 "rows (isSorted() is false). This class assumes sorted rows.");
217 graphRCP_ = Teuchos::rcpFromRef(graph_);
223 const bool blockSizeIsNonpositive = (blockSize + 1 <= 1);
224 TEUCHOS_TEST_FOR_EXCEPTION(
225 blockSizeIsNonpositive, std::invalid_argument,
"Tpetra::"
226 "BlockCrsMatrix constructor: The input blockSize = " << blockSize <<
227 " <= 0. The block size must be positive.");
235 const auto colPointMap = Teuchos::rcp
237 *pointImporter_ = Teuchos::rcp
241 auto local_graph_h = graph.getLocalGraphHost ();
242 auto ptr_h = local_graph_h.row_map;
243 ptrHost_ = decltype(ptrHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph row offset"), ptr_h.extent(0));
246 auto ind_h = local_graph_h.entries;
247 indHost_ = decltype(indHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph column indices"), ind_h.extent(0));
251 const auto numValEnt = ind_h.extent(0) * offsetPerBlock ();
252 val_ = decltype (val_) (impl_scalar_type_dualview(
"val", numValEnt));
256 template<
class Scalar,
class LO,
class GO,
class Node>
259 const typename local_matrix_device_type::values_type& values,
260 const LO blockSize) :
263 rowMeshMap_ (* (graph.getRowMap ())),
264 blockSize_ (blockSize),
265 X_colMap_ (new Teuchos::RCP<
BMV> ()),
266 Y_rowMap_ (new Teuchos::RCP<
BMV> ()),
267 pointImporter_ (new Teuchos::RCP<typename
crs_graph_type::import_type> ()),
268 offsetPerBlock_ (blockSize * blockSize),
269 localError_ (new bool (false)),
270 errs_ (new Teuchos::RCP<std::ostringstream> ())
273 TEUCHOS_TEST_FOR_EXCEPTION(
274 ! graph_.
isSorted (), std::invalid_argument,
"Tpetra::"
275 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted "
276 "rows (isSorted() is false). This class assumes sorted rows.");
278 graphRCP_ = Teuchos::rcpFromRef(graph_);
284 const bool blockSizeIsNonpositive = (blockSize + 1 <= 1);
285 TEUCHOS_TEST_FOR_EXCEPTION(
286 blockSizeIsNonpositive, std::invalid_argument,
"Tpetra::"
287 "BlockCrsMatrix constructor: The input blockSize = " << blockSize <<
288 " <= 0. The block size must be positive.");
296 const auto colPointMap = Teuchos::rcp
298 *pointImporter_ = Teuchos::rcp
302 auto local_graph_h = graph.getLocalGraphHost ();
303 auto ptr_h = local_graph_h.row_map;
304 ptrHost_ = decltype(ptrHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph row offset"), ptr_h.extent(0));
307 auto ind_h = local_graph_h.entries;
308 indHost_ = decltype(indHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph column indices"), ind_h.extent(0));
311 const auto numValEnt = ind_h.extent(0) * offsetPerBlock ();
312 TEUCHOS_ASSERT_EQUALITY(numValEnt, values.size());
313 val_ = decltype (val_) (values);
317 template<
class Scalar,
class LO,
class GO,
class Node>
322 const LO blockSize) :
325 rowMeshMap_ (* (graph.getRowMap ())),
326 domainPointMap_ (domainPointMap),
327 rangePointMap_ (rangePointMap),
328 blockSize_ (blockSize),
329 X_colMap_ (new Teuchos::RCP<
BMV> ()),
330 Y_rowMap_ (new Teuchos::RCP<
BMV> ()),
331 pointImporter_ (new Teuchos::RCP<typename
crs_graph_type::import_type> ()),
332 offsetPerBlock_ (blockSize * blockSize),
333 localError_ (new bool (false)),
334 errs_ (new Teuchos::RCP<std::ostringstream> ())
336 TEUCHOS_TEST_FOR_EXCEPTION(
337 ! graph_.
isSorted (), std::invalid_argument,
"Tpetra::"
338 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted "
339 "rows (isSorted() is false). This class assumes sorted rows.");
341 graphRCP_ = Teuchos::rcpFromRef(graph_);
347 const bool blockSizeIsNonpositive = (blockSize + 1 <= 1);
348 TEUCHOS_TEST_FOR_EXCEPTION(
349 blockSizeIsNonpositive, std::invalid_argument,
"Tpetra::"
350 "BlockCrsMatrix constructor: The input blockSize = " << blockSize <<
351 " <= 0. The block size must be positive.");
355 const auto colPointMap = Teuchos::rcp
357 *pointImporter_ = Teuchos::rcp
361 auto local_graph_h = graph.getLocalGraphHost ();
362 auto ptr_h = local_graph_h.row_map;
363 ptrHost_ = decltype(ptrHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph row offset"), ptr_h.extent(0));
367 auto ind_h = local_graph_h.entries;
368 indHost_ = decltype(indHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph column indices"), ind_h.extent(0));
372 const auto numValEnt = ind_h.extent(0) * offsetPerBlock ();
373 val_ = decltype (val_) (impl_scalar_type_dualview(
"val", numValEnt));
378 template<
class Scalar,
class LO,
class GO,
class Node>
379 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
384 return Teuchos::rcp (
new map_type (domainPointMap_));
387 template<
class Scalar,
class LO,
class GO,
class Node>
388 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
393 return Teuchos::rcp (
new map_type (rangePointMap_));
396 template<
class Scalar,
class LO,
class GO,
class Node>
397 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
401 return graph_.getRowMap();
404 template<
class Scalar,
class LO,
class GO,
class Node>
405 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
409 return graph_.getColMap();
412 template<
class Scalar,
class LO,
class GO,
class Node>
417 return graph_.getGlobalNumRows();
420 template<
class Scalar,
class LO,
class GO,
class Node>
425 return graph_.getLocalNumRows();
428 template<
class Scalar,
class LO,
class GO,
class Node>
433 return graph_.getLocalMaxNumRowEntries();
436 template<
class Scalar,
class LO,
class GO,
class Node>
441 Teuchos::ETransp mode,
446 TEUCHOS_TEST_FOR_EXCEPTION(
447 mode != Teuchos::NO_TRANS && mode != Teuchos::TRANS && mode != Teuchos::CONJ_TRANS,
448 std::invalid_argument,
"Tpetra::BlockCrsMatrix::apply: "
449 "Invalid 'mode' argument. Valid values are Teuchos::NO_TRANS, "
450 "Teuchos::TRANS, and Teuchos::CONJ_TRANS.");
454 const LO blockSize = getBlockSize ();
456 X_view =
BMV (X, * (graph_.getDomainMap ()), blockSize);
457 Y_view =
BMV (Y, * (graph_.getRangeMap ()), blockSize);
459 catch (std::invalid_argument& e) {
460 TEUCHOS_TEST_FOR_EXCEPTION(
461 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
462 "apply: Either the input MultiVector X or the output MultiVector Y "
463 "cannot be viewed as a BlockMultiVector, given this BlockCrsMatrix's "
464 "graph. BlockMultiVector's constructor threw the following exception: "
473 const_cast<this_BCRS_type&
> (*this).applyBlock (X_view, Y_view, mode, alpha, beta);
474 }
catch (std::invalid_argument& e) {
475 TEUCHOS_TEST_FOR_EXCEPTION(
476 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
477 "apply: The implementation method applyBlock complained about having "
478 "an invalid argument. It reported the following: " << e.what ());
479 }
catch (std::logic_error& e) {
480 TEUCHOS_TEST_FOR_EXCEPTION(
481 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
482 "apply: The implementation method applyBlock complained about a "
483 "possible bug in its implementation. It reported the following: "
485 }
catch (std::exception& e) {
486 TEUCHOS_TEST_FOR_EXCEPTION(
487 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
488 "apply: The implementation method applyBlock threw an exception which "
489 "is neither std::invalid_argument nor std::logic_error, but is a "
490 "subclass of std::exception. It reported the following: "
493 TEUCHOS_TEST_FOR_EXCEPTION(
494 true, std::logic_error,
"Tpetra::BlockCrsMatrix::"
495 "apply: The implementation method applyBlock threw an exception which "
496 "is not an instance of a subclass of std::exception. This probably "
497 "indicates a bug. Please report this to the Tpetra developers.");
501 template<
class Scalar,
class LO,
class GO,
class Node>
506 Teuchos::ETransp mode,
510 TEUCHOS_TEST_FOR_EXCEPTION(
512 "Tpetra::BlockCrsMatrix::applyBlock: "
513 "X and Y have different block sizes. X.getBlockSize() = "
517 if (mode == Teuchos::NO_TRANS) {
518 applyBlockNoTrans (X, Y, alpha, beta);
519 }
else if (mode == Teuchos::TRANS || mode == Teuchos::CONJ_TRANS) {
520 applyBlockTrans (X, Y, mode, alpha, beta);
522 TEUCHOS_TEST_FOR_EXCEPTION(
523 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
524 "applyBlock: Invalid 'mode' argument. Valid values are "
525 "Teuchos::NO_TRANS, Teuchos::TRANS, and Teuchos::CONJ_TRANS.");
529 template<
class Scalar,
class LO,
class GO,
class Node>
540 TEUCHOS_TEST_FOR_EXCEPTION(!destMatrix.is_null(), std::invalid_argument,
541 "destMatrix is required to be null.");
545 RCP<crs_graph_type> srcGraph = rcp (
new crs_graph_type(this->getCrsGraph()));
546 RCP<crs_graph_type> destGraph = importAndFillCompleteCrsGraph<crs_graph_type>(srcGraph, importer,
547 srcGraph->getDomainMap(),
548 srcGraph->getRangeMap());
552 destMatrix = rcp (
new this_BCRS_type (*destGraph, getBlockSize()));
557 template<
class Scalar,
class LO,
class GO,
class Node>
568 TEUCHOS_TEST_FOR_EXCEPTION(!destMatrix.is_null(), std::invalid_argument,
569 "destMatrix is required to be null.");
573 RCP<crs_graph_type> srcGraph = rcp (
new crs_graph_type(this->getCrsGraph()));
574 RCP<crs_graph_type> destGraph = exportAndFillCompleteCrsGraph<crs_graph_type>(srcGraph, exporter,
575 srcGraph->getDomainMap(),
576 srcGraph->getRangeMap());
580 destMatrix = rcp (
new this_BCRS_type (*destGraph, getBlockSize()));
585 template<
class Scalar,
class LO,
class GO,
class Node>
590 auto val_d = val_.getDeviceView(Access::OverwriteAll);
594 template<
class Scalar,
class LO,
class GO,
class Node>
600 const LO numColInds)
const
602 Kokkos::View<ptrdiff_t*,Kokkos::HostSpace>
603 offsets_host_view(Kokkos::ViewAllocateWithoutInitializing(
"offsets"), numColInds);
604 ptrdiff_t * offsets = offsets_host_view.data();
605 const LO numOffsets = this->getLocalRowOffsets(localRowInd, offsets, colInds, numColInds);
606 const LO validCount = this->replaceLocalValuesByOffsets(localRowInd, offsets, vals, numOffsets);
610 template <
class Scalar,
class LO,
class GO,
class Node>
614 Kokkos::MemoryUnmanaged>& offsets)
const
616 graph_.getLocalDiagOffsets (offsets);
619 template <
class Scalar,
class LO,
class GO,
class Node>
623 Kokkos::MemoryUnmanaged>& diag,
624 const Kokkos::View<
const size_t*, device_type,
625 Kokkos::MemoryUnmanaged>& offsets)
const
627 using Kokkos::parallel_for;
628 const char prefix[] =
"Tpetra::BlockCrsMatrix::getLocalDiagCopy (2-arg): ";
630 const LO lclNumMeshRows =
static_cast<LO
> (rowMeshMap_.getLocalNumElements ());
631 const LO blockSize = this->getBlockSize ();
632 TEUCHOS_TEST_FOR_EXCEPTION
633 (static_cast<LO> (diag.extent (0)) < lclNumMeshRows ||
634 static_cast<LO> (diag.extent (1)) < blockSize ||
635 static_cast<LO> (diag.extent (2)) < blockSize,
636 std::invalid_argument, prefix <<
637 "The input Kokkos::View is not big enough to hold all the data.");
638 TEUCHOS_TEST_FOR_EXCEPTION
639 (static_cast<LO> (offsets.size ()) < lclNumMeshRows, std::invalid_argument,
640 prefix <<
"offsets.size() = " << offsets.size () <<
" < local number of "
641 "diagonal blocks " << lclNumMeshRows <<
".");
643 typedef Kokkos::RangePolicy<execution_space, LO> policy_type;
644 typedef GetLocalDiagCopy<Scalar, LO, GO, Node> functor_type;
650 auto val_d = val_.getDeviceView(Access::ReadOnly);
651 parallel_for (policy_type (0, lclNumMeshRows),
652 functor_type (diag, val_d, offsets,
653 graph_.getLocalGraphDevice ().row_map, blockSize_));
656 template<
class Scalar,
class LO,
class GO,
class Node>
662 const LO numColInds)
const
664 Kokkos::View<ptrdiff_t*,Kokkos::HostSpace>
665 offsets_host_view(Kokkos::ViewAllocateWithoutInitializing(
"offsets"), numColInds);
666 ptrdiff_t * offsets = offsets_host_view.data();
667 const LO numOffsets = this->getLocalRowOffsets(localRowInd, offsets, colInds, numColInds);
668 const LO validCount = this->absMaxLocalValuesByOffsets(localRowInd, offsets, vals, numOffsets);
673 template<
class Scalar,
class LO,
class GO,
class Node>
679 const LO numColInds)
const
681 Kokkos::View<ptrdiff_t*,Kokkos::HostSpace>
682 offsets_host_view(Kokkos::ViewAllocateWithoutInitializing(
"offsets"), numColInds);
683 ptrdiff_t * offsets = offsets_host_view.data();
684 const LO numOffsets = this->getLocalRowOffsets(localRowInd, offsets, colInds, numColInds);
685 const LO validCount = this->sumIntoLocalValuesByOffsets(localRowInd, offsets, vals, numOffsets);
688 template<
class Scalar,
class LO,
class GO,
class Node>
692 nonconst_local_inds_host_view_type &Indices,
693 nonconst_values_host_view_type &Values,
694 size_t& NumEntries)
const
696 auto vals = getValuesHost(LocalRow);
697 const LO numInds = ptrHost_(LocalRow+1) - ptrHost_(LocalRow);
698 if (numInds > (LO)Indices.extent(0) || numInds*blockSize_*blockSize_ > (LO)Values.extent(0)) {
699 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error,
700 "Tpetra::BlockCrsMatrix::getLocalRowCopy : Column and/or values array is not large enough to hold "
701 << numInds <<
" row entries");
703 const LO * colInds = indHost_.data() + ptrHost_(LocalRow);
704 for (LO i=0; i<numInds; ++i) {
705 Indices[i] = colInds[i];
707 for (LO i=0; i<numInds*blockSize_*blockSize_; ++i) {
710 NumEntries = numInds;
713 template<
class Scalar,
class LO,
class GO,
class Node>
719 const LO numColInds)
const
721 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
726 return static_cast<LO
> (0);
729 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
733 for (LO k = 0; k < numColInds; ++k) {
734 const LO relBlockOffset =
735 this->findRelOffsetOfColumnIndex (localRowInd, colInds[k], hint);
736 if (relBlockOffset != LINV) {
737 offsets[k] =
static_cast<ptrdiff_t
> (relBlockOffset);
738 hint = relBlockOffset + 1;
746 template<
class Scalar,
class LO,
class GO,
class Node>
750 const ptrdiff_t offsets[],
752 const LO numOffsets)
const
754 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
759 return static_cast<LO
> (0);
766 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
767 const ptrdiff_t STINV = Teuchos::OrdinalTraits<ptrdiff_t>::invalid ();
768 size_t pointOffset = 0;
771 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
772 const size_t blockOffset = offsets[k]*perBlockSize;
773 if (offsets[k] != STINV) {
775 const_little_block_type A_new = getConstLocalBlockFromInput (vIn, pointOffset);
784 template<
class Scalar,
class LO,
class GO,
class Node>
788 const ptrdiff_t offsets[],
790 const LO numOffsets)
const
792 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
797 return static_cast<LO
> (0);
799 const impl_scalar_type*
const vIn =
reinterpret_cast<const impl_scalar_type*
> (vals);
800 auto val_out = getValuesHost(localRowInd);
801 impl_scalar_type* vOut =
const_cast<impl_scalar_type*
>(val_out.data());
803 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
804 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
805 size_t pointOffset = 0;
808 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
809 const size_t blockOffset = offsets[k]*perBlockSize;
810 if (blockOffset != STINV) {
811 little_block_type A_old = getNonConstLocalBlockFromInput (vOut, blockOffset);
812 const_little_block_type A_new = getConstLocalBlockFromInput (vIn, pointOffset);
821 template<
class Scalar,
class LO,
class GO,
class Node>
825 const ptrdiff_t offsets[],
827 const LO numOffsets)
const
829 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
834 return static_cast<LO
> (0);
842 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
843 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
844 size_t pointOffset = 0;
847 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
848 const size_t blockOffset = offsets[k]*perBlockSize;
849 if (blockOffset != STINV) {
851 const_little_block_type A_new = getConstLocalBlockFromInput (vIn, pointOffset);
852 AXPY (ONE, A_new, A_old);
859 template<
class Scalar,
class LO,
class GO,
class Node>
861 impl_scalar_type_dualview::t_host::const_type
865 return val_.getHostView(Access::ReadOnly);
868 template<
class Scalar,
class LO,
class GO,
class Node>
869 typename BlockCrsMatrix<Scalar, LO, GO, Node>::
870 impl_scalar_type_dualview::t_dev::const_type
871 BlockCrsMatrix<Scalar, LO, GO, Node>::
872 getValuesDevice ()
const
874 return val_.getDeviceView(Access::ReadOnly);
877 template<
class Scalar,
class LO,
class GO,
class Node>
878 typename BlockCrsMatrix<Scalar, LO, GO, Node>::
879 impl_scalar_type_dualview::t_host
883 return val_.getHostView(Access::ReadWrite);
886 template<
class Scalar,
class LO,
class GO,
class Node>
888 impl_scalar_type_dualview::t_dev
892 return val_.getDeviceView(Access::ReadWrite);
895 template<
class Scalar,
class LO,
class GO,
class Node>
896 typename BlockCrsMatrix<Scalar, LO, GO, Node>::
897 impl_scalar_type_dualview::t_host::const_type
898 BlockCrsMatrix<Scalar, LO, GO, Node>::
899 getValuesHost (
const LO& lclRow)
const
901 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
902 auto val = val_.getHostView(Access::ReadOnly);
903 auto r_val = Kokkos::subview(val, Kokkos::pair<LO,LO>(ptrHost_(lclRow)*perBlockSize, ptrHost_(lclRow+1)*perBlockSize));
907 template<
class Scalar,
class LO,
class GO,
class Node>
909 impl_scalar_type_dualview::t_dev::const_type
913 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
914 auto val = val_.getDeviceView(Access::ReadOnly);
915 auto r_val = Kokkos::subview(val, Kokkos::pair<LO,LO>(ptrHost_(lclRow)*perBlockSize, ptrHost_(lclRow+1)*perBlockSize));
919 template<
class Scalar,
class LO,
class GO,
class Node>
924 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
925 auto val = val_.getHostView(Access::ReadWrite);
926 auto r_val = Kokkos::subview(val, Kokkos::pair<LO,LO>(ptrHost_(lclRow)*perBlockSize, ptrHost_(lclRow+1)*perBlockSize));
930 template<
class Scalar,
class LO,
class GO,
class Node>
935 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
936 auto val = val_.getDeviceView(Access::ReadWrite);
937 auto r_val = Kokkos::subview(val, Kokkos::pair<LO,LO>(ptrHost_(lclRow)*perBlockSize, ptrHost_(lclRow+1)*perBlockSize));
941 template<
class Scalar,
class LO,
class GO,
class Node>
946 const size_t numEntInGraph = graph_.getNumEntriesInLocalRow (localRowInd);
947 if (numEntInGraph == Teuchos::OrdinalTraits<size_t>::invalid ()) {
948 return static_cast<LO
> (0);
950 return static_cast<LO
> (numEntInGraph);
954 template<
class Scalar,
class LO,
class GO,
class Node>
955 typename BlockCrsMatrix<Scalar, LO, GO, Node>::local_matrix_device_type
959 auto numCols = this->graph_.getColMap()->getLocalNumElements();
960 auto val = val_.getDeviceView(Access::ReadWrite);
961 const LO blockSize = this->getBlockSize ();
962 const auto graph = this->graph_.getLocalGraphDevice ();
964 return local_matrix_device_type(
"Tpetra::BlockCrsMatrix::lclMatrixDevice",
971 template<
class Scalar,
class LO,
class GO,
class Node>
976 const Teuchos::ETransp mode,
986 TEUCHOS_TEST_FOR_EXCEPTION(
987 true, std::logic_error,
"Tpetra::BlockCrsMatrix::apply: "
988 "transpose and conjugate transpose modes are not yet implemented.");
991 template<
class Scalar,
class LO,
class GO,
class Node>
993 BlockCrsMatrix<Scalar, LO, GO, Node>::
994 applyBlockNoTrans (
const BlockMultiVector<Scalar, LO, GO, Node>& X,
995 BlockMultiVector<Scalar, LO, GO, Node>& Y,
1001 typedef ::Tpetra::Import<LO, GO, Node> import_type;
1002 typedef ::Tpetra::Export<LO, GO, Node> export_type;
1003 const Scalar zero = STS::zero ();
1004 const Scalar one = STS::one ();
1005 RCP<const import_type>
import = graph_.getImporter ();
1007 RCP<const export_type> theExport = graph_.getExporter ();
1008 const char prefix[] =
"Tpetra::BlockCrsMatrix::applyBlockNoTrans: ";
1010 if (alpha == zero) {
1014 else if (beta != one) {
1019 const BMV* X_colMap = NULL;
1020 if (
import.is_null ()) {
1024 catch (std::exception& e) {
1025 TEUCHOS_TEST_FOR_EXCEPTION
1026 (
true, std::logic_error, prefix <<
"Tpetra::MultiVector::"
1027 "operator= threw an exception: " << std::endl << e.what ());
1037 if ((*X_colMap_).is_null () ||
1038 (**X_colMap_).getNumVectors () != X.getNumVectors () ||
1039 (**X_colMap_).getBlockSize () != X.getBlockSize ()) {
1040 *X_colMap_ = rcp (
new BMV (* (graph_.getColMap ()), getBlockSize (),
1041 static_cast<LO
> (X.getNumVectors ())));
1043 (*X_colMap_)->getMultiVectorView().doImport (X.getMultiVectorView (),
1047 X_colMap = &(**X_colMap_);
1049 catch (std::exception& e) {
1050 TEUCHOS_TEST_FOR_EXCEPTION
1051 (
true, std::logic_error, prefix <<
"Tpetra::MultiVector::"
1052 "operator= threw an exception: " << std::endl << e.what ());
1056 BMV* Y_rowMap = NULL;
1057 if (theExport.is_null ()) {
1060 else if ((*Y_rowMap_).is_null () ||
1061 (**Y_rowMap_).getNumVectors () != Y.getNumVectors () ||
1062 (**Y_rowMap_).getBlockSize () != Y.getBlockSize ()) {
1063 *Y_rowMap_ = rcp (
new BMV (* (graph_.getRowMap ()), getBlockSize (),
1064 static_cast<LO
> (X.getNumVectors ())));
1066 Y_rowMap = &(**Y_rowMap_);
1068 catch (std::exception& e) {
1069 TEUCHOS_TEST_FOR_EXCEPTION(
1070 true, std::logic_error, prefix <<
"Tpetra::MultiVector::"
1071 "operator= threw an exception: " << std::endl << e.what ());
1076 localApplyBlockNoTrans (*X_colMap, *Y_rowMap, alpha, beta);
1078 catch (std::exception& e) {
1079 TEUCHOS_TEST_FOR_EXCEPTION
1080 (
true, std::runtime_error, prefix <<
"localApplyBlockNoTrans threw "
1081 "an exception: " << e.what ());
1084 TEUCHOS_TEST_FOR_EXCEPTION
1085 (
true, std::runtime_error, prefix <<
"localApplyBlockNoTrans threw "
1086 "an exception not a subclass of std::exception.");
1089 if (! theExport.is_null ()) {
1096 template <
class Scalar,
class LO,
class GO,
class Node>
1097 void BlockCrsMatrix<Scalar, LO, GO, Node>::localApplyBlockNoTrans(
1098 const BlockMultiVector<Scalar, LO, GO, Node> &X,
1099 BlockMultiVector<Scalar, LO, GO, Node> &Y,
const Scalar alpha,
1100 const Scalar beta) {
1102 "Tpetra::BlockCrsMatrix::localApplyBlockNoTrans");
1103 const impl_scalar_type alpha_impl = alpha;
1104 const auto graph = this->graph_.getLocalGraphDevice();
1106 auto X_mv = X.getMultiVectorView();
1107 auto Y_mv = Y.getMultiVectorView();
1108 auto X_lcl = X_mv.getLocalViewDevice(Access::ReadOnly);
1109 auto Y_lcl = Y_mv.getLocalViewDevice(Access::ReadWrite);
1111 auto A_lcl = getLocalMatrixDevice();
1112 KokkosSparse::spmv(KokkosSparse::NoTranspose, alpha_impl, A_lcl, X_lcl, beta,
1117 template<
class Scalar,
class LO,
class GO,
class Node>
1119 BlockCrsMatrix<Scalar, LO, GO, Node>::
1120 findRelOffsetOfColumnIndex (
const LO localRowIndex,
1121 const LO colIndexToFind,
1122 const LO hint)
const
1124 const size_t absStartOffset = ptrHost_[localRowIndex];
1125 const size_t absEndOffset = ptrHost_[localRowIndex+1];
1126 const LO numEntriesInRow =
static_cast<LO
> (absEndOffset - absStartOffset);
1128 const LO*
const curInd = indHost_.data () + absStartOffset;
1131 if (hint < numEntriesInRow && curInd[hint] == colIndexToFind) {
1138 LO relOffset = Teuchos::OrdinalTraits<LO>::invalid ();
1143 const LO maxNumEntriesForLinearSearch = 32;
1144 if (numEntriesInRow > maxNumEntriesForLinearSearch) {
1149 const LO* beg = curInd;
1150 const LO* end = curInd + numEntriesInRow;
1151 std::pair<const LO*, const LO*> p =
1152 std::equal_range (beg, end, colIndexToFind);
1153 if (p.first != p.second) {
1155 relOffset =
static_cast<LO
> (p.first - beg);
1159 for (LO k = 0; k < numEntriesInRow; ++k) {
1160 if (colIndexToFind == curInd[k]) {
1170 template<
class Scalar,
class LO,
class GO,
class Node>
1172 BlockCrsMatrix<Scalar, LO, GO, Node>::
1173 offsetPerBlock ()
const
1175 return offsetPerBlock_;
1178 template<
class Scalar,
class LO,
class GO,
class Node>
1179 typename BlockCrsMatrix<Scalar, LO, GO, Node>::const_little_block_type
1180 BlockCrsMatrix<Scalar, LO, GO, Node>::
1181 getConstLocalBlockFromInput (
const impl_scalar_type* val,
1182 const size_t pointOffset)
const
1185 const LO rowStride = blockSize_;
1186 return const_little_block_type (val + pointOffset, blockSize_, rowStride);
1189 template<
class Scalar,
class LO,
class GO,
class Node>
1190 typename BlockCrsMatrix<Scalar, LO, GO, Node>::little_block_type
1191 BlockCrsMatrix<Scalar, LO, GO, Node>::
1192 getNonConstLocalBlockFromInput (impl_scalar_type* val,
1193 const size_t pointOffset)
const
1196 const LO rowStride = blockSize_;
1197 return little_block_type (val + pointOffset, blockSize_, rowStride);
1200 template<
class Scalar,
class LO,
class GO,
class Node>
1201 typename BlockCrsMatrix<Scalar, LO, GO, Node>::little_block_host_type
1202 BlockCrsMatrix<Scalar, LO, GO, Node>::
1203 getNonConstLocalBlockFromInputHost (impl_scalar_type* val,
1204 const size_t pointOffset)
const
1207 const LO rowStride = blockSize_;
1208 const size_t bs2 = blockSize_ * blockSize_;
1209 return little_block_host_type (val + bs2 * pointOffset, blockSize_, rowStride);
1212 template<
class Scalar,
class LO,
class GO,
class Node>
1213 typename BlockCrsMatrix<Scalar, LO, GO, Node>::little_block_type
1214 BlockCrsMatrix<Scalar, LO, GO, Node>::
1215 getLocalBlockDeviceNonConst (
const LO localRowInd,
const LO localColInd)
const
1217 using this_BCRS_type = BlockCrsMatrix<Scalar, LO, GO, Node>;
1219 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1220 const LO relBlockOffset = this->findRelOffsetOfColumnIndex (localRowInd, localColInd);
1221 if (relBlockOffset != Teuchos::OrdinalTraits<LO>::invalid ()) {
1222 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1223 auto vals =
const_cast<this_BCRS_type&
>(*this).getValuesDeviceNonConst();
1224 auto r_val = getNonConstLocalBlockFromInput (vals.data(), absBlockOffset);
1228 return little_block_type ();
1232 template<
class Scalar,
class LO,
class GO,
class Node>
1233 typename BlockCrsMatrix<Scalar, LO, GO, Node>::little_block_host_type
1234 BlockCrsMatrix<Scalar, LO, GO, Node>::
1235 getLocalBlockHostNonConst (
const LO localRowInd,
const LO localColInd)
const
1237 using this_BCRS_type = BlockCrsMatrix<Scalar, LO, GO, Node>;
1239 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1240 const LO relBlockOffset = this->findRelOffsetOfColumnIndex (localRowInd, localColInd);
1241 if (relBlockOffset != Teuchos::OrdinalTraits<LO>::invalid ()) {
1242 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1243 auto vals =
const_cast<this_BCRS_type&
>(*this).getValuesHostNonConst();
1244 auto r_val = getNonConstLocalBlockFromInputHost (vals.data(), absBlockOffset);
1248 return little_block_host_type ();
1253 template<
class Scalar,
class LO,
class GO,
class Node>
1255 BlockCrsMatrix<Scalar, LO, GO, Node>::
1256 checkSizes (const ::Tpetra::SrcDistObject& source)
1259 typedef BlockCrsMatrix<Scalar, LO, GO, Node> this_BCRS_type;
1260 const this_BCRS_type* src =
dynamic_cast<const this_BCRS_type*
> (&source);
1263 std::ostream& err = markLocalErrorAndGetStream ();
1264 err <<
"checkSizes: The source object of the Import or Export "
1265 "must be a BlockCrsMatrix with the same template parameters as the "
1266 "target object." << endl;
1271 if (src->getBlockSize () != this->getBlockSize ()) {
1272 std::ostream& err = markLocalErrorAndGetStream ();
1273 err <<
"checkSizes: The source and target objects of the Import or "
1274 <<
"Export must have the same block sizes. The source's block "
1275 <<
"size = " << src->getBlockSize () <<
" != the target's block "
1276 <<
"size = " << this->getBlockSize () <<
"." << endl;
1278 if (! src->graph_.isFillComplete ()) {
1279 std::ostream& err = markLocalErrorAndGetStream ();
1280 err <<
"checkSizes: The source object of the Import or Export is "
1281 "not fill complete. Both source and target objects must be fill "
1282 "complete." << endl;
1284 if (! this->graph_.isFillComplete ()) {
1285 std::ostream& err = markLocalErrorAndGetStream ();
1286 err <<
"checkSizes: The target object of the Import or Export is "
1287 "not fill complete. Both source and target objects must be fill "
1288 "complete." << endl;
1290 if (src->graph_.getColMap ().is_null ()) {
1291 std::ostream& err = markLocalErrorAndGetStream ();
1292 err <<
"checkSizes: The source object of the Import or Export does "
1293 "not have a column Map. Both source and target objects must have "
1294 "column Maps." << endl;
1296 if (this->graph_.getColMap ().is_null ()) {
1297 std::ostream& err = markLocalErrorAndGetStream ();
1298 err <<
"checkSizes: The target object of the Import or Export does "
1299 "not have a column Map. Both source and target objects must have "
1300 "column Maps." << endl;
1304 return ! (* (this->localError_));
1307 template<
class Scalar,
class LO,
class GO,
class Node>
1311 (const ::Tpetra::SrcDistObject& source,
1312 const size_t numSameIDs,
1319 using ::Tpetra::Details::Behavior;
1321 using ::Tpetra::Details::ProfilingRegion;
1325 ProfilingRegion profile_region(
"Tpetra::BlockCrsMatrix::copyAndPermute");
1326 const bool debug = Behavior::debug();
1327 const bool verbose = Behavior::verbose();
1332 std::ostringstream os;
1333 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1334 os <<
"Proc " << myRank <<
": BlockCrsMatrix::copyAndPermute : " << endl;
1339 if (* (this->localError_)) {
1340 std::ostream& err = this->markLocalErrorAndGetStream ();
1342 <<
"The target object of the Import or Export is already in an error state."
1351 std::ostringstream os;
1352 os << prefix << endl
1355 std::cerr << os.str ();
1361 if (permuteToLIDs.extent (0) != permuteFromLIDs.extent (0)) {
1362 std::ostream& err = this->markLocalErrorAndGetStream ();
1364 <<
"permuteToLIDs.extent(0) = " << permuteToLIDs.extent (0)
1365 <<
" != permuteFromLIDs.extent(0) = " << permuteFromLIDs.extent(0)
1369 if (permuteToLIDs.need_sync_host () || permuteFromLIDs.need_sync_host ()) {
1370 std::ostream& err = this->markLocalErrorAndGetStream ();
1372 <<
"Both permuteToLIDs and permuteFromLIDs must be sync'd to host."
1377 const this_BCRS_type* src =
dynamic_cast<const this_BCRS_type*
> (&source);
1379 std::ostream& err = this->markLocalErrorAndGetStream ();
1381 <<
"The source (input) object of the Import or "
1382 "Export is either not a BlockCrsMatrix, or does not have the right "
1383 "template parameters. checkSizes() should have caught this. "
1384 "Please report this bug to the Tpetra developers." << endl;
1388 bool lclErr =
false;
1389 #ifdef HAVE_TPETRA_DEBUG
1390 std::set<LO> invalidSrcCopyRows;
1391 std::set<LO> invalidDstCopyRows;
1392 std::set<LO> invalidDstCopyCols;
1393 std::set<LO> invalidDstPermuteCols;
1394 std::set<LO> invalidPermuteFromRows;
1395 #endif // HAVE_TPETRA_DEBUG
1404 #ifdef HAVE_TPETRA_DEBUG
1405 const map_type& srcRowMap = * (src->graph_.getRowMap ());
1406 #endif // HAVE_TPETRA_DEBUG
1407 const map_type& dstRowMap = * (this->graph_.getRowMap ());
1408 const map_type& srcColMap = * (src->graph_.getColMap ());
1409 const map_type& dstColMap = * (this->graph_.getColMap ());
1410 const bool canUseLocalColumnIndices = srcColMap.
locallySameAs (dstColMap);
1412 const size_t numPermute =
static_cast<size_t> (permuteFromLIDs.extent(0));
1414 std::ostringstream os;
1416 <<
"canUseLocalColumnIndices: "
1417 << (canUseLocalColumnIndices ?
"true" :
"false")
1418 <<
", numPermute: " << numPermute
1420 std::cerr << os.str ();
1423 const auto permuteToLIDsHost = permuteToLIDs.view_host();
1424 const auto permuteFromLIDsHost = permuteFromLIDs.view_host();
1426 if (canUseLocalColumnIndices) {
1428 for (LO localRow = 0; localRow < static_cast<LO> (numSameIDs); ++localRow) {
1429 #ifdef HAVE_TPETRA_DEBUG
1432 invalidSrcCopyRows.insert (localRow);
1435 #endif // HAVE_TPETRA_DEBUG
1437 local_inds_host_view_type lclSrcCols;
1438 values_host_view_type vals;
1442 src->getLocalRowView (localRow, lclSrcCols, vals); numEntries = lclSrcCols.extent(0);
1443 if (numEntries > 0) {
1444 LO err = this->replaceLocalValues (localRow, lclSrcCols.data(),
reinterpret_cast<const scalar_type*
>(vals.data()), numEntries);
1445 if (err != numEntries) {
1448 #ifdef HAVE_TPETRA_DEBUG
1449 invalidDstCopyRows.insert (localRow);
1450 #endif // HAVE_TPETRA_DEBUG
1458 for (LO k = 0; k < numEntries; ++k) {
1461 #ifdef HAVE_TPETRA_DEBUG
1462 (void) invalidDstCopyCols.insert (lclSrcCols[k]);
1463 #endif // HAVE_TPETRA_DEBUG
1472 for (
size_t k = 0; k < numPermute; ++k) {
1473 const LO srcLclRow =
static_cast<LO
> (permuteFromLIDsHost(k));
1474 const LO dstLclRow =
static_cast<LO
> (permuteToLIDsHost(k));
1476 local_inds_host_view_type lclSrcCols;
1477 values_host_view_type vals;
1479 src->getLocalRowView (srcLclRow, lclSrcCols, vals); numEntries = lclSrcCols.extent(0);
1480 if (numEntries > 0) {
1481 LO err = this->replaceLocalValues (dstLclRow, lclSrcCols.data(),
reinterpret_cast<const scalar_type*
>(vals.data()), numEntries);
1482 if (err != numEntries) {
1484 #ifdef HAVE_TPETRA_DEBUG
1485 for (LO c = 0; c < numEntries; ++c) {
1487 invalidDstPermuteCols.insert (lclSrcCols[c]);
1490 #endif // HAVE_TPETRA_DEBUG
1497 const size_t maxNumEnt = src->graph_.getLocalMaxNumRowEntries ();
1498 Teuchos::Array<LO> lclDstCols (maxNumEnt);
1501 for (LO localRow = 0; localRow < static_cast<LO> (numSameIDs); ++localRow) {
1502 local_inds_host_view_type lclSrcCols;
1503 values_host_view_type vals;
1509 src->getLocalRowView (localRow, lclSrcCols, vals); numEntries = lclSrcCols.extent(0);
1510 }
catch (std::exception& e) {
1512 std::ostringstream os;
1513 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1514 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" localRow "
1515 << localRow <<
", src->getLocalRowView() threw an exception: "
1517 std::cerr << os.str ();
1522 if (numEntries > 0) {
1523 if (static_cast<size_t> (numEntries) > static_cast<size_t> (lclDstCols.size ())) {
1526 std::ostringstream os;
1527 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1528 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" localRow "
1529 << localRow <<
", numEntries = " << numEntries <<
" > maxNumEnt = "
1530 << maxNumEnt << endl;
1531 std::cerr << os.str ();
1537 Teuchos::ArrayView<LO> lclDstColsView = lclDstCols.view (0, numEntries);
1538 for (LO j = 0; j < numEntries; ++j) {
1540 if (lclDstColsView[j] == Teuchos::OrdinalTraits<LO>::invalid ()) {
1542 #ifdef HAVE_TPETRA_DEBUG
1543 invalidDstCopyCols.insert (lclDstColsView[j]);
1544 #endif // HAVE_TPETRA_DEBUG
1549 err = this->replaceLocalValues (localRow, lclDstColsView.getRawPtr (),
reinterpret_cast<const scalar_type*
>(vals.data()), numEntries);
1550 }
catch (std::exception& e) {
1552 std::ostringstream os;
1553 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1554 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" localRow "
1555 << localRow <<
", this->replaceLocalValues() threw an exception: "
1557 std::cerr << os.str ();
1561 if (err != numEntries) {
1564 std::ostringstream os;
1565 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1566 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" "
1567 "localRow " << localRow <<
", this->replaceLocalValues "
1568 "returned " << err <<
" instead of numEntries = "
1569 << numEntries << endl;
1570 std::cerr << os.str ();
1578 for (
size_t k = 0; k < numPermute; ++k) {
1579 const LO srcLclRow =
static_cast<LO
> (permuteFromLIDsHost(k));
1580 const LO dstLclRow =
static_cast<LO
> (permuteToLIDsHost(k));
1582 local_inds_host_view_type lclSrcCols;
1583 values_host_view_type vals;
1587 src->getLocalRowView (srcLclRow, lclSrcCols, vals); numEntries = lclSrcCols.extent(0);
1588 }
catch (std::exception& e) {
1590 std::ostringstream os;
1591 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1592 os <<
"Proc " << myRank <<
": copyAndPermute: At \"permute\" "
1593 "srcLclRow " << srcLclRow <<
" and dstLclRow " << dstLclRow
1594 <<
", src->getLocalRowView() threw an exception: " << e.what ();
1595 std::cerr << os.str ();
1600 if (numEntries > 0) {
1601 if (static_cast<size_t> (numEntries) > static_cast<size_t> (lclDstCols.size ())) {
1607 Teuchos::ArrayView<LO> lclDstColsView = lclDstCols.view (0, numEntries);
1608 for (LO j = 0; j < numEntries; ++j) {
1610 if (lclDstColsView[j] == Teuchos::OrdinalTraits<LO>::invalid ()) {
1612 #ifdef HAVE_TPETRA_DEBUG
1613 invalidDstPermuteCols.insert (lclDstColsView[j]);
1614 #endif // HAVE_TPETRA_DEBUG
1617 LO err = this->replaceLocalValues (dstLclRow, lclDstColsView.getRawPtr (),
reinterpret_cast<const scalar_type*
>(vals.data()), numEntries);
1618 if (err != numEntries) {
1627 std::ostream& err = this->markLocalErrorAndGetStream ();
1628 #ifdef HAVE_TPETRA_DEBUG
1629 err <<
"copyAndPermute: The graph structure of the source of the "
1630 "Import or Export must be a subset of the graph structure of the "
1632 err <<
"invalidSrcCopyRows = [";
1633 for (
typename std::set<LO>::const_iterator it = invalidSrcCopyRows.begin ();
1634 it != invalidSrcCopyRows.end (); ++it) {
1636 typename std::set<LO>::const_iterator itp1 = it;
1638 if (itp1 != invalidSrcCopyRows.end ()) {
1642 err <<
"], invalidDstCopyRows = [";
1643 for (
typename std::set<LO>::const_iterator it = invalidDstCopyRows.begin ();
1644 it != invalidDstCopyRows.end (); ++it) {
1646 typename std::set<LO>::const_iterator itp1 = it;
1648 if (itp1 != invalidDstCopyRows.end ()) {
1652 err <<
"], invalidDstCopyCols = [";
1653 for (
typename std::set<LO>::const_iterator it = invalidDstCopyCols.begin ();
1654 it != invalidDstCopyCols.end (); ++it) {
1656 typename std::set<LO>::const_iterator itp1 = it;
1658 if (itp1 != invalidDstCopyCols.end ()) {
1662 err <<
"], invalidDstPermuteCols = [";
1663 for (
typename std::set<LO>::const_iterator it = invalidDstPermuteCols.begin ();
1664 it != invalidDstPermuteCols.end (); ++it) {
1666 typename std::set<LO>::const_iterator itp1 = it;
1668 if (itp1 != invalidDstPermuteCols.end ()) {
1672 err <<
"], invalidPermuteFromRows = [";
1673 for (
typename std::set<LO>::const_iterator it = invalidPermuteFromRows.begin ();
1674 it != invalidPermuteFromRows.end (); ++it) {
1676 typename std::set<LO>::const_iterator itp1 = it;
1678 if (itp1 != invalidPermuteFromRows.end ()) {
1684 err <<
"copyAndPermute: The graph structure of the source of the "
1685 "Import or Export must be a subset of the graph structure of the "
1687 #endif // HAVE_TPETRA_DEBUG
1691 std::ostringstream os;
1692 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1693 const bool lclSuccess = ! (* (this->localError_));
1694 os <<
"*** Proc " << myRank <<
": copyAndPermute "
1695 << (lclSuccess ?
"succeeded" :
"FAILED");
1699 os <<
": error messages: " << this->errorMessages ();
1701 std::cerr << os.str ();
1725 template<
class LO,
class GO>
1727 packRowCount (
const size_t numEnt,
1728 const size_t numBytesPerValue,
1729 const size_t blkSize)
1731 using ::Tpetra::Details::PackTraits;
1742 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
1743 const size_t valsLen = numEnt * numBytesPerValue * blkSize * blkSize;
1744 return numEntLen + gidsLen + valsLen;
1758 template<
class ST,
class LO,
class GO>
1760 unpackRowCount (
const typename ::Tpetra::Details::PackTraits<LO>::input_buffer_type& imports,
1761 const size_t offset,
1762 const size_t numBytes,
1765 using Kokkos::subview;
1766 using ::Tpetra::Details::PackTraits;
1768 if (numBytes == 0) {
1770 return static_cast<size_t> (0);
1775 TEUCHOS_TEST_FOR_EXCEPTION
1776 (theNumBytes > numBytes, std::logic_error,
"unpackRowCount: "
1777 "theNumBytes = " << theNumBytes <<
" < numBytes = " << numBytes
1779 const char*
const inBuf = imports.data () + offset;
1781 TEUCHOS_TEST_FOR_EXCEPTION
1782 (actualNumBytes > numBytes, std::logic_error,
"unpackRowCount: "
1783 "actualNumBytes = " << actualNumBytes <<
" < numBytes = " << numBytes
1785 return static_cast<size_t> (numEntLO);
1792 template<
class ST,
class LO,
class GO>
1794 packRowForBlockCrs (
const typename ::Tpetra::Details::PackTraits<LO>::output_buffer_type exports,
1795 const size_t offset,
1796 const size_t numEnt,
1797 const typename ::Tpetra::Details::PackTraits<GO>::input_array_type& gidsIn,
1798 const typename ::Tpetra::Details::PackTraits<ST>::input_array_type& valsIn,
1799 const size_t numBytesPerValue,
1800 const size_t blockSize)
1802 using Kokkos::subview;
1803 using ::Tpetra::Details::PackTraits;
1809 const size_t numScalarEnt = numEnt * blockSize * blockSize;
1812 const LO numEntLO =
static_cast<size_t> (numEnt);
1814 const size_t numEntBeg = offset;
1816 const size_t gidsBeg = numEntBeg + numEntLen;
1817 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
1818 const size_t valsBeg = gidsBeg + gidsLen;
1819 const size_t valsLen = numScalarEnt * numBytesPerValue;
1821 char*
const numEntOut = exports.data () + numEntBeg;
1822 char*
const gidsOut = exports.data () + gidsBeg;
1823 char*
const valsOut = exports.data () + valsBeg;
1825 size_t numBytesOut = 0;
1830 Kokkos::pair<int, size_t> p;
1831 p = PackTraits<GO>::packArray (gidsOut, gidsIn.data (), numEnt);
1832 errorCode += p.first;
1833 numBytesOut += p.second;
1835 p = PackTraits<ST>::packArray (valsOut, valsIn.data (), numScalarEnt);
1836 errorCode += p.first;
1837 numBytesOut += p.second;
1840 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
1841 TEUCHOS_TEST_FOR_EXCEPTION
1842 (numBytesOut != expectedNumBytes, std::logic_error,
1843 "packRowForBlockCrs: numBytesOut = " << numBytesOut
1844 <<
" != expectedNumBytes = " << expectedNumBytes <<
".");
1846 TEUCHOS_TEST_FOR_EXCEPTION
1847 (errorCode != 0, std::runtime_error,
"packRowForBlockCrs: "
1848 "PackTraits::packArray returned a nonzero error code");
1854 template<
class ST,
class LO,
class GO>
1856 unpackRowForBlockCrs (
const typename ::Tpetra::Details::PackTraits<GO>::output_array_type& gidsOut,
1857 const typename ::Tpetra::Details::PackTraits<ST>::output_array_type& valsOut,
1858 const typename ::Tpetra::Details::PackTraits<int>::input_buffer_type& imports,
1859 const size_t offset,
1860 const size_t numBytes,
1861 const size_t numEnt,
1862 const size_t numBytesPerValue,
1863 const size_t blockSize)
1865 using ::Tpetra::Details::PackTraits;
1867 if (numBytes == 0) {
1871 const size_t numScalarEnt = numEnt * blockSize * blockSize;
1872 TEUCHOS_TEST_FOR_EXCEPTION
1873 (static_cast<size_t> (imports.extent (0)) <= offset,
1874 std::logic_error,
"unpackRowForBlockCrs: imports.extent(0) = "
1875 << imports.extent (0) <<
" <= offset = " << offset <<
".");
1876 TEUCHOS_TEST_FOR_EXCEPTION
1877 (static_cast<size_t> (imports.extent (0)) < offset + numBytes,
1878 std::logic_error,
"unpackRowForBlockCrs: imports.extent(0) = "
1879 << imports.extent (0) <<
" < offset + numBytes = "
1880 << (offset + numBytes) <<
".");
1885 const size_t numEntBeg = offset;
1887 const size_t gidsBeg = numEntBeg + numEntLen;
1888 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
1889 const size_t valsBeg = gidsBeg + gidsLen;
1890 const size_t valsLen = numScalarEnt * numBytesPerValue;
1892 const char*
const numEntIn = imports.data () + numEntBeg;
1893 const char*
const gidsIn = imports.data () + gidsBeg;
1894 const char*
const valsIn = imports.data () + valsBeg;
1896 size_t numBytesOut = 0;
1900 TEUCHOS_TEST_FOR_EXCEPTION
1901 (static_cast<size_t> (numEntOut) != numEnt, std::logic_error,
1902 "unpackRowForBlockCrs: Expected number of entries " << numEnt
1903 <<
" != actual number of entries " << numEntOut <<
".");
1906 Kokkos::pair<int, size_t> p;
1907 p = PackTraits<GO>::unpackArray (gidsOut.data (), gidsIn, numEnt);
1908 errorCode += p.first;
1909 numBytesOut += p.second;
1911 p = PackTraits<ST>::unpackArray (valsOut.data (), valsIn, numScalarEnt);
1912 errorCode += p.first;
1913 numBytesOut += p.second;
1916 TEUCHOS_TEST_FOR_EXCEPTION
1917 (numBytesOut != numBytes, std::logic_error,
1918 "unpackRowForBlockCrs: numBytesOut = " << numBytesOut
1919 <<
" != numBytes = " << numBytes <<
".");
1921 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
1922 TEUCHOS_TEST_FOR_EXCEPTION
1923 (numBytesOut != expectedNumBytes, std::logic_error,
1924 "unpackRowForBlockCrs: numBytesOut = " << numBytesOut
1925 <<
" != expectedNumBytes = " << expectedNumBytes <<
".");
1927 TEUCHOS_TEST_FOR_EXCEPTION
1928 (errorCode != 0, std::runtime_error,
"unpackRowForBlockCrs: "
1929 "PackTraits::unpackArray returned a nonzero error code");
1935 template<
class Scalar,
class LO,
class GO,
class Node>
1939 (const ::Tpetra::SrcDistObject& source,
1944 Kokkos::DualView<
size_t*,
1946 size_t& constantNumPackets)
1948 using ::Tpetra::Details::Behavior;
1950 using ::Tpetra::Details::ProfilingRegion;
1951 using ::Tpetra::Details::PackTraits;
1953 typedef typename Kokkos::View<int*, device_type>::HostMirror::execution_space host_exec;
1957 ProfilingRegion profile_region(
"Tpetra::BlockCrsMatrix::packAndPrepare");
1959 const bool debug = Behavior::debug();
1960 const bool verbose = Behavior::verbose();
1965 std::ostringstream os;
1966 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1967 os <<
"Proc " << myRank <<
": BlockCrsMatrix::packAndPrepare: " << std::endl;
1972 if (* (this->localError_)) {
1973 std::ostream& err = this->markLocalErrorAndGetStream ();
1975 <<
"The target object of the Import or Export is already in an error state."
1984 std::ostringstream os;
1985 os << prefix << std::endl
1989 std::cerr << os.str ();
1995 if (exportLIDs.extent (0) != numPacketsPerLID.extent (0)) {
1996 std::ostream& err = this->markLocalErrorAndGetStream ();
1998 <<
"exportLIDs.extent(0) = " << exportLIDs.extent (0)
1999 <<
" != numPacketsPerLID.extent(0) = " << numPacketsPerLID.extent(0)
2000 <<
"." << std::endl;
2003 if (exportLIDs.need_sync_host ()) {
2004 std::ostream& err = this->markLocalErrorAndGetStream ();
2005 err << prefix <<
"exportLIDs be sync'd to host." << std::endl;
2009 const this_BCRS_type* src =
dynamic_cast<const this_BCRS_type*
> (&source);
2011 std::ostream& err = this->markLocalErrorAndGetStream ();
2013 <<
"The source (input) object of the Import or "
2014 "Export is either not a BlockCrsMatrix, or does not have the right "
2015 "template parameters. checkSizes() should have caught this. "
2016 "Please report this bug to the Tpetra developers." << std::endl;
2028 constantNumPackets = 0;
2032 const size_t blockSize =
static_cast<size_t> (src->getBlockSize ());
2033 const size_t numExportLIDs = exportLIDs.extent (0);
2034 size_t numBytesPerValue(0);
2036 auto val_host = val_.getHostView(Access::ReadOnly);
2038 PackTraits<impl_scalar_type>
2046 Impl::BlockCrsRowStruct<size_t> rowReducerStruct;
2049 auto exportLIDsHost = exportLIDs.view_host();
2050 auto numPacketsPerLIDHost = numPacketsPerLID.view_host();
2051 numPacketsPerLID.modify_host ();
2053 using reducer_type = Impl::BlockCrsReducer<Impl::BlockCrsRowStruct<size_t>,host_exec>;
2054 const auto policy = Kokkos::RangePolicy<host_exec>(size_t(0), numExportLIDs);
2055 Kokkos::parallel_reduce
2057 [=](
const size_t &i,
typename reducer_type::value_type &update) {
2058 const LO lclRow = exportLIDsHost(i);
2060 numEnt = (numEnt == Teuchos::OrdinalTraits<size_t>::invalid () ? 0 : numEnt);
2062 const size_t numBytes =
2063 packRowCount<LO, GO> (numEnt, numBytesPerValue, blockSize);
2064 numPacketsPerLIDHost(i) = numBytes;
2065 update +=
typename reducer_type::value_type(numEnt, numBytes, numEnt);
2066 }, rowReducerStruct);
2072 const size_t totalNumBytes = rowReducerStruct.totalNumBytes;
2073 const size_t totalNumEntries = rowReducerStruct.totalNumEntries;
2074 const size_t maxRowLength = rowReducerStruct.maxRowLength;
2077 std::ostringstream os;
2079 <<
"totalNumBytes = " << totalNumBytes <<
", totalNumEntries = " << totalNumEntries
2081 std::cerr << os.str ();
2087 if(exports.extent(0) != totalNumBytes)
2089 const std::string oldLabel = exports.d_view.label ();
2090 const std::string newLabel = (oldLabel ==
"") ?
"exports" : oldLabel;
2091 exports = Kokkos::DualView<packet_type*, buffer_device_type>(newLabel, totalNumBytes);
2093 if (totalNumEntries > 0) {
2095 Kokkos::View<size_t*, host_exec> offset(
"offset", numExportLIDs+1);
2097 const auto policy = Kokkos::RangePolicy<host_exec>(size_t(0), numExportLIDs+1);
2098 Kokkos::parallel_scan
2100 [=](
const size_t &i,
size_t &update,
const bool &
final) {
2101 if (
final) offset(i) = update;
2102 update += (i == numExportLIDs ? 0 : numPacketsPerLIDHost(i));
2105 if (offset(numExportLIDs) != totalNumBytes) {
2106 std::ostream& err = this->markLocalErrorAndGetStream ();
2108 <<
"At end of method, the final offset (in bytes) "
2109 << offset(numExportLIDs) <<
" does not equal the total number of bytes packed "
2110 << totalNumBytes <<
". "
2111 <<
"Please report this bug to the Tpetra developers." << std::endl;
2125 exports.modify_host();
2127 typedef Kokkos::TeamPolicy<host_exec> policy_type;
2129 policy_type(numExportLIDs, 1, 1)
2130 .set_scratch_size(0, Kokkos::PerTeam(
sizeof(GO)*maxRowLength));
2135 Kokkos::parallel_for
2137 [=](
const typename policy_type::member_type &member) {
2138 const size_t i = member.league_rank();
2139 Kokkos::View<GO*, typename host_exec::scratch_memory_space>
2140 gblColInds(member.team_scratch(0), maxRowLength);
2142 const LO lclRowInd = exportLIDsHost(i);
2143 local_inds_host_view_type lclColInds;
2144 values_host_view_type vals;
2149 src->getLocalRowView (lclRowInd, lclColInds, vals);
2150 const size_t numEnt = lclColInds.extent(0);
2153 for (
size_t j = 0; j < numEnt; ++j)
2160 const size_t numBytes =
2161 packRowForBlockCrs<impl_scalar_type, LO, GO>
2162 (exports.view_host(),
2165 Kokkos::View<const GO*, host_exec>(gblColInds.data(), maxRowLength),
2172 const size_t offsetDiff = offset(i+1) - offset(i);
2173 if (numBytes != offsetDiff) {
2174 std::ostringstream os;
2176 <<
"numBytes computed from packRowForBlockCrs is different from "
2177 <<
"precomputed offset values, LID = " << i << std::endl;
2178 std::cerr << os.str ();
2187 std::ostringstream os;
2188 const bool lclSuccess = ! (* (this->localError_));
2190 << (lclSuccess ?
"succeeded" :
"FAILED")
2192 std::cerr << os.str ();
2196 template<
class Scalar,
class LO,
class GO,
class Node>
2204 Kokkos::DualView<
size_t*,
2209 using ::Tpetra::Details::Behavior;
2211 using ::Tpetra::Details::ProfilingRegion;
2212 using ::Tpetra::Details::PackTraits;
2215 typename Kokkos::View<int*, device_type>::HostMirror::execution_space;
2217 ProfilingRegion profile_region(
"Tpetra::BlockCrsMatrix::unpackAndCombine");
2218 const bool verbose = Behavior::verbose ();
2223 std::ostringstream os;
2224 auto map = this->graph_.getRowMap ();
2225 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
2226 const int myRank = comm.is_null () ? -1 : comm->getRank ();
2227 os <<
"Proc " << myRank <<
": Tpetra::BlockCrsMatrix::unpackAndCombine: ";
2230 os <<
"Start" << endl;
2231 std::cerr << os.str ();
2236 if (* (this->localError_)) {
2237 std::ostream& err = this->markLocalErrorAndGetStream ();
2238 std::ostringstream os;
2239 os << prefix <<
"{Im/Ex}port target is already in error." << endl;
2241 std::cerr << os.str ();
2250 if (importLIDs.extent (0) != numPacketsPerLID.extent (0)) {
2251 std::ostream& err = this->markLocalErrorAndGetStream ();
2252 std::ostringstream os;
2253 os << prefix <<
"importLIDs.extent(0) = " << importLIDs.extent (0)
2254 <<
" != numPacketsPerLID.extent(0) = " << numPacketsPerLID.extent(0)
2257 std::cerr << os.str ();
2263 if (combineMode !=
ADD && combineMode !=
INSERT &&
2265 combineMode !=
ZERO) {
2266 std::ostream& err = this->markLocalErrorAndGetStream ();
2267 std::ostringstream os;
2269 <<
"Invalid CombineMode value " << combineMode <<
". Valid "
2270 <<
"values include ADD, INSERT, REPLACE, ABSMAX, and ZERO."
2273 std::cerr << os.str ();
2279 if (this->graph_.getColMap ().is_null ()) {
2280 std::ostream& err = this->markLocalErrorAndGetStream ();
2281 std::ostringstream os;
2282 os << prefix <<
"Target matrix's column Map is null." << endl;
2284 std::cerr << os.str ();
2293 const map_type& tgtColMap = * (this->graph_.getColMap ());
2296 const size_t blockSize = this->getBlockSize ();
2297 const size_t numImportLIDs = importLIDs.extent(0);
2304 size_t numBytesPerValue(0);
2306 auto val_host = val_.getHostView(Access::ReadOnly);
2308 PackTraits<impl_scalar_type>::packValueCount
2311 const size_t maxRowNumEnt = graph_.getLocalMaxNumRowEntries ();
2312 const size_t maxRowNumScalarEnt = maxRowNumEnt * blockSize * blockSize;
2315 std::ostringstream os;
2316 os << prefix <<
"combineMode: "
2317 << ::Tpetra::combineModeToString (combineMode)
2318 <<
", blockSize: " << blockSize
2319 <<
", numImportLIDs: " << numImportLIDs
2320 <<
", numBytesPerValue: " << numBytesPerValue
2321 <<
", maxRowNumEnt: " << maxRowNumEnt
2322 <<
", maxRowNumScalarEnt: " << maxRowNumScalarEnt << endl;
2323 std::cerr << os.str ();
2326 if (combineMode ==
ZERO || numImportLIDs == 0) {
2328 std::ostringstream os;
2329 os << prefix <<
"Nothing to unpack. Done!" << std::endl;
2330 std::cerr << os.str ();
2337 if (imports.need_sync_host ()) {
2338 imports.sync_host ();
2348 if (imports.need_sync_host () || numPacketsPerLID.need_sync_host () ||
2349 importLIDs.need_sync_host ()) {
2350 std::ostream& err = this->markLocalErrorAndGetStream ();
2351 std::ostringstream os;
2352 os << prefix <<
"All input DualViews must be sync'd to host by now. "
2353 <<
"imports_nc.need_sync_host()="
2354 << (imports.need_sync_host () ?
"true" :
"false")
2355 <<
", numPacketsPerLID.need_sync_host()="
2356 << (numPacketsPerLID.need_sync_host () ?
"true" :
"false")
2357 <<
", importLIDs.need_sync_host()="
2358 << (importLIDs.need_sync_host () ?
"true" :
"false")
2361 std::cerr << os.str ();
2367 const auto importLIDsHost = importLIDs.view_host ();
2368 const auto numPacketsPerLIDHost = numPacketsPerLID.view_host ();
2374 Kokkos::View<size_t*, host_exec> offset (
"offset", numImportLIDs+1);
2376 const auto policy = Kokkos::RangePolicy<host_exec>(size_t(0), numImportLIDs+1);
2377 Kokkos::parallel_scan
2378 (
"Tpetra::BlockCrsMatrix::unpackAndCombine: offsets", policy,
2379 [=] (
const size_t &i,
size_t &update,
const bool &
final) {
2380 if (
final) offset(i) = update;
2381 update += (i == numImportLIDs ? 0 : numPacketsPerLIDHost(i));
2391 Kokkos::View<int, host_exec, Kokkos::MemoryTraits<Kokkos::Atomic> >
2392 errorDuringUnpack (
"errorDuringUnpack");
2393 errorDuringUnpack () = 0;
2395 using policy_type = Kokkos::TeamPolicy<host_exec>;
2396 size_t scratch_per_row =
sizeof(GO) * maxRowNumEnt +
sizeof (LO) * maxRowNumEnt + numBytesPerValue * maxRowNumScalarEnt
2399 const auto policy = policy_type (numImportLIDs, 1, 1)
2400 .set_scratch_size (0, Kokkos::PerTeam (scratch_per_row));
2401 using host_scratch_space =
typename host_exec::scratch_memory_space;
2403 using pair_type = Kokkos::pair<size_t, size_t>;
2406 getValuesHostNonConst();
2408 Kokkos::parallel_for
2409 (
"Tpetra::BlockCrsMatrix::unpackAndCombine: unpack", policy,
2410 [=] (
const typename policy_type::member_type& member) {
2411 const size_t i = member.league_rank();
2412 Kokkos::View<GO*, host_scratch_space> gblColInds
2413 (member.team_scratch (0), maxRowNumEnt);
2414 Kokkos::View<LO*, host_scratch_space> lclColInds
2415 (member.team_scratch (0), maxRowNumEnt);
2416 Kokkos::View<impl_scalar_type*, host_scratch_space> vals
2417 (member.team_scratch (0), maxRowNumScalarEnt);
2420 const size_t offval = offset(i);
2421 const LO lclRow = importLIDsHost(i);
2422 const size_t numBytes = numPacketsPerLIDHost(i);
2423 const size_t numEnt =
2424 unpackRowCount<impl_scalar_type, LO, GO>
2425 (imports.view_host (), offval, numBytes, numBytesPerValue);
2428 if (numEnt > maxRowNumEnt) {
2429 errorDuringUnpack() = 1;
2431 std::ostringstream os;
2433 <<
"At i = " << i <<
", numEnt = " << numEnt
2434 <<
" > maxRowNumEnt = " << maxRowNumEnt
2436 std::cerr << os.str ();
2441 const size_t numScalarEnt = numEnt*blockSize*blockSize;
2442 auto gidsOut = Kokkos::subview(gblColInds, pair_type(0, numEnt));
2443 auto lidsOut = Kokkos::subview(lclColInds, pair_type(0, numEnt));
2444 auto valsOut = Kokkos::subview(vals, pair_type(0, numScalarEnt));
2449 size_t numBytesOut = 0;
2452 unpackRowForBlockCrs<impl_scalar_type, LO, GO>
2453 (Kokkos::View<GO*,host_exec>(gidsOut.data(), numEnt),
2454 Kokkos::View<impl_scalar_type*,host_exec>(valsOut.data(), numScalarEnt),
2455 imports.view_host(),
2456 offval, numBytes, numEnt,
2457 numBytesPerValue, blockSize);
2459 catch (std::exception& e) {
2460 errorDuringUnpack() = 1;
2462 std::ostringstream os;
2463 os << prefix <<
"At i = " << i <<
", unpackRowForBlockCrs threw: "
2464 << e.what () << endl;
2465 std::cerr << os.str ();
2470 if (numBytes != numBytesOut) {
2471 errorDuringUnpack() = 1;
2473 std::ostringstream os;
2475 <<
"At i = " << i <<
", numBytes = " << numBytes
2476 <<
" != numBytesOut = " << numBytesOut <<
"."
2478 std::cerr << os.str();
2484 for (
size_t k = 0; k < numEnt; ++k) {
2486 if (lidsOut(k) == Teuchos::OrdinalTraits<LO>::invalid ()) {
2488 std::ostringstream os;
2490 <<
"At i = " << i <<
", GID " << gidsOut(k)
2491 <<
" is not owned by the calling process."
2493 std::cerr << os.str();
2501 const LO*
const lidsRaw =
const_cast<const LO*
> (lidsOut.data ());
2502 const Scalar*
const valsRaw =
reinterpret_cast<const Scalar*
>
2504 if (combineMode ==
ADD) {
2505 numCombd = this->sumIntoLocalValues (lclRow, lidsRaw, valsRaw, numEnt);
2506 }
else if (combineMode ==
INSERT || combineMode ==
REPLACE) {
2507 numCombd = this->replaceLocalValues (lclRow, lidsRaw, valsRaw, numEnt);
2508 }
else if (combineMode ==
ABSMAX) {
2509 numCombd = this->absMaxLocalValues (lclRow, lidsRaw, valsRaw, numEnt);
2512 if (static_cast<LO> (numEnt) != numCombd) {
2513 errorDuringUnpack() = 1;
2515 std::ostringstream os;
2516 os << prefix <<
"At i = " << i <<
", numEnt = " << numEnt
2517 <<
" != numCombd = " << numCombd <<
"."
2519 std::cerr << os.str ();
2527 if (errorDuringUnpack () != 0) {
2528 std::ostream& err = this->markLocalErrorAndGetStream ();
2529 err << prefix <<
"Unpacking failed.";
2531 err <<
" Please run again with the environment variable setting "
2532 "TPETRA_VERBOSE=1 to get more verbose diagnostic output.";
2538 std::ostringstream os;
2539 const bool lclSuccess = ! (* (this->localError_));
2541 << (lclSuccess ?
"succeeded" :
"FAILED")
2543 std::cerr << os.str ();
2547 template<
class Scalar,
class LO,
class GO,
class Node>
2551 using Teuchos::TypeNameTraits;
2552 std::ostringstream os;
2553 os <<
"\"Tpetra::BlockCrsMatrix\": { "
2554 <<
"Template parameters: { "
2555 <<
"Scalar: " << TypeNameTraits<Scalar>::name ()
2556 <<
"LO: " << TypeNameTraits<LO>::name ()
2557 <<
"GO: " << TypeNameTraits<GO>::name ()
2558 <<
"Node: " << TypeNameTraits<Node>::name ()
2560 <<
", Label: \"" << this->getObjectLabel () <<
"\""
2561 <<
", Global dimensions: ["
2562 << graph_.getDomainMap ()->getGlobalNumElements () <<
", "
2563 << graph_.getRangeMap ()->getGlobalNumElements () <<
"]"
2564 <<
", Block size: " << getBlockSize ()
2570 template<
class Scalar,
class LO,
class GO,
class Node>
2574 const Teuchos::EVerbosityLevel verbLevel)
const
2576 using Teuchos::ArrayRCP;
2577 using Teuchos::CommRequest;
2578 using Teuchos::FancyOStream;
2579 using Teuchos::getFancyOStream;
2580 using Teuchos::ireceive;
2581 using Teuchos::isend;
2582 using Teuchos::outArg;
2583 using Teuchos::TypeNameTraits;
2584 using Teuchos::VERB_DEFAULT;
2585 using Teuchos::VERB_NONE;
2586 using Teuchos::VERB_LOW;
2587 using Teuchos::VERB_MEDIUM;
2589 using Teuchos::VERB_EXTREME;
2591 using Teuchos::wait;
2595 const Teuchos::EVerbosityLevel vl =
2596 (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
2598 if (vl == VERB_NONE) {
2603 Teuchos::OSTab tab0 (out);
2605 out <<
"\"Tpetra::BlockCrsMatrix\":" << endl;
2606 Teuchos::OSTab tab1 (out);
2608 out <<
"Template parameters:" << endl;
2610 Teuchos::OSTab tab2 (out);
2611 out <<
"Scalar: " << TypeNameTraits<Scalar>::name () << endl
2612 <<
"LO: " << TypeNameTraits<LO>::name () << endl
2613 <<
"GO: " << TypeNameTraits<GO>::name () << endl
2614 <<
"Node: " << TypeNameTraits<Node>::name () << endl;
2616 out <<
"Label: \"" << this->getObjectLabel () <<
"\"" << endl
2617 <<
"Global dimensions: ["
2618 << graph_.getDomainMap ()->getGlobalNumElements () <<
", "
2619 << graph_.getRangeMap ()->getGlobalNumElements () <<
"]" << endl;
2621 const LO blockSize = getBlockSize ();
2622 out <<
"Block size: " << blockSize << endl;
2625 if (vl >= VERB_MEDIUM) {
2626 const Teuchos::Comm<int>& comm = * (graph_.getMap ()->getComm ());
2627 const int myRank = comm.getRank ();
2629 out <<
"Row Map:" << endl;
2631 getRowMap()->describe (out, vl);
2633 if (! getColMap ().is_null ()) {
2634 if (getColMap() == getRowMap()) {
2636 out <<
"Column Map: same as row Map" << endl;
2641 out <<
"Column Map:" << endl;
2643 getColMap ()->describe (out, vl);
2646 if (! getDomainMap ().is_null ()) {
2647 if (getDomainMap () == getRowMap ()) {
2649 out <<
"Domain Map: same as row Map" << endl;
2652 else if (getDomainMap () == getColMap ()) {
2654 out <<
"Domain Map: same as column Map" << endl;
2659 out <<
"Domain Map:" << endl;
2661 getDomainMap ()->describe (out, vl);
2664 if (! getRangeMap ().is_null ()) {
2665 if (getRangeMap () == getDomainMap ()) {
2667 out <<
"Range Map: same as domain Map" << endl;
2670 else if (getRangeMap () == getRowMap ()) {
2672 out <<
"Range Map: same as row Map" << endl;
2677 out <<
"Range Map: " << endl;
2679 getRangeMap ()->describe (out, vl);
2684 if (vl >= VERB_EXTREME) {
2685 const Teuchos::Comm<int>& comm = * (graph_.getMap ()->getComm ());
2686 const int myRank = comm.getRank ();
2687 const int numProcs = comm.getSize ();
2690 RCP<std::ostringstream> lclOutStrPtr (
new std::ostringstream ());
2691 RCP<FancyOStream> osPtr = getFancyOStream (lclOutStrPtr);
2692 FancyOStream& os = *osPtr;
2693 os <<
"Process " << myRank <<
":" << endl;
2694 Teuchos::OSTab tab2 (os);
2696 const map_type& meshRowMap = * (graph_.getRowMap ());
2697 const map_type& meshColMap = * (graph_.getColMap ());
2702 os <<
"Row " << meshGblRow <<
": {";
2704 local_inds_host_view_type lclColInds;
2705 values_host_view_type vals;
2707 this->getLocalRowView (meshLclRow, lclColInds, vals); numInds = lclColInds.extent(0);
2709 for (LO k = 0; k < numInds; ++k) {
2712 os <<
"Col " << gblCol <<
": [";
2713 for (LO i = 0; i < blockSize; ++i) {
2714 for (LO j = 0; j < blockSize; ++j) {
2715 os << vals[blockSize*blockSize*k + i*blockSize + j];
2716 if (j + 1 < blockSize) {
2720 if (i + 1 < blockSize) {
2725 if (k + 1 < numInds) {
2735 out << lclOutStrPtr->str ();
2736 lclOutStrPtr = Teuchos::null;
2739 const int sizeTag = 1337;
2740 const int dataTag = 1338;
2742 ArrayRCP<char> recvDataBuf;
2746 for (
int p = 1; p < numProcs; ++p) {
2749 ArrayRCP<size_t> recvSize (1);
2751 RCP<CommRequest<int> > recvSizeReq =
2752 ireceive<int, size_t> (recvSize, p, sizeTag, comm);
2753 wait<int> (comm, outArg (recvSizeReq));
2754 const size_t numCharsToRecv = recvSize[0];
2761 if (static_cast<size_t>(recvDataBuf.size()) < numCharsToRecv + 1) {
2762 recvDataBuf.resize (numCharsToRecv + 1);
2764 ArrayRCP<char> recvData = recvDataBuf.persistingView (0, numCharsToRecv);
2766 RCP<CommRequest<int> > recvDataReq =
2767 ireceive<int, char> (recvData, p, dataTag, comm);
2768 wait<int> (comm, outArg (recvDataReq));
2773 recvDataBuf[numCharsToRecv] =
'\0';
2774 out << recvDataBuf.getRawPtr ();
2776 else if (myRank == p) {
2780 const std::string stringToSend = lclOutStrPtr->str ();
2781 lclOutStrPtr = Teuchos::null;
2784 const size_t numCharsToSend = stringToSend.size ();
2785 ArrayRCP<size_t> sendSize (1);
2786 sendSize[0] = numCharsToSend;
2787 RCP<CommRequest<int> > sendSizeReq =
2788 isend<int, size_t> (sendSize, 0, sizeTag, comm);
2789 wait<int> (comm, outArg (sendSizeReq));
2797 ArrayRCP<const char> sendData (&stringToSend[0], 0, numCharsToSend,
false);
2798 RCP<CommRequest<int> > sendDataReq =
2799 isend<int, char> (sendData, 0, dataTag, comm);
2800 wait<int> (comm, outArg (sendDataReq));
2806 template<
class Scalar,
class LO,
class GO,
class Node>
2807 Teuchos::RCP<const Teuchos::Comm<int> >
2811 return graph_.getComm();
2815 template<
class Scalar,
class LO,
class GO,
class Node>
2820 return graph_.getGlobalNumCols();
2824 template<
class Scalar,
class LO,
class GO,
class Node>
2829 return graph_.getLocalNumCols();
2832 template<
class Scalar,
class LO,
class GO,
class Node>
2837 return graph_.getIndexBase();
2840 template<
class Scalar,
class LO,
class GO,
class Node>
2845 return graph_.getGlobalNumEntries();
2848 template<
class Scalar,
class LO,
class GO,
class Node>
2853 return graph_.getLocalNumEntries();
2856 template<
class Scalar,
class LO,
class GO,
class Node>
2861 return graph_.getNumEntriesInGlobalRow(globalRow);
2865 template<
class Scalar,
class LO,
class GO,
class Node>
2870 return graph_.getGlobalMaxNumRowEntries();
2873 template<
class Scalar,
class LO,
class GO,
class Node>
2878 return graph_.hasColMap();
2882 template<
class Scalar,
class LO,
class GO,
class Node>
2887 return graph_.isLocallyIndexed();
2890 template<
class Scalar,
class LO,
class GO,
class Node>
2895 return graph_.isGloballyIndexed();
2898 template<
class Scalar,
class LO,
class GO,
class Node>
2903 return graph_.isFillComplete ();
2906 template<
class Scalar,
class LO,
class GO,
class Node>
2914 template<
class Scalar,
class LO,
class GO,
class Node>
2918 nonconst_global_inds_host_view_type &,
2919 nonconst_values_host_view_type &,
2922 TEUCHOS_TEST_FOR_EXCEPTION(
2923 true, std::logic_error,
"Tpetra::BlockCrsMatrix::getGlobalRowCopy: "
2924 "This class doesn't support global matrix indexing.");
2928 template<
class Scalar,
class LO,
class GO,
class Node>
2932 global_inds_host_view_type &,
2933 values_host_view_type &)
const
2935 TEUCHOS_TEST_FOR_EXCEPTION(
2936 true, std::logic_error,
"Tpetra::BlockCrsMatrix::getGlobalRowView: "
2937 "This class doesn't support global matrix indexing.");
2941 template<
class Scalar,
class LO,
class GO,
class Node>
2945 local_inds_host_view_type &colInds,
2946 values_host_view_type &vals)
const
2948 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
2949 colInds = local_inds_host_view_type();
2950 vals = values_host_view_type();
2953 const size_t absBlockOffsetStart = ptrHost_[localRowInd];
2954 const size_t numInds = ptrHost_[localRowInd + 1] - absBlockOffsetStart;
2955 colInds = local_inds_host_view_type(indHost_.data()+absBlockOffsetStart, numInds);
2957 vals = getValuesHost (localRowInd);
2961 template<
class Scalar,
class LO,
class GO,
class Node>
2965 local_inds_host_view_type &colInds,
2966 nonconst_values_host_view_type &vals)
const
2968 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
2969 colInds = nonconst_local_inds_host_view_type();
2970 vals = nonconst_values_host_view_type();
2973 const size_t absBlockOffsetStart = ptrHost_[localRowInd];
2974 const size_t numInds = ptrHost_[localRowInd + 1] - absBlockOffsetStart;
2975 colInds = local_inds_host_view_type(indHost_.data()+absBlockOffsetStart, numInds);
2982 template<
class Scalar,
class LO,
class GO,
class Node>
2987 const size_t lclNumMeshRows = graph_.getLocalNumRows ();
2989 Kokkos::View<size_t*, device_type> diagOffsets (
"diagOffsets", lclNumMeshRows);
2990 graph_.getLocalDiagOffsets (diagOffsets);
2993 auto diagOffsetsHost = Kokkos::create_mirror_view (diagOffsets);
2997 auto vals_host_out = val_.getHostView(Access::ReadOnly);
3001 size_t rowOffset = 0;
3003 LO bs = getBlockSize();
3004 for(
size_t r=0; r<getLocalNumRows(); r++)
3007 offset = rowOffset + diagOffsetsHost(r)*bs*bs;
3008 for(
int b=0; b<bs; b++)
3013 rowOffset += getNumEntriesInLocalRow(r)*bs*bs;
3019 template<
class Scalar,
class LO,
class GO,
class Node>
3024 TEUCHOS_TEST_FOR_EXCEPTION(
3025 true, std::logic_error,
"Tpetra::BlockCrsMatrix::leftScale: "
3026 "not implemented.");
3030 template<
class Scalar,
class LO,
class GO,
class Node>
3035 TEUCHOS_TEST_FOR_EXCEPTION(
3036 true, std::logic_error,
"Tpetra::BlockCrsMatrix::rightScale: "
3037 "not implemented.");
3041 template<
class Scalar,
class LO,
class GO,
class Node>
3042 Teuchos::RCP<const ::Tpetra::RowGraph<LO, GO, Node> >
3049 template<
class Scalar,
class LO,
class GO,
class Node>
3050 typename ::Tpetra::RowMatrix<Scalar, LO, GO, Node>::mag_type
3054 TEUCHOS_TEST_FOR_EXCEPTION(
3055 true, std::logic_error,
"Tpetra::BlockCrsMatrix::getFrobeniusNorm: "
3056 "not implemented.");
3066 #define TPETRA_BLOCKCRSMATRIX_INSTANT(S,LO,GO,NODE) \
3067 template class BlockCrsMatrix< S, LO, GO, NODE >;
3069 #endif // TPETRA_BLOCKCRSMATRIX_DEF_HPP
virtual void getLocalRowCopy(LO LocalRow, nonconst_local_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const override
Not implemented.
size_t getNumEntriesInLocalRow(const LO localRowInd) const override
Return the number of entries in the given row on the calling process.
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
virtual size_t getLocalNumEntries() const override
The local number of stored (structurally nonzero) entries.
virtual global_size_t getGlobalNumCols() const override
The global number of columns of this matrix.
typename BMV::impl_scalar_type impl_scalar_type
The implementation type of entries in the matrix.
void getLocalDiagCopy(const Kokkos::View< impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &diag, const Kokkos::View< const size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Variant of getLocalDiagCopy() that uses precomputed offsets and puts diagonal blocks in a 3-D Kokkos:...
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel) const override
Print a description of this object to the given output stream.
Kokkos::View< impl_scalar_type **, Impl::BlockCrsMatrixLittleBlockArrayLayout, device_type, Kokkos::MemoryTraits< Kokkos::Unmanaged > > little_block_type
The type used to access nonconst matrix blocks.
virtual typename::Tpetra::RowMatrix< Scalar, LO, GO, Node >::mag_type getFrobeniusNorm() const override
The Frobenius norm of the matrix.
LO replaceLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Replace values at the given (mesh, i.e., block) column indices, in the given (mesh, i.e., block) row.
virtual void leftScale(const ::Tpetra::Vector< Scalar, LO, GO, Node > &x) override
Scale the RowMatrix on the left with the given Vector x.
Teuchos::RCP< const map_type > getColMap() const override
Returns the Map that describes the column distribution in this graph.
LO replaceLocalValuesByOffsets(const LO localRowInd, const ptrdiff_t offsets[], const Scalar vals[], const LO numOffsets) const
Like replaceLocalValues, but avoids computing row offsets.
std::string description() const override
One-line description of this object.
Sparse matrix whose entries are small dense square blocks, all of the same dimensions.
typename DistObject< Scalar, LO, GO, Node >::buffer_device_type buffer_device_type
Kokkos::Device specialization for communication buffers.
void exportAndFillComplete(Teuchos::RCP< BlockCrsMatrix< Scalar, LO, GO, Node > > &destMatrix, const Export< LO, GO, Node > &exporter) const
Import from this to the given destination matrix, and make the result fill complete.
virtual bool hasColMap() const override
Whether this matrix has a well-defined column Map.
virtual bool isLocallyIndexed() const override
Whether matrix indices are locally indexed.
LO local_ordinal_type
The type of local indices.
static KOKKOS_INLINE_FUNCTION size_t unpackValue(LO &outVal, const char inBuf[])
Unpack the given value from the given output buffer.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
void getLocalRowViewNonConst(LO LocalRow, local_inds_host_view_type &indices, nonconst_values_host_view_type &values) const
size_t getNumEntriesInLocalRow(local_ordinal_type localRow) const override
Get the number of entries in the given row (local index).
void setAllToScalar(const Scalar &alpha)
Set all matrix entries equal to alpha.
One or more distributed dense vectors.
Kokkos::StaticCrsGraph< local_ordinal_type, Kokkos::LayoutLeft, device_type, void, size_t > local_graph_device_type
The type of the part of the sparse graph on each MPI process.
Teuchos::RCP< const map_type > getDomainMap() const override
Returns the Map associated with the domain of this graph.
Linear algebra kernels for small dense matrices and vectors.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
Scalar scalar_type
The type of entries in the matrix (that is, of each entry in each block).
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
size_t getLocalNumRows() const override
get the local number of block rows
virtual Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
The communicator over which this matrix is distributed.
Kokkos::LayoutRight BlockCrsMatrixLittleBlockArrayLayout
give an option to use layoutleft
virtual void rightScale(const ::Tpetra::Vector< Scalar, LO, GO, Node > &x) override
Scale the RowMatrix on the right with the given Vector x.
void getLocalRowView(LO LocalRow, local_inds_host_view_type &indices, values_host_view_type &values) const override
Get a view of the (mesh, i.e., block) row, using local (mesh, i.e., block) indices.
static map_type makePointMap(const map_type &meshMap, const LO blockSize)
Create and return the point Map corresponding to the given mesh Map and block size.
local_ordinal_type getMinLocalIndex() const
The minimum local index.
MultiVector for multiple degrees of freedom per mesh point.
virtual void copyAndPermute(const SrcDistObject &sourceObj, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM) override
KOKKOS_INLINE_FUNCTION void absMax(const ViewType2 &Y, const ViewType1 &X)
Implementation of Tpetra's ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix, and the small dense vectors in BlockMultiVector and BlockVector.
Teuchos::RCP< const map_type > getColMap() const override
get the (mesh) map for the columns of this block matrix.
virtual size_t getGlobalMaxNumRowEntries() const override
The maximum number of entries in any row over all processes in the matrix's communicator.
size_t global_size_t
Global size_t object.
virtual void getGlobalRowView(GO GlobalRow, global_inds_host_view_type &indices, values_host_view_type &values) const override
Get a constant, nonpersisting, globally indexed view of the given row of the matrix.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
BlockCrsMatrix()
Default constructor: Makes an empty block matrix.
LO absMaxLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Variant of getLocalDiagCopy() that uses precomputed offsets and puts diagonal blocks in a 3-D Kokkos:...
LO getBlockSize() const
Get the number of degrees of freedom per mesh point.
Teuchos::RCP< const map_type > getDomainMap() const override
Get the (point) domain Map of this matrix.
Teuchos::RCP< const map_type > getRowMap() const override
get the (mesh) map for the rows of this block matrix.
static KOKKOS_INLINE_FUNCTION size_t packValue(char outBuf[], const LO &inVal)
Pack the given value of type value_type into the given output buffer of bytes (char).
Insert new values that don't currently exist.
bool isSorted() const
Whether graph indices in all rows are known to be sorted.
LO sumIntoLocalValuesByOffsets(const LO localRowInd, const ptrdiff_t offsets[], const Scalar vals[], const LO numOffsets) const
Like sumIntoLocalValues, but avoids computing row offsets.
global_size_t getGlobalNumRows() const override
get the global number of block rows
global_ordinal_type getGlobalElement(local_ordinal_type localIndex) const
The global index corresponding to the given local index.
void replaceLocalValue(const LocalOrdinal myRow, const Scalar &value)
Replace current value at the specified location with specified values.
local_ordinal_type getMaxLocalIndex() const
The maximum local index on the calling process.
bool isNodeLocalElement(local_ordinal_type localIndex) const
Whether the given local index is valid for this Map on the calling process.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
virtual Teuchos::RCP< const ::Tpetra::RowGraph< LO, GO, Node > > getGraph() const override
Get the (mesh) graph.
virtual GO getIndexBase() const override
The index base for global indices in this matrix.
void applyBlock(const BlockMultiVector< Scalar, LO, GO, Node > &X, BlockMultiVector< Scalar, LO, GO, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, const Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), const Scalar beta=Teuchos::ScalarTraits< Scalar >::zero())
Version of apply() that takes BlockMultiVector input and output.
CombineMode
Rule for combining data in an Import or Export.
virtual bool isGloballyIndexed() const override
Whether matrix indices are globally indexed.
LO getLocalRowOffsets(const LO localRowInd, ptrdiff_t offsets[], const LO colInds[], const LO numColInds) const
Get relative offsets corresponding to the given rows, given by local row index.
size_t getLocalMaxNumRowEntries() const override
Maximum number of entries in any row of the matrix, on this process.
virtual global_size_t getGlobalNumEntries() const override
The global number of stored (structurally nonzero) entries.
Replace old value with maximum of magnitudes of old and new values.
void apply(const mv_type &X, mv_type &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const override
For this matrix A, compute Y := beta * Y + alpha * Op(A) * X.
Replace existing values with new values.
device_type::execution_space execution_space
The Kokkos execution space that this class uses.
Teuchos::RCP< const map_type > getRangeMap() const override
Returns the Map associated with the domain of this graph.
void enableWDVTracking()
Enable WrappedDualView reference-count tracking and syncing. Call this after exiting a host-parallel ...
Replace old values with zero.
Teuchos::RCP< const map_type > getRangeMap() const override
Get the (point) range Map of this matrix.
KOKKOS_INLINE_FUNCTION void COPY(const ViewType1 &x, const ViewType2 &y)
Deep copy x into y, where x and y are either rank 1 (vectors) or rank 2 (matrices) with the same dime...
std::string dualViewStatusToString(const DualViewType &dv, const char name[])
Return the status of the given Kokkos::DualView, as a human-readable string.
void getLocalDiagOffsets(const Kokkos::View< size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Get offsets of the diagonal entries in the matrix.
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const LO &)
Number of bytes required to pack or unpack the given value of type value_type.
char packet_type
Implementation detail; tells.
local_matrix_device_type getLocalMatrixDevice() const
void disableWDVTracking()
Disable WrappedDualView reference-count tracking and syncing. Call this before entering a host-parall...
local_ordinal_type getLocalElement(global_ordinal_type globalIndex) const
The local index corresponding to the given global index.
A distributed dense vector.
virtual bool supportsRowViews() const override
Whether this object implements getLocalRowView() and getGlobalRowView().
virtual void packAndPrepare(const SrcDistObject &sourceObj, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &exportLIDs, Kokkos::DualView< packet_type *, buffer_device_type > &exports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, size_t &constantNumPackets) override
virtual void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< packet_type *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode) override
virtual void getGlobalRowCopy(GO GlobalRow, nonconst_global_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const override
Get a copy of the given global row's entries.
bool locallySameAs(const Map< local_ordinal_type, global_ordinal_type, node_type > &map) const
Is this Map locally the same as the input Map?
virtual bool isFillComplete() const override
Whether fillComplete() has been called.
impl_scalar_type_dualview::t_host getValuesHostNonConst() const
Get the host or device View of the matrix's values (val_).
Base class for distributed Tpetra objects that support data redistribution.
LO sumIntoLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Sum into values at the given (mesh, i.e., block) column indices, in the given (mesh, i.e., block) row.
virtual size_t getLocalNumCols() const override
The number of columns needed to apply the forward operator on this node.
void importAndFillComplete(Teuchos::RCP< BlockCrsMatrix< Scalar, LO, GO, Node > > &destMatrix, const Import< LO, GO, Node > &importer) const
Import from this to the given destination matrix, and make the result fill complete.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
virtual size_t getNumEntriesInGlobalRow(GO globalRow) const override
The current number of entries on the calling process in the specified global row. ...