12 #ifndef TPETRA_BLOCKCRSMATRIX_DEF_HPP
13 #define TPETRA_BLOCKCRSMATRIX_DEF_HPP
21 #include "Tpetra_BlockMultiVector.hpp"
24 #include "Teuchos_TimeMonitor.hpp"
25 #ifdef HAVE_TPETRA_DEBUG
27 #endif // HAVE_TPETRA_DEBUG
29 #include "KokkosSparse_spmv.hpp"
36 struct BlockCrsRowStruct {
37 T totalNumEntries, totalNumBytes, maxRowLength;
38 KOKKOS_DEFAULTED_FUNCTION BlockCrsRowStruct() =
default;
39 KOKKOS_DEFAULTED_FUNCTION BlockCrsRowStruct(
const BlockCrsRowStruct &b) =
default;
40 KOKKOS_INLINE_FUNCTION BlockCrsRowStruct(
const T& numEnt,
const T& numBytes,
const T& rowLength)
41 : totalNumEntries(numEnt), totalNumBytes(numBytes), maxRowLength(rowLength) {}
46 KOKKOS_INLINE_FUNCTION
47 void operator+=(BlockCrsRowStruct<T> &a,
const BlockCrsRowStruct<T> &b) {
48 a.totalNumEntries += b.totalNumEntries;
49 a.totalNumBytes += b.totalNumBytes;
50 a.maxRowLength = a.maxRowLength > b.maxRowLength ? a.maxRowLength : b.maxRowLength;
53 template<
typename T,
typename ExecSpace>
54 struct BlockCrsReducer {
55 typedef BlockCrsReducer reducer;
57 typedef Kokkos::View<value_type,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type;
60 KOKKOS_INLINE_FUNCTION
61 BlockCrsReducer(value_type &val) : value(&val) {}
63 KOKKOS_INLINE_FUNCTION
void join(value_type &dst, value_type &src)
const { dst += src; }
64 KOKKOS_INLINE_FUNCTION
void join(value_type &dst,
const value_type &src)
const { dst += src; }
65 KOKKOS_INLINE_FUNCTION
void init(value_type &val)
const { val = value_type(); }
66 KOKKOS_INLINE_FUNCTION value_type& reference() {
return *value; }
67 KOKKOS_INLINE_FUNCTION result_view_type view()
const {
return result_view_type(value); }
76 template<
class Scalar,
class LO,
class GO,
class Node>
77 class GetLocalDiagCopy {
79 typedef typename Node::device_type device_type;
80 typedef size_t diag_offset_type;
81 typedef Kokkos::View<
const size_t*, device_type,
82 Kokkos::MemoryUnmanaged> diag_offsets_type;
83 typedef typename ::Tpetra::CrsGraph<LO, GO, Node> global_graph_type;
85 typedef typename local_graph_device_type::row_map_type row_offsets_type;
86 typedef typename ::Tpetra::BlockMultiVector<Scalar, LO, GO, Node>::impl_scalar_type IST;
87 typedef Kokkos::View<IST***, device_type, Kokkos::MemoryUnmanaged> diag_type;
88 typedef Kokkos::View<const IST*, device_type, Kokkos::MemoryUnmanaged> values_type;
91 GetLocalDiagCopy (
const diag_type& diag,
92 const values_type& val,
93 const diag_offsets_type& diagOffsets,
94 const row_offsets_type& ptr,
97 diagOffsets_ (diagOffsets),
99 blockSize_ (blockSize),
100 offsetPerBlock_ (blockSize_*blockSize_),
104 KOKKOS_INLINE_FUNCTION
void
105 operator() (
const LO& lclRowInd)
const
110 const size_t absOffset = ptr_[lclRowInd];
113 const size_t relOffset = diagOffsets_[lclRowInd];
116 const size_t pointOffset = (absOffset+relOffset)*offsetPerBlock_;
121 device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
122 const_little_block_type;
123 const_little_block_type D_in (val_.data () + pointOffset,
124 blockSize_, blockSize_);
125 auto D_out = Kokkos::subview (diag_, lclRowInd, ALL (), ALL ());
131 diag_offsets_type diagOffsets_;
132 row_offsets_type ptr_;
139 template<
class Scalar,
class LO,
class GO,
class Node>
141 BlockCrsMatrix<Scalar, LO, GO, Node>::
142 markLocalErrorAndGetStream ()
144 * (this->localError_) =
true;
145 if ((*errs_).is_null ()) {
146 *errs_ = Teuchos::rcp (
new std::ostringstream ());
151 template<
class Scalar,
class LO,
class GO,
class Node>
155 graph_ (Teuchos::rcp (new
map_type ()), 0),
156 blockSize_ (static_cast<LO> (0)),
157 X_colMap_ (new Teuchos::RCP<
BMV> ()),
158 Y_rowMap_ (new Teuchos::RCP<
BMV> ()),
159 pointImporter_ (new Teuchos::RCP<typename
crs_graph_type::import_type> ()),
161 localError_ (new bool (false)),
162 errs_ (new Teuchos::RCP<std::ostringstream> ())
166 template<
class Scalar,
class LO,
class GO,
class Node>
169 const LO blockSize) :
172 rowMeshMap_ (* (graph.getRowMap ())),
173 blockSize_ (blockSize),
174 X_colMap_ (new Teuchos::RCP<
BMV> ()),
175 Y_rowMap_ (new Teuchos::RCP<
BMV> ()),
176 pointImporter_ (new Teuchos::RCP<typename
crs_graph_type::import_type> ()),
177 offsetPerBlock_ (blockSize * blockSize),
178 localError_ (new bool (false)),
179 errs_ (new Teuchos::RCP<std::ostringstream> ())
183 TEUCHOS_TEST_FOR_EXCEPTION(
184 ! graph_.
isSorted (), std::invalid_argument,
"Tpetra::"
185 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted "
186 "rows (isSorted() is false). This class assumes sorted rows.");
188 graphRCP_ = Teuchos::rcpFromRef(graph_);
194 const bool blockSizeIsNonpositive = (blockSize + 1 <= 1);
195 TEUCHOS_TEST_FOR_EXCEPTION(
196 blockSizeIsNonpositive, std::invalid_argument,
"Tpetra::"
197 "BlockCrsMatrix constructor: The input blockSize = " << blockSize <<
198 " <= 0. The block size must be positive.");
206 const auto colPointMap = Teuchos::rcp
208 *pointImporter_ = Teuchos::rcp
212 auto local_graph_h = graph.getLocalGraphHost ();
213 auto ptr_h = local_graph_h.row_map;
214 ptrHost_ = decltype(ptrHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph row offset"), ptr_h.extent(0));
217 auto ind_h = local_graph_h.entries;
218 indHost_ = decltype(indHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph column indices"), ind_h.extent(0));
222 const auto numValEnt = ind_h.extent(0) * offsetPerBlock ();
223 val_ = decltype (val_) (impl_scalar_type_dualview(
"val", numValEnt));
227 template<
class Scalar,
class LO,
class GO,
class Node>
230 const typename local_matrix_device_type::values_type& values,
231 const LO blockSize) :
234 rowMeshMap_ (* (graph.getRowMap ())),
235 blockSize_ (blockSize),
236 X_colMap_ (new Teuchos::RCP<
BMV> ()),
237 Y_rowMap_ (new Teuchos::RCP<
BMV> ()),
238 pointImporter_ (new Teuchos::RCP<typename
crs_graph_type::import_type> ()),
239 offsetPerBlock_ (blockSize * blockSize),
240 localError_ (new bool (false)),
241 errs_ (new Teuchos::RCP<std::ostringstream> ())
244 TEUCHOS_TEST_FOR_EXCEPTION(
245 ! graph_.
isSorted (), std::invalid_argument,
"Tpetra::"
246 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted "
247 "rows (isSorted() is false). This class assumes sorted rows.");
249 graphRCP_ = Teuchos::rcpFromRef(graph_);
255 const bool blockSizeIsNonpositive = (blockSize + 1 <= 1);
256 TEUCHOS_TEST_FOR_EXCEPTION(
257 blockSizeIsNonpositive, std::invalid_argument,
"Tpetra::"
258 "BlockCrsMatrix constructor: The input blockSize = " << blockSize <<
259 " <= 0. The block size must be positive.");
267 const auto colPointMap = Teuchos::rcp
269 *pointImporter_ = Teuchos::rcp
273 auto local_graph_h = graph.getLocalGraphHost ();
274 auto ptr_h = local_graph_h.row_map;
275 ptrHost_ = decltype(ptrHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph row offset"), ptr_h.extent(0));
278 auto ind_h = local_graph_h.entries;
279 indHost_ = decltype(indHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph column indices"), ind_h.extent(0));
282 const auto numValEnt = ind_h.extent(0) * offsetPerBlock ();
283 TEUCHOS_ASSERT_EQUALITY(numValEnt, values.size());
284 val_ = decltype (val_) (values);
288 template<
class Scalar,
class LO,
class GO,
class Node>
293 const LO blockSize) :
296 rowMeshMap_ (* (graph.getRowMap ())),
297 domainPointMap_ (domainPointMap),
298 rangePointMap_ (rangePointMap),
299 blockSize_ (blockSize),
300 X_colMap_ (new Teuchos::RCP<
BMV> ()),
301 Y_rowMap_ (new Teuchos::RCP<
BMV> ()),
302 pointImporter_ (new Teuchos::RCP<typename
crs_graph_type::import_type> ()),
303 offsetPerBlock_ (blockSize * blockSize),
304 localError_ (new bool (false)),
305 errs_ (new Teuchos::RCP<std::ostringstream> ())
307 TEUCHOS_TEST_FOR_EXCEPTION(
308 ! graph_.
isSorted (), std::invalid_argument,
"Tpetra::"
309 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted "
310 "rows (isSorted() is false). This class assumes sorted rows.");
312 graphRCP_ = Teuchos::rcpFromRef(graph_);
318 const bool blockSizeIsNonpositive = (blockSize + 1 <= 1);
319 TEUCHOS_TEST_FOR_EXCEPTION(
320 blockSizeIsNonpositive, std::invalid_argument,
"Tpetra::"
321 "BlockCrsMatrix constructor: The input blockSize = " << blockSize <<
322 " <= 0. The block size must be positive.");
326 const auto colPointMap = Teuchos::rcp
328 *pointImporter_ = Teuchos::rcp
332 auto local_graph_h = graph.getLocalGraphHost ();
333 auto ptr_h = local_graph_h.row_map;
334 ptrHost_ = decltype(ptrHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph row offset"), ptr_h.extent(0));
338 auto ind_h = local_graph_h.entries;
339 indHost_ = decltype(indHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph column indices"), ind_h.extent(0));
343 const auto numValEnt = ind_h.extent(0) * offsetPerBlock ();
344 val_ = decltype (val_) (impl_scalar_type_dualview(
"val", numValEnt));
349 template<
class Scalar,
class LO,
class GO,
class Node>
350 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
355 return Teuchos::rcp (
new map_type (domainPointMap_));
358 template<
class Scalar,
class LO,
class GO,
class Node>
359 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
364 return Teuchos::rcp (
new map_type (rangePointMap_));
367 template<
class Scalar,
class LO,
class GO,
class Node>
368 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
372 return graph_.getRowMap();
375 template<
class Scalar,
class LO,
class GO,
class Node>
376 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
380 return graph_.getColMap();
383 template<
class Scalar,
class LO,
class GO,
class Node>
388 return graph_.getGlobalNumRows();
391 template<
class Scalar,
class LO,
class GO,
class Node>
396 return graph_.getLocalNumRows();
399 template<
class Scalar,
class LO,
class GO,
class Node>
404 return graph_.getLocalMaxNumRowEntries();
407 template<
class Scalar,
class LO,
class GO,
class Node>
412 Teuchos::ETransp mode,
417 TEUCHOS_TEST_FOR_EXCEPTION(
418 mode != Teuchos::NO_TRANS && mode != Teuchos::TRANS && mode != Teuchos::CONJ_TRANS,
419 std::invalid_argument,
"Tpetra::BlockCrsMatrix::apply: "
420 "Invalid 'mode' argument. Valid values are Teuchos::NO_TRANS, "
421 "Teuchos::TRANS, and Teuchos::CONJ_TRANS.");
423 TEUCHOS_TEST_FOR_EXCEPTION(
425 std::invalid_argument,
"Tpetra::BlockCrsMatrix::apply: "
426 "X and Y must both be constant stride");
430 const LO blockSize = getBlockSize ();
432 X_view =
BMV (X, * (graph_.getDomainMap ()), blockSize);
433 Y_view =
BMV (Y, * (graph_.getRangeMap ()), blockSize);
435 catch (std::invalid_argument& e) {
436 TEUCHOS_TEST_FOR_EXCEPTION(
437 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
438 "apply: Either the input MultiVector X or the output MultiVector Y "
439 "cannot be viewed as a BlockMultiVector, given this BlockCrsMatrix's "
440 "graph. BlockMultiVector's constructor threw the following exception: "
449 const_cast<this_BCRS_type&
> (*this).applyBlock (X_view, Y_view, mode, alpha, beta);
450 }
catch (std::invalid_argument& e) {
451 TEUCHOS_TEST_FOR_EXCEPTION(
452 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
453 "apply: The implementation method applyBlock complained about having "
454 "an invalid argument. It reported the following: " << e.what ());
455 }
catch (std::logic_error& e) {
456 TEUCHOS_TEST_FOR_EXCEPTION(
457 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
458 "apply: The implementation method applyBlock complained about a "
459 "possible bug in its implementation. It reported the following: "
461 }
catch (std::exception& e) {
462 TEUCHOS_TEST_FOR_EXCEPTION(
463 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
464 "apply: The implementation method applyBlock threw an exception which "
465 "is neither std::invalid_argument nor std::logic_error, but is a "
466 "subclass of std::exception. It reported the following: "
469 TEUCHOS_TEST_FOR_EXCEPTION(
470 true, std::logic_error,
"Tpetra::BlockCrsMatrix::"
471 "apply: The implementation method applyBlock threw an exception which "
472 "is not an instance of a subclass of std::exception. This probably "
473 "indicates a bug. Please report this to the Tpetra developers.");
477 template<
class Scalar,
class LO,
class GO,
class Node>
482 Teuchos::ETransp mode,
486 TEUCHOS_TEST_FOR_EXCEPTION(
488 "Tpetra::BlockCrsMatrix::applyBlock: "
489 "X and Y have different block sizes. X.getBlockSize() = "
493 if (mode == Teuchos::NO_TRANS) {
494 applyBlockNoTrans (X, Y, alpha, beta);
495 }
else if (mode == Teuchos::TRANS || mode == Teuchos::CONJ_TRANS) {
496 applyBlockTrans (X, Y, mode, alpha, beta);
498 TEUCHOS_TEST_FOR_EXCEPTION(
499 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
500 "applyBlock: Invalid 'mode' argument. Valid values are "
501 "Teuchos::NO_TRANS, Teuchos::TRANS, and Teuchos::CONJ_TRANS.");
505 template<
class Scalar,
class LO,
class GO,
class Node>
516 TEUCHOS_TEST_FOR_EXCEPTION(!destMatrix.is_null(), std::invalid_argument,
517 "destMatrix is required to be null.");
521 RCP<crs_graph_type> srcGraph = rcp (
new crs_graph_type(this->getCrsGraph()));
522 RCP<crs_graph_type> destGraph = importAndFillCompleteCrsGraph<crs_graph_type>(srcGraph, importer,
523 srcGraph->getDomainMap(),
524 srcGraph->getRangeMap());
528 destMatrix = rcp (
new this_BCRS_type (*destGraph, getBlockSize()));
533 template<
class Scalar,
class LO,
class GO,
class Node>
544 TEUCHOS_TEST_FOR_EXCEPTION(!destMatrix.is_null(), std::invalid_argument,
545 "destMatrix is required to be null.");
549 RCP<crs_graph_type> srcGraph = rcp (
new crs_graph_type(this->getCrsGraph()));
550 RCP<crs_graph_type> destGraph = exportAndFillCompleteCrsGraph<crs_graph_type>(srcGraph, exporter,
551 srcGraph->getDomainMap(),
552 srcGraph->getRangeMap());
556 destMatrix = rcp (
new this_BCRS_type (*destGraph, getBlockSize()));
561 template<
class Scalar,
class LO,
class GO,
class Node>
566 auto val_d = val_.getDeviceView(Access::OverwriteAll);
570 template<
class Scalar,
class LO,
class GO,
class Node>
576 const LO numColInds)
const
578 std::vector<ptrdiff_t> offsets(numColInds);
579 const LO numOffsets = this->getLocalRowOffsets(localRowInd, offsets.data(), colInds, numColInds);
580 const LO validCount = this->replaceLocalValuesByOffsets(localRowInd, offsets.data(), vals, numOffsets);
584 template <
class Scalar,
class LO,
class GO,
class Node>
588 Kokkos::MemoryUnmanaged>& offsets)
const
590 graph_.getLocalDiagOffsets (offsets);
593 template <
class Scalar,
class LO,
class GO,
class Node>
597 Kokkos::MemoryUnmanaged>& diag,
598 const Kokkos::View<
const size_t*, device_type,
599 Kokkos::MemoryUnmanaged>& offsets)
const
601 using Kokkos::parallel_for;
602 const char prefix[] =
"Tpetra::BlockCrsMatrix::getLocalDiagCopy (2-arg): ";
604 const LO lclNumMeshRows =
static_cast<LO
> (rowMeshMap_.getLocalNumElements ());
605 const LO blockSize = this->getBlockSize ();
606 TEUCHOS_TEST_FOR_EXCEPTION
607 (static_cast<LO> (diag.extent (0)) < lclNumMeshRows ||
608 static_cast<LO> (diag.extent (1)) < blockSize ||
609 static_cast<LO> (diag.extent (2)) < blockSize,
610 std::invalid_argument, prefix <<
611 "The input Kokkos::View is not big enough to hold all the data.");
612 TEUCHOS_TEST_FOR_EXCEPTION
613 (static_cast<LO> (offsets.size ()) < lclNumMeshRows, std::invalid_argument,
614 prefix <<
"offsets.size() = " << offsets.size () <<
" < local number of "
615 "diagonal blocks " << lclNumMeshRows <<
".");
617 typedef Kokkos::RangePolicy<execution_space, LO> policy_type;
618 typedef GetLocalDiagCopy<Scalar, LO, GO, Node> functor_type;
624 auto val_d = val_.getDeviceView(Access::ReadOnly);
625 parallel_for (policy_type (0, lclNumMeshRows),
626 functor_type (diag, val_d, offsets,
627 graph_.getLocalGraphDevice ().row_map, blockSize_));
630 template<
class Scalar,
class LO,
class GO,
class Node>
636 const LO numColInds)
const
638 std::vector<ptrdiff_t> offsets(numColInds);
639 const LO numOffsets = this->getLocalRowOffsets(localRowInd, offsets.data(), colInds, numColInds);
640 const LO validCount = this->absMaxLocalValuesByOffsets(localRowInd, offsets.data(), vals, numOffsets);
645 template<
class Scalar,
class LO,
class GO,
class Node>
651 const LO numColInds)
const
653 std::vector<ptrdiff_t> offsets(numColInds);
654 const LO numOffsets = this->getLocalRowOffsets(localRowInd, offsets.data(), colInds, numColInds);
655 const LO validCount = this->sumIntoLocalValuesByOffsets(localRowInd, offsets.data(), vals, numOffsets);
658 template<
class Scalar,
class LO,
class GO,
class Node>
662 nonconst_local_inds_host_view_type &Indices,
663 nonconst_values_host_view_type &Values,
664 size_t& NumEntries)
const
666 auto vals = getValuesHost(LocalRow);
667 const LO numInds = ptrHost_(LocalRow+1) - ptrHost_(LocalRow);
668 if (numInds > (LO)Indices.extent(0) || numInds*blockSize_*blockSize_ > (LO)Values.extent(0)) {
669 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error,
670 "Tpetra::BlockCrsMatrix::getLocalRowCopy : Column and/or values array is not large enough to hold "
671 << numInds <<
" row entries");
673 const LO * colInds = indHost_.data() + ptrHost_(LocalRow);
674 for (LO i=0; i<numInds; ++i) {
675 Indices[i] = colInds[i];
677 for (LO i=0; i<numInds*blockSize_*blockSize_; ++i) {
680 NumEntries = numInds;
683 template<
class Scalar,
class LO,
class GO,
class Node>
689 const LO numColInds)
const
691 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
696 return static_cast<LO
> (0);
699 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
703 for (LO k = 0; k < numColInds; ++k) {
704 const LO relBlockOffset =
705 this->findRelOffsetOfColumnIndex (localRowInd, colInds[k], hint);
706 if (relBlockOffset != LINV) {
707 offsets[k] =
static_cast<ptrdiff_t
> (relBlockOffset);
708 hint = relBlockOffset + 1;
716 template<
class Scalar,
class LO,
class GO,
class Node>
720 const ptrdiff_t offsets[],
722 const LO numOffsets)
const
724 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
729 return static_cast<LO
> (0);
736 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
737 const ptrdiff_t STINV = Teuchos::OrdinalTraits<ptrdiff_t>::invalid ();
738 size_t pointOffset = 0;
741 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
742 const size_t blockOffset = offsets[k]*perBlockSize;
743 if (offsets[k] != STINV) {
745 const_little_block_type A_new = getConstLocalBlockFromInput (vIn, pointOffset);
754 template<
class Scalar,
class LO,
class GO,
class Node>
758 const ptrdiff_t offsets[],
760 const LO numOffsets)
const
762 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
767 return static_cast<LO
> (0);
769 const impl_scalar_type*
const vIn =
reinterpret_cast<const impl_scalar_type*
> (vals);
770 auto val_out = getValuesHost(localRowInd);
771 impl_scalar_type* vOut =
const_cast<impl_scalar_type*
>(val_out.data());
773 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
774 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
775 size_t pointOffset = 0;
778 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
779 const size_t blockOffset = offsets[k]*perBlockSize;
780 if (blockOffset != STINV) {
781 little_block_type A_old = getNonConstLocalBlockFromInput (vOut, blockOffset);
782 const_little_block_type A_new = getConstLocalBlockFromInput (vIn, pointOffset);
791 template<
class Scalar,
class LO,
class GO,
class Node>
795 const ptrdiff_t offsets[],
797 const LO numOffsets)
const
799 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
804 return static_cast<LO
> (0);
812 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
813 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
814 size_t pointOffset = 0;
817 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
818 const size_t blockOffset = offsets[k]*perBlockSize;
819 if (blockOffset != STINV) {
821 const_little_block_type A_new = getConstLocalBlockFromInput (vIn, pointOffset);
822 AXPY (ONE, A_new, A_old);
829 template<
class Scalar,
class LO,
class GO,
class Node>
831 impl_scalar_type_dualview::t_host::const_type
835 return val_.getHostView(Access::ReadOnly);
838 template<
class Scalar,
class LO,
class GO,
class Node>
839 typename BlockCrsMatrix<Scalar, LO, GO, Node>::
840 impl_scalar_type_dualview::t_dev::const_type
841 BlockCrsMatrix<Scalar, LO, GO, Node>::
842 getValuesDevice ()
const
844 return val_.getDeviceView(Access::ReadOnly);
847 template<
class Scalar,
class LO,
class GO,
class Node>
848 typename BlockCrsMatrix<Scalar, LO, GO, Node>::
849 impl_scalar_type_dualview::t_host
853 return val_.getHostView(Access::ReadWrite);
856 template<
class Scalar,
class LO,
class GO,
class Node>
858 impl_scalar_type_dualview::t_dev
862 return val_.getDeviceView(Access::ReadWrite);
865 template<
class Scalar,
class LO,
class GO,
class Node>
866 typename BlockCrsMatrix<Scalar, LO, GO, Node>::
867 impl_scalar_type_dualview::t_host::const_type
868 BlockCrsMatrix<Scalar, LO, GO, Node>::
869 getValuesHost (
const LO& lclRow)
const
871 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
872 auto val = val_.getHostView(Access::ReadOnly);
873 auto r_val = Kokkos::subview(val, Kokkos::pair<LO,LO>(ptrHost_(lclRow)*perBlockSize, ptrHost_(lclRow+1)*perBlockSize));
877 template<
class Scalar,
class LO,
class GO,
class Node>
879 impl_scalar_type_dualview::t_dev::const_type
883 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
884 auto val = val_.getDeviceView(Access::ReadOnly);
885 auto r_val = Kokkos::subview(val, Kokkos::pair<LO,LO>(ptrHost_(lclRow)*perBlockSize, ptrHost_(lclRow+1)*perBlockSize));
889 template<
class Scalar,
class LO,
class GO,
class Node>
894 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
895 auto val = val_.getHostView(Access::ReadWrite);
896 auto r_val = Kokkos::subview(val, Kokkos::pair<LO,LO>(ptrHost_(lclRow)*perBlockSize, ptrHost_(lclRow+1)*perBlockSize));
900 template<
class Scalar,
class LO,
class GO,
class Node>
905 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
906 auto val = val_.getDeviceView(Access::ReadWrite);
907 auto r_val = Kokkos::subview(val, Kokkos::pair<LO,LO>(ptrHost_(lclRow)*perBlockSize, ptrHost_(lclRow+1)*perBlockSize));
911 template<
class Scalar,
class LO,
class GO,
class Node>
916 const size_t numEntInGraph = graph_.getNumEntriesInLocalRow (localRowInd);
917 if (numEntInGraph == Teuchos::OrdinalTraits<size_t>::invalid ()) {
918 return static_cast<LO
> (0);
920 return static_cast<LO
> (numEntInGraph);
924 template<
class Scalar,
class LO,
class GO,
class Node>
925 typename BlockCrsMatrix<Scalar, LO, GO, Node>::local_matrix_device_type
929 auto numCols = this->graph_.getColMap()->getLocalNumElements();
930 auto val = val_.getDeviceView(Access::ReadWrite);
931 const LO blockSize = this->getBlockSize ();
932 const auto graph = this->graph_.getLocalGraphDevice ();
934 return local_matrix_device_type(
"Tpetra::BlockCrsMatrix::lclMatrixDevice",
941 template<
class Scalar,
class LO,
class GO,
class Node>
946 const Teuchos::ETransp mode,
956 TEUCHOS_TEST_FOR_EXCEPTION(
957 true, std::logic_error,
"Tpetra::BlockCrsMatrix::apply: "
958 "transpose and conjugate transpose modes are not yet implemented.");
961 template<
class Scalar,
class LO,
class GO,
class Node>
963 BlockCrsMatrix<Scalar, LO, GO, Node>::
964 applyBlockNoTrans (
const BlockMultiVector<Scalar, LO, GO, Node>& X,
965 BlockMultiVector<Scalar, LO, GO, Node>& Y,
971 typedef ::Tpetra::Import<LO, GO, Node> import_type;
972 typedef ::Tpetra::Export<LO, GO, Node> export_type;
973 const Scalar zero = STS::zero ();
974 const Scalar one = STS::one ();
975 RCP<const import_type>
import = graph_.getImporter ();
977 RCP<const export_type> theExport = graph_.getExporter ();
978 const char prefix[] =
"Tpetra::BlockCrsMatrix::applyBlockNoTrans: ";
984 else if (beta != one) {
989 const BMV* X_colMap = NULL;
990 if (
import.is_null ()) {
994 catch (std::exception& e) {
995 TEUCHOS_TEST_FOR_EXCEPTION
996 (
true, std::logic_error, prefix <<
"Tpetra::MultiVector::"
997 "operator= threw an exception: " << std::endl << e.what ());
1007 if ((*X_colMap_).is_null () ||
1008 (**X_colMap_).getNumVectors () != X.getNumVectors () ||
1009 (**X_colMap_).getBlockSize () != X.getBlockSize ()) {
1010 *X_colMap_ = rcp (
new BMV (* (graph_.getColMap ()), getBlockSize (),
1011 static_cast<LO
> (X.getNumVectors ())));
1013 (*X_colMap_)->getMultiVectorView().doImport (X.getMultiVectorView (),
1017 X_colMap = &(**X_colMap_);
1019 catch (std::exception& e) {
1020 TEUCHOS_TEST_FOR_EXCEPTION
1021 (
true, std::logic_error, prefix <<
"Tpetra::MultiVector::"
1022 "operator= threw an exception: " << std::endl << e.what ());
1026 BMV* Y_rowMap = NULL;
1027 if (theExport.is_null ()) {
1030 else if ((*Y_rowMap_).is_null () ||
1031 (**Y_rowMap_).getNumVectors () != Y.getNumVectors () ||
1032 (**Y_rowMap_).getBlockSize () != Y.getBlockSize ()) {
1033 *Y_rowMap_ = rcp (
new BMV (* (graph_.getRowMap ()), getBlockSize (),
1034 static_cast<LO
> (X.getNumVectors ())));
1036 Y_rowMap = &(**Y_rowMap_);
1038 catch (std::exception& e) {
1039 TEUCHOS_TEST_FOR_EXCEPTION(
1040 true, std::logic_error, prefix <<
"Tpetra::MultiVector::"
1041 "operator= threw an exception: " << std::endl << e.what ());
1046 localApplyBlockNoTrans (*X_colMap, *Y_rowMap, alpha, beta);
1048 catch (std::exception& e) {
1049 TEUCHOS_TEST_FOR_EXCEPTION
1050 (
true, std::runtime_error, prefix <<
"localApplyBlockNoTrans threw "
1051 "an exception: " << e.what ());
1054 TEUCHOS_TEST_FOR_EXCEPTION
1055 (
true, std::runtime_error, prefix <<
"localApplyBlockNoTrans threw "
1056 "an exception not a subclass of std::exception.");
1059 if (! theExport.is_null ()) {
1066 template <
class Scalar,
class LO,
class GO,
class Node>
1067 void BlockCrsMatrix<Scalar, LO, GO, Node>::localApplyBlockNoTrans(
1068 const BlockMultiVector<Scalar, LO, GO, Node> &X,
1069 BlockMultiVector<Scalar, LO, GO, Node> &Y,
const Scalar alpha,
1070 const Scalar beta) {
1072 "Tpetra::BlockCrsMatrix::localApplyBlockNoTrans");
1073 const impl_scalar_type alpha_impl = alpha;
1074 const auto graph = this->graph_.getLocalGraphDevice();
1076 mv_type X_mv = X.getMultiVectorView();
1077 mv_type Y_mv = Y.getMultiVectorView();
1078 auto X_lcl = X_mv.getLocalViewDevice(Access::ReadOnly);
1079 auto Y_lcl = Y_mv.getLocalViewDevice(Access::ReadWrite);
1081 auto A_lcl = getLocalMatrixDevice();
1082 if(!applyHelper.get()) {
1084 applyHelper = std::make_shared<ApplyHelper>(A_lcl.nnz(), A_lcl.graph.row_map);
1086 if(applyHelper->shouldUseIntRowptrs())
1088 auto A_lcl_int_rowptrs = applyHelper->getIntRowptrMatrix(A_lcl);
1090 &applyHelper->handle_int, KokkosSparse::NoTranspose,
1091 alpha_impl, A_lcl_int_rowptrs, X_lcl, beta, Y_lcl);
1096 &applyHelper->handle, KokkosSparse::NoTranspose,
1097 alpha_impl, A_lcl, X_lcl, beta, Y_lcl);
1102 template<
class Scalar,
class LO,
class GO,
class Node>
1104 BlockCrsMatrix<Scalar, LO, GO, Node>::
1105 findRelOffsetOfColumnIndex (
const LO localRowIndex,
1106 const LO colIndexToFind,
1107 const LO hint)
const
1109 const size_t absStartOffset = ptrHost_[localRowIndex];
1110 const size_t absEndOffset = ptrHost_[localRowIndex+1];
1111 const LO numEntriesInRow =
static_cast<LO
> (absEndOffset - absStartOffset);
1113 const LO*
const curInd = indHost_.data () + absStartOffset;
1116 if (hint < numEntriesInRow && curInd[hint] == colIndexToFind) {
1123 LO relOffset = Teuchos::OrdinalTraits<LO>::invalid ();
1128 const LO maxNumEntriesForLinearSearch = 32;
1129 if (numEntriesInRow > maxNumEntriesForLinearSearch) {
1134 const LO* beg = curInd;
1135 const LO* end = curInd + numEntriesInRow;
1136 std::pair<const LO*, const LO*> p =
1137 std::equal_range (beg, end, colIndexToFind);
1138 if (p.first != p.second) {
1140 relOffset =
static_cast<LO
> (p.first - beg);
1144 for (LO k = 0; k < numEntriesInRow; ++k) {
1145 if (colIndexToFind == curInd[k]) {
1155 template<
class Scalar,
class LO,
class GO,
class Node>
1157 BlockCrsMatrix<Scalar, LO, GO, Node>::
1158 offsetPerBlock ()
const
1160 return offsetPerBlock_;
1163 template<
class Scalar,
class LO,
class GO,
class Node>
1164 typename BlockCrsMatrix<Scalar, LO, GO, Node>::const_little_block_type
1165 BlockCrsMatrix<Scalar, LO, GO, Node>::
1166 getConstLocalBlockFromInput (
const impl_scalar_type* val,
1167 const size_t pointOffset)
const
1170 const LO rowStride = blockSize_;
1171 return const_little_block_type (val + pointOffset, blockSize_, rowStride);
1174 template<
class Scalar,
class LO,
class GO,
class Node>
1175 typename BlockCrsMatrix<Scalar, LO, GO, Node>::little_block_type
1176 BlockCrsMatrix<Scalar, LO, GO, Node>::
1177 getNonConstLocalBlockFromInput (impl_scalar_type* val,
1178 const size_t pointOffset)
const
1181 const LO rowStride = blockSize_;
1182 return little_block_type (val + pointOffset, blockSize_, rowStride);
1185 template<
class Scalar,
class LO,
class GO,
class Node>
1186 typename BlockCrsMatrix<Scalar, LO, GO, Node>::little_block_host_type
1187 BlockCrsMatrix<Scalar, LO, GO, Node>::
1188 getNonConstLocalBlockFromInputHost (impl_scalar_type* val,
1189 const size_t pointOffset)
const
1192 const LO rowStride = blockSize_;
1193 const size_t bs2 = blockSize_ * blockSize_;
1194 return little_block_host_type (val + bs2 * pointOffset, blockSize_, rowStride);
1197 template<
class Scalar,
class LO,
class GO,
class Node>
1198 typename BlockCrsMatrix<Scalar, LO, GO, Node>::little_block_type
1199 BlockCrsMatrix<Scalar, LO, GO, Node>::
1200 getLocalBlockDeviceNonConst (
const LO localRowInd,
const LO localColInd)
const
1202 using this_BCRS_type = BlockCrsMatrix<Scalar, LO, GO, Node>;
1204 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1205 const LO relBlockOffset = this->findRelOffsetOfColumnIndex (localRowInd, localColInd);
1206 if (relBlockOffset != Teuchos::OrdinalTraits<LO>::invalid ()) {
1207 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1208 auto vals =
const_cast<this_BCRS_type&
>(*this).getValuesDeviceNonConst();
1209 auto r_val = getNonConstLocalBlockFromInput (vals.data(), absBlockOffset);
1213 return little_block_type ();
1217 template<
class Scalar,
class LO,
class GO,
class Node>
1218 typename BlockCrsMatrix<Scalar, LO, GO, Node>::little_block_host_type
1219 BlockCrsMatrix<Scalar, LO, GO, Node>::
1220 getLocalBlockHostNonConst (
const LO localRowInd,
const LO localColInd)
const
1222 using this_BCRS_type = BlockCrsMatrix<Scalar, LO, GO, Node>;
1224 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1225 const LO relBlockOffset = this->findRelOffsetOfColumnIndex (localRowInd, localColInd);
1226 if (relBlockOffset != Teuchos::OrdinalTraits<LO>::invalid ()) {
1227 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1228 auto vals =
const_cast<this_BCRS_type&
>(*this).getValuesHostNonConst();
1229 auto r_val = getNonConstLocalBlockFromInputHost (vals.data(), absBlockOffset);
1233 return little_block_host_type ();
1238 template<
class Scalar,
class LO,
class GO,
class Node>
1240 BlockCrsMatrix<Scalar, LO, GO, Node>::
1241 checkSizes (const ::Tpetra::SrcDistObject& source)
1244 typedef BlockCrsMatrix<Scalar, LO, GO, Node> this_BCRS_type;
1245 const this_BCRS_type* src =
dynamic_cast<const this_BCRS_type*
> (&source);
1248 std::ostream& err = markLocalErrorAndGetStream ();
1249 err <<
"checkSizes: The source object of the Import or Export "
1250 "must be a BlockCrsMatrix with the same template parameters as the "
1251 "target object." << endl;
1256 if (src->getBlockSize () != this->getBlockSize ()) {
1257 std::ostream& err = markLocalErrorAndGetStream ();
1258 err <<
"checkSizes: The source and target objects of the Import or "
1259 <<
"Export must have the same block sizes. The source's block "
1260 <<
"size = " << src->getBlockSize () <<
" != the target's block "
1261 <<
"size = " << this->getBlockSize () <<
"." << endl;
1263 if (! src->graph_.isFillComplete ()) {
1264 std::ostream& err = markLocalErrorAndGetStream ();
1265 err <<
"checkSizes: The source object of the Import or Export is "
1266 "not fill complete. Both source and target objects must be fill "
1267 "complete." << endl;
1269 if (! this->graph_.isFillComplete ()) {
1270 std::ostream& err = markLocalErrorAndGetStream ();
1271 err <<
"checkSizes: The target object of the Import or Export is "
1272 "not fill complete. Both source and target objects must be fill "
1273 "complete." << endl;
1275 if (src->graph_.getColMap ().is_null ()) {
1276 std::ostream& err = markLocalErrorAndGetStream ();
1277 err <<
"checkSizes: The source object of the Import or Export does "
1278 "not have a column Map. Both source and target objects must have "
1279 "column Maps." << endl;
1281 if (this->graph_.getColMap ().is_null ()) {
1282 std::ostream& err = markLocalErrorAndGetStream ();
1283 err <<
"checkSizes: The target object of the Import or Export does "
1284 "not have a column Map. Both source and target objects must have "
1285 "column Maps." << endl;
1289 return ! (* (this->localError_));
1292 template<
class Scalar,
class LO,
class GO,
class Node>
1296 (const ::Tpetra::SrcDistObject& source,
1297 const size_t numSameIDs,
1304 using ::Tpetra::Details::Behavior;
1306 using ::Tpetra::Details::ProfilingRegion;
1310 ProfilingRegion profile_region(
"Tpetra::BlockCrsMatrix::copyAndPermute");
1311 const bool debug = Behavior::debug();
1312 const bool verbose = Behavior::verbose();
1317 std::ostringstream os;
1318 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1319 os <<
"Proc " << myRank <<
": BlockCrsMatrix::copyAndPermute : " << endl;
1324 if (* (this->localError_)) {
1325 std::ostream& err = this->markLocalErrorAndGetStream ();
1327 <<
"The target object of the Import or Export is already in an error state."
1336 std::ostringstream os;
1337 os << prefix << endl
1340 std::cerr << os.str ();
1346 if (permuteToLIDs.extent (0) != permuteFromLIDs.extent (0)) {
1347 std::ostream& err = this->markLocalErrorAndGetStream ();
1349 <<
"permuteToLIDs.extent(0) = " << permuteToLIDs.extent (0)
1350 <<
" != permuteFromLIDs.extent(0) = " << permuteFromLIDs.extent(0)
1354 if (permuteToLIDs.need_sync_host () || permuteFromLIDs.need_sync_host ()) {
1355 std::ostream& err = this->markLocalErrorAndGetStream ();
1357 <<
"Both permuteToLIDs and permuteFromLIDs must be sync'd to host."
1362 const this_BCRS_type* src =
dynamic_cast<const this_BCRS_type*
> (&source);
1364 std::ostream& err = this->markLocalErrorAndGetStream ();
1366 <<
"The source (input) object of the Import or "
1367 "Export is either not a BlockCrsMatrix, or does not have the right "
1368 "template parameters. checkSizes() should have caught this. "
1369 "Please report this bug to the Tpetra developers." << endl;
1373 bool lclErr =
false;
1374 #ifdef HAVE_TPETRA_DEBUG
1375 std::set<LO> invalidSrcCopyRows;
1376 std::set<LO> invalidDstCopyRows;
1377 std::set<LO> invalidDstCopyCols;
1378 std::set<LO> invalidDstPermuteCols;
1379 std::set<LO> invalidPermuteFromRows;
1380 #endif // HAVE_TPETRA_DEBUG
1389 #ifdef HAVE_TPETRA_DEBUG
1390 const map_type& srcRowMap = * (src->graph_.getRowMap ());
1391 #endif // HAVE_TPETRA_DEBUG
1392 const map_type& dstRowMap = * (this->graph_.getRowMap ());
1393 const map_type& srcColMap = * (src->graph_.getColMap ());
1394 const map_type& dstColMap = * (this->graph_.getColMap ());
1395 const bool canUseLocalColumnIndices = srcColMap.
locallySameAs (dstColMap);
1397 const size_t numPermute =
static_cast<size_t> (permuteFromLIDs.extent(0));
1399 std::ostringstream os;
1401 <<
"canUseLocalColumnIndices: "
1402 << (canUseLocalColumnIndices ?
"true" :
"false")
1403 <<
", numPermute: " << numPermute
1405 std::cerr << os.str ();
1408 const auto permuteToLIDsHost = permuteToLIDs.view_host();
1409 const auto permuteFromLIDsHost = permuteFromLIDs.view_host();
1411 if (canUseLocalColumnIndices) {
1413 for (LO localRow = 0; localRow < static_cast<LO> (numSameIDs); ++localRow) {
1414 #ifdef HAVE_TPETRA_DEBUG
1417 invalidSrcCopyRows.insert (localRow);
1420 #endif // HAVE_TPETRA_DEBUG
1422 local_inds_host_view_type lclSrcCols;
1423 values_host_view_type vals;
1427 src->getLocalRowView (localRow, lclSrcCols, vals); numEntries = lclSrcCols.extent(0);
1428 if (numEntries > 0) {
1429 LO err = this->replaceLocalValues (localRow, lclSrcCols.data(),
reinterpret_cast<const scalar_type*
>(vals.data()), numEntries);
1430 if (err != numEntries) {
1433 #ifdef HAVE_TPETRA_DEBUG
1434 invalidDstCopyRows.insert (localRow);
1435 #endif // HAVE_TPETRA_DEBUG
1443 for (LO k = 0; k < numEntries; ++k) {
1446 #ifdef HAVE_TPETRA_DEBUG
1447 (void) invalidDstCopyCols.insert (lclSrcCols[k]);
1448 #endif // HAVE_TPETRA_DEBUG
1457 for (
size_t k = 0; k < numPermute; ++k) {
1458 const LO srcLclRow =
static_cast<LO
> (permuteFromLIDsHost(k));
1459 const LO dstLclRow =
static_cast<LO
> (permuteToLIDsHost(k));
1461 local_inds_host_view_type lclSrcCols;
1462 values_host_view_type vals;
1464 src->getLocalRowView (srcLclRow, lclSrcCols, vals); numEntries = lclSrcCols.extent(0);
1465 if (numEntries > 0) {
1466 LO err = this->replaceLocalValues (dstLclRow, lclSrcCols.data(),
reinterpret_cast<const scalar_type*
>(vals.data()), numEntries);
1467 if (err != numEntries) {
1469 #ifdef HAVE_TPETRA_DEBUG
1470 for (LO c = 0; c < numEntries; ++c) {
1472 invalidDstPermuteCols.insert (lclSrcCols[c]);
1475 #endif // HAVE_TPETRA_DEBUG
1482 const size_t maxNumEnt = src->graph_.getLocalMaxNumRowEntries ();
1483 Teuchos::Array<LO> lclDstCols (maxNumEnt);
1486 for (LO localRow = 0; localRow < static_cast<LO> (numSameIDs); ++localRow) {
1487 local_inds_host_view_type lclSrcCols;
1488 values_host_view_type vals;
1494 src->getLocalRowView (localRow, lclSrcCols, vals); numEntries = lclSrcCols.extent(0);
1495 }
catch (std::exception& e) {
1497 std::ostringstream os;
1498 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1499 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" localRow "
1500 << localRow <<
", src->getLocalRowView() threw an exception: "
1502 std::cerr << os.str ();
1507 if (numEntries > 0) {
1508 if (static_cast<size_t> (numEntries) > static_cast<size_t> (lclDstCols.size ())) {
1511 std::ostringstream os;
1512 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1513 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" localRow "
1514 << localRow <<
", numEntries = " << numEntries <<
" > maxNumEnt = "
1515 << maxNumEnt << endl;
1516 std::cerr << os.str ();
1522 Teuchos::ArrayView<LO> lclDstColsView = lclDstCols.view (0, numEntries);
1523 for (LO j = 0; j < numEntries; ++j) {
1525 if (lclDstColsView[j] == Teuchos::OrdinalTraits<LO>::invalid ()) {
1527 #ifdef HAVE_TPETRA_DEBUG
1528 invalidDstCopyCols.insert (lclDstColsView[j]);
1529 #endif // HAVE_TPETRA_DEBUG
1534 err = this->replaceLocalValues (localRow, lclDstColsView.getRawPtr (),
reinterpret_cast<const scalar_type*
>(vals.data()), numEntries);
1535 }
catch (std::exception& e) {
1537 std::ostringstream os;
1538 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1539 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" localRow "
1540 << localRow <<
", this->replaceLocalValues() threw an exception: "
1542 std::cerr << os.str ();
1546 if (err != numEntries) {
1549 std::ostringstream os;
1550 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1551 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" "
1552 "localRow " << localRow <<
", this->replaceLocalValues "
1553 "returned " << err <<
" instead of numEntries = "
1554 << numEntries << endl;
1555 std::cerr << os.str ();
1563 for (
size_t k = 0; k < numPermute; ++k) {
1564 const LO srcLclRow =
static_cast<LO
> (permuteFromLIDsHost(k));
1565 const LO dstLclRow =
static_cast<LO
> (permuteToLIDsHost(k));
1567 local_inds_host_view_type lclSrcCols;
1568 values_host_view_type vals;
1572 src->getLocalRowView (srcLclRow, lclSrcCols, vals); numEntries = lclSrcCols.extent(0);
1573 }
catch (std::exception& e) {
1575 std::ostringstream os;
1576 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1577 os <<
"Proc " << myRank <<
": copyAndPermute: At \"permute\" "
1578 "srcLclRow " << srcLclRow <<
" and dstLclRow " << dstLclRow
1579 <<
", src->getLocalRowView() threw an exception: " << e.what ();
1580 std::cerr << os.str ();
1585 if (numEntries > 0) {
1586 if (static_cast<size_t> (numEntries) > static_cast<size_t> (lclDstCols.size ())) {
1592 Teuchos::ArrayView<LO> lclDstColsView = lclDstCols.view (0, numEntries);
1593 for (LO j = 0; j < numEntries; ++j) {
1595 if (lclDstColsView[j] == Teuchos::OrdinalTraits<LO>::invalid ()) {
1597 #ifdef HAVE_TPETRA_DEBUG
1598 invalidDstPermuteCols.insert (lclDstColsView[j]);
1599 #endif // HAVE_TPETRA_DEBUG
1602 LO err = this->replaceLocalValues (dstLclRow, lclDstColsView.getRawPtr (),
reinterpret_cast<const scalar_type*
>(vals.data()), numEntries);
1603 if (err != numEntries) {
1612 std::ostream& err = this->markLocalErrorAndGetStream ();
1613 #ifdef HAVE_TPETRA_DEBUG
1614 err <<
"copyAndPermute: The graph structure of the source of the "
1615 "Import or Export must be a subset of the graph structure of the "
1617 err <<
"invalidSrcCopyRows = [";
1618 for (
typename std::set<LO>::const_iterator it = invalidSrcCopyRows.begin ();
1619 it != invalidSrcCopyRows.end (); ++it) {
1621 typename std::set<LO>::const_iterator itp1 = it;
1623 if (itp1 != invalidSrcCopyRows.end ()) {
1627 err <<
"], invalidDstCopyRows = [";
1628 for (
typename std::set<LO>::const_iterator it = invalidDstCopyRows.begin ();
1629 it != invalidDstCopyRows.end (); ++it) {
1631 typename std::set<LO>::const_iterator itp1 = it;
1633 if (itp1 != invalidDstCopyRows.end ()) {
1637 err <<
"], invalidDstCopyCols = [";
1638 for (
typename std::set<LO>::const_iterator it = invalidDstCopyCols.begin ();
1639 it != invalidDstCopyCols.end (); ++it) {
1641 typename std::set<LO>::const_iterator itp1 = it;
1643 if (itp1 != invalidDstCopyCols.end ()) {
1647 err <<
"], invalidDstPermuteCols = [";
1648 for (
typename std::set<LO>::const_iterator it = invalidDstPermuteCols.begin ();
1649 it != invalidDstPermuteCols.end (); ++it) {
1651 typename std::set<LO>::const_iterator itp1 = it;
1653 if (itp1 != invalidDstPermuteCols.end ()) {
1657 err <<
"], invalidPermuteFromRows = [";
1658 for (
typename std::set<LO>::const_iterator it = invalidPermuteFromRows.begin ();
1659 it != invalidPermuteFromRows.end (); ++it) {
1661 typename std::set<LO>::const_iterator itp1 = it;
1663 if (itp1 != invalidPermuteFromRows.end ()) {
1669 err <<
"copyAndPermute: The graph structure of the source of the "
1670 "Import or Export must be a subset of the graph structure of the "
1672 #endif // HAVE_TPETRA_DEBUG
1676 std::ostringstream os;
1677 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1678 const bool lclSuccess = ! (* (this->localError_));
1679 os <<
"*** Proc " << myRank <<
": copyAndPermute "
1680 << (lclSuccess ?
"succeeded" :
"FAILED");
1684 os <<
": error messages: " << this->errorMessages ();
1686 std::cerr << os.str ();
1710 template<
class LO,
class GO>
1712 packRowCount (
const size_t numEnt,
1713 const size_t numBytesPerValue,
1714 const size_t blkSize)
1716 using ::Tpetra::Details::PackTraits;
1727 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
1728 const size_t valsLen = numEnt * numBytesPerValue * blkSize * blkSize;
1729 return numEntLen + gidsLen + valsLen;
1743 template<
class ST,
class LO,
class GO>
1745 unpackRowCount (
const typename ::Tpetra::Details::PackTraits<LO>::input_buffer_type& imports,
1746 const size_t offset,
1747 const size_t numBytes,
1750 using Kokkos::subview;
1751 using ::Tpetra::Details::PackTraits;
1753 if (numBytes == 0) {
1755 return static_cast<size_t> (0);
1760 TEUCHOS_TEST_FOR_EXCEPTION
1761 (theNumBytes > numBytes, std::logic_error,
"unpackRowCount: "
1762 "theNumBytes = " << theNumBytes <<
" < numBytes = " << numBytes
1764 const char*
const inBuf = imports.data () + offset;
1766 TEUCHOS_TEST_FOR_EXCEPTION
1767 (actualNumBytes > numBytes, std::logic_error,
"unpackRowCount: "
1768 "actualNumBytes = " << actualNumBytes <<
" < numBytes = " << numBytes
1770 return static_cast<size_t> (numEntLO);
1777 template<
class ST,
class LO,
class GO>
1779 packRowForBlockCrs (
const typename ::Tpetra::Details::PackTraits<LO>::output_buffer_type exports,
1780 const size_t offset,
1781 const size_t numEnt,
1782 const typename ::Tpetra::Details::PackTraits<GO>::input_array_type& gidsIn,
1783 const typename ::Tpetra::Details::PackTraits<ST>::input_array_type& valsIn,
1784 const size_t numBytesPerValue,
1785 const size_t blockSize)
1787 using Kokkos::subview;
1788 using ::Tpetra::Details::PackTraits;
1794 const size_t numScalarEnt = numEnt * blockSize * blockSize;
1797 const LO numEntLO =
static_cast<size_t> (numEnt);
1799 const size_t numEntBeg = offset;
1801 const size_t gidsBeg = numEntBeg + numEntLen;
1802 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
1803 const size_t valsBeg = gidsBeg + gidsLen;
1804 const size_t valsLen = numScalarEnt * numBytesPerValue;
1806 char*
const numEntOut = exports.data () + numEntBeg;
1807 char*
const gidsOut = exports.data () + gidsBeg;
1808 char*
const valsOut = exports.data () + valsBeg;
1810 size_t numBytesOut = 0;
1815 Kokkos::pair<int, size_t> p;
1816 p = PackTraits<GO>::packArray (gidsOut, gidsIn.data (), numEnt);
1817 errorCode += p.first;
1818 numBytesOut += p.second;
1820 p = PackTraits<ST>::packArray (valsOut, valsIn.data (), numScalarEnt);
1821 errorCode += p.first;
1822 numBytesOut += p.second;
1825 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
1826 TEUCHOS_TEST_FOR_EXCEPTION
1827 (numBytesOut != expectedNumBytes, std::logic_error,
1828 "packRowForBlockCrs: numBytesOut = " << numBytesOut
1829 <<
" != expectedNumBytes = " << expectedNumBytes <<
".");
1831 TEUCHOS_TEST_FOR_EXCEPTION
1832 (errorCode != 0, std::runtime_error,
"packRowForBlockCrs: "
1833 "PackTraits::packArray returned a nonzero error code");
1839 template<
class ST,
class LO,
class GO>
1841 unpackRowForBlockCrs (
const typename ::Tpetra::Details::PackTraits<GO>::output_array_type& gidsOut,
1842 const typename ::Tpetra::Details::PackTraits<ST>::output_array_type& valsOut,
1843 const typename ::Tpetra::Details::PackTraits<int>::input_buffer_type& imports,
1844 const size_t offset,
1845 const size_t numBytes,
1846 const size_t numEnt,
1847 const size_t numBytesPerValue,
1848 const size_t blockSize)
1850 using ::Tpetra::Details::PackTraits;
1852 if (numBytes == 0) {
1856 const size_t numScalarEnt = numEnt * blockSize * blockSize;
1857 TEUCHOS_TEST_FOR_EXCEPTION
1858 (static_cast<size_t> (imports.extent (0)) <= offset,
1859 std::logic_error,
"unpackRowForBlockCrs: imports.extent(0) = "
1860 << imports.extent (0) <<
" <= offset = " << offset <<
".");
1861 TEUCHOS_TEST_FOR_EXCEPTION
1862 (static_cast<size_t> (imports.extent (0)) < offset + numBytes,
1863 std::logic_error,
"unpackRowForBlockCrs: imports.extent(0) = "
1864 << imports.extent (0) <<
" < offset + numBytes = "
1865 << (offset + numBytes) <<
".");
1870 const size_t numEntBeg = offset;
1872 const size_t gidsBeg = numEntBeg + numEntLen;
1873 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
1874 const size_t valsBeg = gidsBeg + gidsLen;
1875 const size_t valsLen = numScalarEnt * numBytesPerValue;
1877 const char*
const numEntIn = imports.data () + numEntBeg;
1878 const char*
const gidsIn = imports.data () + gidsBeg;
1879 const char*
const valsIn = imports.data () + valsBeg;
1881 size_t numBytesOut = 0;
1885 TEUCHOS_TEST_FOR_EXCEPTION
1886 (static_cast<size_t> (numEntOut) != numEnt, std::logic_error,
1887 "unpackRowForBlockCrs: Expected number of entries " << numEnt
1888 <<
" != actual number of entries " << numEntOut <<
".");
1891 Kokkos::pair<int, size_t> p;
1892 p = PackTraits<GO>::unpackArray (gidsOut.data (), gidsIn, numEnt);
1893 errorCode += p.first;
1894 numBytesOut += p.second;
1896 p = PackTraits<ST>::unpackArray (valsOut.data (), valsIn, numScalarEnt);
1897 errorCode += p.first;
1898 numBytesOut += p.second;
1901 TEUCHOS_TEST_FOR_EXCEPTION
1902 (numBytesOut != numBytes, std::logic_error,
1903 "unpackRowForBlockCrs: numBytesOut = " << numBytesOut
1904 <<
" != numBytes = " << numBytes <<
".");
1906 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
1907 TEUCHOS_TEST_FOR_EXCEPTION
1908 (numBytesOut != expectedNumBytes, std::logic_error,
1909 "unpackRowForBlockCrs: numBytesOut = " << numBytesOut
1910 <<
" != expectedNumBytes = " << expectedNumBytes <<
".");
1912 TEUCHOS_TEST_FOR_EXCEPTION
1913 (errorCode != 0, std::runtime_error,
"unpackRowForBlockCrs: "
1914 "PackTraits::unpackArray returned a nonzero error code");
1920 template<
class Scalar,
class LO,
class GO,
class Node>
1924 (const ::Tpetra::SrcDistObject& source,
1929 Kokkos::DualView<
size_t*,
1931 size_t& constantNumPackets)
1933 using ::Tpetra::Details::Behavior;
1935 using ::Tpetra::Details::ProfilingRegion;
1936 using ::Tpetra::Details::PackTraits;
1938 typedef typename Kokkos::View<int*, device_type>::HostMirror::execution_space host_exec;
1942 ProfilingRegion profile_region(
"Tpetra::BlockCrsMatrix::packAndPrepare");
1944 const bool debug = Behavior::debug();
1945 const bool verbose = Behavior::verbose();
1950 std::ostringstream os;
1951 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1952 os <<
"Proc " << myRank <<
": BlockCrsMatrix::packAndPrepare: " << std::endl;
1957 if (* (this->localError_)) {
1958 std::ostream& err = this->markLocalErrorAndGetStream ();
1960 <<
"The target object of the Import or Export is already in an error state."
1969 std::ostringstream os;
1970 os << prefix << std::endl
1974 std::cerr << os.str ();
1980 if (exportLIDs.extent (0) != numPacketsPerLID.extent (0)) {
1981 std::ostream& err = this->markLocalErrorAndGetStream ();
1983 <<
"exportLIDs.extent(0) = " << exportLIDs.extent (0)
1984 <<
" != numPacketsPerLID.extent(0) = " << numPacketsPerLID.extent(0)
1985 <<
"." << std::endl;
1988 if (exportLIDs.need_sync_host ()) {
1989 std::ostream& err = this->markLocalErrorAndGetStream ();
1990 err << prefix <<
"exportLIDs be sync'd to host." << std::endl;
1994 const this_BCRS_type* src =
dynamic_cast<const this_BCRS_type*
> (&source);
1996 std::ostream& err = this->markLocalErrorAndGetStream ();
1998 <<
"The source (input) object of the Import or "
1999 "Export is either not a BlockCrsMatrix, or does not have the right "
2000 "template parameters. checkSizes() should have caught this. "
2001 "Please report this bug to the Tpetra developers." << std::endl;
2013 constantNumPackets = 0;
2017 const size_t blockSize =
static_cast<size_t> (src->getBlockSize ());
2018 const size_t numExportLIDs = exportLIDs.extent (0);
2019 size_t numBytesPerValue(0);
2021 auto val_host = val_.getHostView(Access::ReadOnly);
2023 PackTraits<impl_scalar_type>
2031 Impl::BlockCrsRowStruct<size_t> rowReducerStruct;
2034 auto exportLIDsHost = exportLIDs.view_host();
2035 auto numPacketsPerLIDHost = numPacketsPerLID.view_host();
2036 numPacketsPerLID.modify_host ();
2038 rowReducerStruct = Impl::BlockCrsRowStruct<size_t>();
2039 for (
size_t i = 0; i < numExportLIDs; ++i) {
2040 const LO lclRow = exportLIDsHost(i);
2042 numEnt = (numEnt == Teuchos::OrdinalTraits<size_t>::invalid () ? 0 : numEnt);
2044 const size_t numBytes =
2045 packRowCount<LO, GO> (numEnt, numBytesPerValue, blockSize);
2046 numPacketsPerLIDHost(i) = numBytes;
2047 rowReducerStruct += Impl::BlockCrsRowStruct<size_t>(numEnt, numBytes, numEnt);
2054 const size_t totalNumBytes = rowReducerStruct.totalNumBytes;
2055 const size_t totalNumEntries = rowReducerStruct.totalNumEntries;
2056 const size_t maxRowLength = rowReducerStruct.maxRowLength;
2059 std::ostringstream os;
2061 <<
"totalNumBytes = " << totalNumBytes <<
", totalNumEntries = " << totalNumEntries
2063 std::cerr << os.str ();
2069 if(exports.extent(0) != totalNumBytes)
2071 const std::string oldLabel = exports.view_device().label ();
2072 const std::string newLabel = (oldLabel ==
"") ?
"exports" : oldLabel;
2073 exports = Kokkos::DualView<packet_type*, buffer_device_type>(newLabel, totalNumBytes);
2075 if (totalNumEntries > 0) {
2077 Kokkos::View<size_t*, host_exec> offset(
"offset", numExportLIDs+1);
2079 const auto policy = Kokkos::RangePolicy<host_exec>(size_t(0), numExportLIDs+1);
2080 Kokkos::parallel_scan
2082 [=](
const size_t &i,
size_t &update,
const bool &
final) {
2083 if (
final) offset(i) = update;
2084 update += (i == numExportLIDs ? 0 : numPacketsPerLIDHost(i));
2087 if (offset(numExportLIDs) != totalNumBytes) {
2088 std::ostream& err = this->markLocalErrorAndGetStream ();
2090 <<
"At end of method, the final offset (in bytes) "
2091 << offset(numExportLIDs) <<
" does not equal the total number of bytes packed "
2092 << totalNumBytes <<
". "
2093 <<
"Please report this bug to the Tpetra developers." << std::endl;
2107 exports.modify_host();
2109 typedef Kokkos::TeamPolicy<host_exec> policy_type;
2111 policy_type(numExportLIDs, 1, 1)
2112 .set_scratch_size(0, Kokkos::PerTeam(
sizeof(GO)*maxRowLength));
2117 Kokkos::parallel_for
2119 [=](
const typename policy_type::member_type &member) {
2120 const size_t i = member.league_rank();
2121 Kokkos::View<GO*, typename host_exec::scratch_memory_space>
2122 gblColInds(member.team_scratch(0), maxRowLength);
2124 const LO lclRowInd = exportLIDsHost(i);
2125 local_inds_host_view_type lclColInds;
2126 values_host_view_type vals;
2131 src->getLocalRowView (lclRowInd, lclColInds, vals);
2132 const size_t numEnt = lclColInds.extent(0);
2135 for (
size_t j = 0; j < numEnt; ++j)
2142 const size_t numBytes =
2143 packRowForBlockCrs<impl_scalar_type, LO, GO>
2144 (exports.view_host(),
2147 Kokkos::View<const GO*, host_exec>(gblColInds.data(), maxRowLength),
2154 const size_t offsetDiff = offset(i+1) - offset(i);
2155 if (numBytes != offsetDiff) {
2156 std::ostringstream os;
2158 <<
"numBytes computed from packRowForBlockCrs is different from "
2159 <<
"precomputed offset values, LID = " << i << std::endl;
2160 std::cerr << os.str ();
2169 std::ostringstream os;
2170 const bool lclSuccess = ! (* (this->localError_));
2172 << (lclSuccess ?
"succeeded" :
"FAILED")
2174 std::cerr << os.str ();
2178 template<
class Scalar,
class LO,
class GO,
class Node>
2186 Kokkos::DualView<
size_t*,
2191 using ::Tpetra::Details::Behavior;
2193 using ::Tpetra::Details::ProfilingRegion;
2194 using ::Tpetra::Details::PackTraits;
2197 typename Kokkos::View<int*, device_type>::HostMirror::execution_space;
2199 ProfilingRegion profile_region(
"Tpetra::BlockCrsMatrix::unpackAndCombine");
2200 const bool verbose = Behavior::verbose ();
2205 std::ostringstream os;
2206 auto map = this->graph_.getRowMap ();
2207 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
2208 const int myRank = comm.is_null () ? -1 : comm->getRank ();
2209 os <<
"Proc " << myRank <<
": Tpetra::BlockCrsMatrix::unpackAndCombine: ";
2212 os <<
"Start" << endl;
2213 std::cerr << os.str ();
2218 if (* (this->localError_)) {
2219 std::ostream& err = this->markLocalErrorAndGetStream ();
2220 std::ostringstream os;
2221 os << prefix <<
"{Im/Ex}port target is already in error." << endl;
2223 std::cerr << os.str ();
2232 if (importLIDs.extent (0) != numPacketsPerLID.extent (0)) {
2233 std::ostream& err = this->markLocalErrorAndGetStream ();
2234 std::ostringstream os;
2235 os << prefix <<
"importLIDs.extent(0) = " << importLIDs.extent (0)
2236 <<
" != numPacketsPerLID.extent(0) = " << numPacketsPerLID.extent(0)
2239 std::cerr << os.str ();
2245 if (combineMode !=
ADD && combineMode !=
INSERT &&
2247 combineMode !=
ZERO) {
2248 std::ostream& err = this->markLocalErrorAndGetStream ();
2249 std::ostringstream os;
2251 <<
"Invalid CombineMode value " << combineMode <<
". Valid "
2252 <<
"values include ADD, INSERT, REPLACE, ABSMAX, and ZERO."
2255 std::cerr << os.str ();
2261 if (this->graph_.getColMap ().is_null ()) {
2262 std::ostream& err = this->markLocalErrorAndGetStream ();
2263 std::ostringstream os;
2264 os << prefix <<
"Target matrix's column Map is null." << endl;
2266 std::cerr << os.str ();
2275 const map_type& tgtColMap = * (this->graph_.getColMap ());
2278 const size_t blockSize = this->getBlockSize ();
2279 const size_t numImportLIDs = importLIDs.extent(0);
2286 size_t numBytesPerValue(0);
2288 auto val_host = val_.getHostView(Access::ReadOnly);
2290 PackTraits<impl_scalar_type>::packValueCount
2293 const size_t maxRowNumEnt = graph_.getLocalMaxNumRowEntries ();
2294 const size_t maxRowNumScalarEnt = maxRowNumEnt * blockSize * blockSize;
2297 std::ostringstream os;
2298 os << prefix <<
"combineMode: "
2299 << ::Tpetra::combineModeToString (combineMode)
2300 <<
", blockSize: " << blockSize
2301 <<
", numImportLIDs: " << numImportLIDs
2302 <<
", numBytesPerValue: " << numBytesPerValue
2303 <<
", maxRowNumEnt: " << maxRowNumEnt
2304 <<
", maxRowNumScalarEnt: " << maxRowNumScalarEnt << endl;
2305 std::cerr << os.str ();
2308 if (combineMode ==
ZERO || numImportLIDs == 0) {
2310 std::ostringstream os;
2311 os << prefix <<
"Nothing to unpack. Done!" << std::endl;
2312 std::cerr << os.str ();
2319 if (imports.need_sync_host ()) {
2320 imports.sync_host ();
2330 if (imports.need_sync_host () || numPacketsPerLID.need_sync_host () ||
2331 importLIDs.need_sync_host ()) {
2332 std::ostream& err = this->markLocalErrorAndGetStream ();
2333 std::ostringstream os;
2334 os << prefix <<
"All input DualViews must be sync'd to host by now. "
2335 <<
"imports_nc.need_sync_host()="
2336 << (imports.need_sync_host () ?
"true" :
"false")
2337 <<
", numPacketsPerLID.need_sync_host()="
2338 << (numPacketsPerLID.need_sync_host () ?
"true" :
"false")
2339 <<
", importLIDs.need_sync_host()="
2340 << (importLIDs.need_sync_host () ?
"true" :
"false")
2343 std::cerr << os.str ();
2349 const auto importLIDsHost = importLIDs.view_host ();
2350 const auto numPacketsPerLIDHost = numPacketsPerLID.view_host ();
2356 Kokkos::View<size_t*, host_exec> offset (
"offset", numImportLIDs+1);
2358 const auto policy = Kokkos::RangePolicy<host_exec>(size_t(0), numImportLIDs+1);
2359 Kokkos::parallel_scan
2360 (
"Tpetra::BlockCrsMatrix::unpackAndCombine: offsets", policy,
2361 [=] (
const size_t &i,
size_t &update,
const bool &
final) {
2362 if (
final) offset(i) = update;
2363 update += (i == numImportLIDs ? 0 : numPacketsPerLIDHost(i));
2373 Kokkos::View<int, host_exec, Kokkos::MemoryTraits<Kokkos::Atomic> >
2374 errorDuringUnpack (
"errorDuringUnpack");
2375 errorDuringUnpack () = 0;
2377 using policy_type = Kokkos::TeamPolicy<host_exec>;
2378 size_t scratch_per_row =
sizeof(GO) * maxRowNumEnt +
sizeof (LO) * maxRowNumEnt + numBytesPerValue * maxRowNumScalarEnt
2381 const auto policy = policy_type (numImportLIDs, 1, 1)
2382 .set_scratch_size (0, Kokkos::PerTeam (scratch_per_row));
2383 using host_scratch_space =
typename host_exec::scratch_memory_space;
2385 using pair_type = Kokkos::pair<size_t, size_t>;
2388 getValuesHostNonConst();
2390 Kokkos::parallel_for
2391 (
"Tpetra::BlockCrsMatrix::unpackAndCombine: unpack", policy,
2392 [=] (
const typename policy_type::member_type& member) {
2393 const size_t i = member.league_rank();
2394 Kokkos::View<GO*, host_scratch_space> gblColInds
2395 (member.team_scratch (0), maxRowNumEnt);
2396 Kokkos::View<LO*, host_scratch_space> lclColInds
2397 (member.team_scratch (0), maxRowNumEnt);
2398 Kokkos::View<impl_scalar_type*, host_scratch_space> vals
2399 (member.team_scratch (0), maxRowNumScalarEnt);
2402 const size_t offval = offset(i);
2403 const LO lclRow = importLIDsHost(i);
2404 const size_t numBytes = numPacketsPerLIDHost(i);
2405 const size_t numEnt =
2406 unpackRowCount<impl_scalar_type, LO, GO>
2407 (imports.view_host (), offval, numBytes, numBytesPerValue);
2410 if (numEnt > maxRowNumEnt) {
2411 errorDuringUnpack() = 1;
2413 std::ostringstream os;
2415 <<
"At i = " << i <<
", numEnt = " << numEnt
2416 <<
" > maxRowNumEnt = " << maxRowNumEnt
2418 std::cerr << os.str ();
2423 const size_t numScalarEnt = numEnt*blockSize*blockSize;
2424 auto gidsOut = Kokkos::subview(gblColInds, pair_type(0, numEnt));
2425 auto lidsOut = Kokkos::subview(lclColInds, pair_type(0, numEnt));
2426 auto valsOut = Kokkos::subview(vals, pair_type(0, numScalarEnt));
2431 size_t numBytesOut = 0;
2434 unpackRowForBlockCrs<impl_scalar_type, LO, GO>
2435 (Kokkos::View<GO*,host_exec>(gidsOut.data(), numEnt),
2436 Kokkos::View<impl_scalar_type*,host_exec>(valsOut.data(), numScalarEnt),
2437 imports.view_host(),
2438 offval, numBytes, numEnt,
2439 numBytesPerValue, blockSize);
2441 catch (std::exception& e) {
2442 errorDuringUnpack() = 1;
2444 std::ostringstream os;
2445 os << prefix <<
"At i = " << i <<
", unpackRowForBlockCrs threw: "
2446 << e.what () << endl;
2447 std::cerr << os.str ();
2452 if (numBytes != numBytesOut) {
2453 errorDuringUnpack() = 1;
2455 std::ostringstream os;
2457 <<
"At i = " << i <<
", numBytes = " << numBytes
2458 <<
" != numBytesOut = " << numBytesOut <<
"."
2460 std::cerr << os.str();
2466 for (
size_t k = 0; k < numEnt; ++k) {
2468 if (lidsOut(k) == Teuchos::OrdinalTraits<LO>::invalid ()) {
2470 std::ostringstream os;
2472 <<
"At i = " << i <<
", GID " << gidsOut(k)
2473 <<
" is not owned by the calling process."
2475 std::cerr << os.str();
2483 const LO*
const lidsRaw =
const_cast<const LO*
> (lidsOut.data ());
2484 const Scalar*
const valsRaw =
reinterpret_cast<const Scalar*
>
2486 if (combineMode ==
ADD) {
2487 numCombd = this->sumIntoLocalValues (lclRow, lidsRaw, valsRaw, numEnt);
2488 }
else if (combineMode ==
INSERT || combineMode ==
REPLACE) {
2489 numCombd = this->replaceLocalValues (lclRow, lidsRaw, valsRaw, numEnt);
2490 }
else if (combineMode ==
ABSMAX) {
2491 numCombd = this->absMaxLocalValues (lclRow, lidsRaw, valsRaw, numEnt);
2494 if (static_cast<LO> (numEnt) != numCombd) {
2495 errorDuringUnpack() = 1;
2497 std::ostringstream os;
2498 os << prefix <<
"At i = " << i <<
", numEnt = " << numEnt
2499 <<
" != numCombd = " << numCombd <<
"."
2501 std::cerr << os.str ();
2509 if (errorDuringUnpack () != 0) {
2510 std::ostream& err = this->markLocalErrorAndGetStream ();
2511 err << prefix <<
"Unpacking failed.";
2513 err <<
" Please run again with the environment variable setting "
2514 "TPETRA_VERBOSE=1 to get more verbose diagnostic output.";
2520 std::ostringstream os;
2521 const bool lclSuccess = ! (* (this->localError_));
2523 << (lclSuccess ?
"succeeded" :
"FAILED")
2525 std::cerr << os.str ();
2529 template<
class Scalar,
class LO,
class GO,
class Node>
2533 using Teuchos::TypeNameTraits;
2534 std::ostringstream os;
2535 os <<
"\"Tpetra::BlockCrsMatrix\": { "
2536 <<
"Template parameters: { "
2537 <<
"Scalar: " << TypeNameTraits<Scalar>::name ()
2538 <<
"LO: " << TypeNameTraits<LO>::name ()
2539 <<
"GO: " << TypeNameTraits<GO>::name ()
2540 <<
"Node: " << TypeNameTraits<Node>::name ()
2542 <<
", Label: \"" << this->getObjectLabel () <<
"\""
2543 <<
", Global dimensions: ["
2544 << graph_.getDomainMap ()->getGlobalNumElements () <<
", "
2545 << graph_.getRangeMap ()->getGlobalNumElements () <<
"]"
2546 <<
", Block size: " << getBlockSize ()
2552 template<
class Scalar,
class LO,
class GO,
class Node>
2556 const Teuchos::EVerbosityLevel verbLevel)
const
2558 using Teuchos::ArrayRCP;
2559 using Teuchos::CommRequest;
2560 using Teuchos::FancyOStream;
2561 using Teuchos::getFancyOStream;
2562 using Teuchos::ireceive;
2563 using Teuchos::isend;
2564 using Teuchos::outArg;
2565 using Teuchos::TypeNameTraits;
2566 using Teuchos::VERB_DEFAULT;
2567 using Teuchos::VERB_NONE;
2568 using Teuchos::VERB_LOW;
2569 using Teuchos::VERB_MEDIUM;
2571 using Teuchos::VERB_EXTREME;
2573 using Teuchos::wait;
2577 const Teuchos::EVerbosityLevel vl =
2578 (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
2580 if (vl == VERB_NONE) {
2585 Teuchos::OSTab tab0 (out);
2587 out <<
"\"Tpetra::BlockCrsMatrix\":" << endl;
2588 Teuchos::OSTab tab1 (out);
2590 out <<
"Template parameters:" << endl;
2592 Teuchos::OSTab tab2 (out);
2593 out <<
"Scalar: " << TypeNameTraits<Scalar>::name () << endl
2594 <<
"LO: " << TypeNameTraits<LO>::name () << endl
2595 <<
"GO: " << TypeNameTraits<GO>::name () << endl
2596 <<
"Node: " << TypeNameTraits<Node>::name () << endl;
2598 out <<
"Label: \"" << this->getObjectLabel () <<
"\"" << endl
2599 <<
"Global dimensions: ["
2600 << graph_.getDomainMap ()->getGlobalNumElements () <<
", "
2601 << graph_.getRangeMap ()->getGlobalNumElements () <<
"]" << endl;
2603 const LO blockSize = getBlockSize ();
2604 out <<
"Block size: " << blockSize << endl;
2607 if (vl >= VERB_MEDIUM) {
2608 const Teuchos::Comm<int>& comm = * (graph_.getMap ()->getComm ());
2609 const int myRank = comm.getRank ();
2611 out <<
"Row Map:" << endl;
2613 getRowMap()->describe (out, vl);
2615 if (! getColMap ().is_null ()) {
2616 if (getColMap() == getRowMap()) {
2618 out <<
"Column Map: same as row Map" << endl;
2623 out <<
"Column Map:" << endl;
2625 getColMap ()->describe (out, vl);
2628 if (! getDomainMap ().is_null ()) {
2629 if (getDomainMap () == getRowMap ()) {
2631 out <<
"Domain Map: same as row Map" << endl;
2634 else if (getDomainMap () == getColMap ()) {
2636 out <<
"Domain Map: same as column Map" << endl;
2641 out <<
"Domain Map:" << endl;
2643 getDomainMap ()->describe (out, vl);
2646 if (! getRangeMap ().is_null ()) {
2647 if (getRangeMap () == getDomainMap ()) {
2649 out <<
"Range Map: same as domain Map" << endl;
2652 else if (getRangeMap () == getRowMap ()) {
2654 out <<
"Range Map: same as row Map" << endl;
2659 out <<
"Range Map: " << endl;
2661 getRangeMap ()->describe (out, vl);
2666 if (vl >= VERB_EXTREME) {
2667 const Teuchos::Comm<int>& comm = * (graph_.getMap ()->getComm ());
2668 const int myRank = comm.getRank ();
2669 const int numProcs = comm.getSize ();
2672 RCP<std::ostringstream> lclOutStrPtr (
new std::ostringstream ());
2673 RCP<FancyOStream> osPtr = getFancyOStream (lclOutStrPtr);
2674 FancyOStream& os = *osPtr;
2675 os <<
"Process " << myRank <<
":" << endl;
2676 Teuchos::OSTab tab2 (os);
2678 const map_type& meshRowMap = * (graph_.getRowMap ());
2679 const map_type& meshColMap = * (graph_.getColMap ());
2684 os <<
"Row " << meshGblRow <<
": {";
2686 local_inds_host_view_type lclColInds;
2687 values_host_view_type vals;
2689 this->getLocalRowView (meshLclRow, lclColInds, vals); numInds = lclColInds.extent(0);
2691 for (LO k = 0; k < numInds; ++k) {
2694 os <<
"Col " << gblCol <<
": [";
2695 for (LO i = 0; i < blockSize; ++i) {
2696 for (LO j = 0; j < blockSize; ++j) {
2697 os << vals[blockSize*blockSize*k + i*blockSize + j];
2698 if (j + 1 < blockSize) {
2702 if (i + 1 < blockSize) {
2707 if (k + 1 < numInds) {
2717 out << lclOutStrPtr->str ();
2718 lclOutStrPtr = Teuchos::null;
2721 const int sizeTag = 1337;
2722 const int dataTag = 1338;
2724 ArrayRCP<char> recvDataBuf;
2728 for (
int p = 1; p < numProcs; ++p) {
2731 ArrayRCP<size_t> recvSize (1);
2733 RCP<CommRequest<int> > recvSizeReq =
2734 ireceive<int, size_t> (recvSize, p, sizeTag, comm);
2735 wait<int> (comm, outArg (recvSizeReq));
2736 const size_t numCharsToRecv = recvSize[0];
2743 if (static_cast<size_t>(recvDataBuf.size()) < numCharsToRecv + 1) {
2744 recvDataBuf.resize (numCharsToRecv + 1);
2746 ArrayRCP<char> recvData = recvDataBuf.persistingView (0, numCharsToRecv);
2748 RCP<CommRequest<int> > recvDataReq =
2749 ireceive<int, char> (recvData, p, dataTag, comm);
2750 wait<int> (comm, outArg (recvDataReq));
2755 recvDataBuf[numCharsToRecv] =
'\0';
2756 out << recvDataBuf.getRawPtr ();
2758 else if (myRank == p) {
2762 const std::string stringToSend = lclOutStrPtr->str ();
2763 lclOutStrPtr = Teuchos::null;
2766 const size_t numCharsToSend = stringToSend.size ();
2767 ArrayRCP<size_t> sendSize (1);
2768 sendSize[0] = numCharsToSend;
2769 RCP<CommRequest<int> > sendSizeReq =
2770 isend<int, size_t> (sendSize, 0, sizeTag, comm);
2771 wait<int> (comm, outArg (sendSizeReq));
2779 ArrayRCP<const char> sendData (&stringToSend[0], 0, numCharsToSend,
false);
2780 RCP<CommRequest<int> > sendDataReq =
2781 isend<int, char> (sendData, 0, dataTag, comm);
2782 wait<int> (comm, outArg (sendDataReq));
2788 template<
class Scalar,
class LO,
class GO,
class Node>
2789 Teuchos::RCP<const Teuchos::Comm<int> >
2793 return graph_.getComm();
2797 template<
class Scalar,
class LO,
class GO,
class Node>
2802 return graph_.getGlobalNumCols();
2806 template<
class Scalar,
class LO,
class GO,
class Node>
2811 return graph_.getLocalNumCols();
2814 template<
class Scalar,
class LO,
class GO,
class Node>
2819 return graph_.getIndexBase();
2822 template<
class Scalar,
class LO,
class GO,
class Node>
2827 return graph_.getGlobalNumEntries();
2830 template<
class Scalar,
class LO,
class GO,
class Node>
2835 return graph_.getLocalNumEntries();
2838 template<
class Scalar,
class LO,
class GO,
class Node>
2843 return graph_.getNumEntriesInGlobalRow(globalRow);
2847 template<
class Scalar,
class LO,
class GO,
class Node>
2852 return graph_.getGlobalMaxNumRowEntries();
2855 template<
class Scalar,
class LO,
class GO,
class Node>
2860 return graph_.hasColMap();
2864 template<
class Scalar,
class LO,
class GO,
class Node>
2869 return graph_.isLocallyIndexed();
2872 template<
class Scalar,
class LO,
class GO,
class Node>
2877 return graph_.isGloballyIndexed();
2880 template<
class Scalar,
class LO,
class GO,
class Node>
2885 return graph_.isFillComplete ();
2888 template<
class Scalar,
class LO,
class GO,
class Node>
2896 template<
class Scalar,
class LO,
class GO,
class Node>
2900 nonconst_global_inds_host_view_type &,
2901 nonconst_values_host_view_type &,
2904 TEUCHOS_TEST_FOR_EXCEPTION(
2905 true, std::logic_error,
"Tpetra::BlockCrsMatrix::getGlobalRowCopy: "
2906 "This class doesn't support global matrix indexing.");
2910 template<
class Scalar,
class LO,
class GO,
class Node>
2914 global_inds_host_view_type &,
2915 values_host_view_type &)
const
2917 TEUCHOS_TEST_FOR_EXCEPTION(
2918 true, std::logic_error,
"Tpetra::BlockCrsMatrix::getGlobalRowView: "
2919 "This class doesn't support global matrix indexing.");
2923 template<
class Scalar,
class LO,
class GO,
class Node>
2927 local_inds_host_view_type &colInds,
2928 values_host_view_type &vals)
const
2930 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
2931 colInds = local_inds_host_view_type();
2932 vals = values_host_view_type();
2935 const size_t absBlockOffsetStart = ptrHost_[localRowInd];
2936 const size_t numInds = ptrHost_[localRowInd + 1] - absBlockOffsetStart;
2937 colInds = local_inds_host_view_type(indHost_.data()+absBlockOffsetStart, numInds);
2939 vals = getValuesHost (localRowInd);
2943 template<
class Scalar,
class LO,
class GO,
class Node>
2947 local_inds_host_view_type &colInds,
2948 nonconst_values_host_view_type &vals)
const
2950 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
2951 colInds = nonconst_local_inds_host_view_type();
2952 vals = nonconst_values_host_view_type();
2955 const size_t absBlockOffsetStart = ptrHost_[localRowInd];
2956 const size_t numInds = ptrHost_[localRowInd + 1] - absBlockOffsetStart;
2957 colInds = local_inds_host_view_type(indHost_.data()+absBlockOffsetStart, numInds);
2964 template<
class Scalar,
class LO,
class GO,
class Node>
2969 const size_t lclNumMeshRows = graph_.getLocalNumRows ();
2971 Kokkos::View<size_t*, device_type> diagOffsets (
"diagOffsets", lclNumMeshRows);
2972 graph_.getLocalDiagOffsets (diagOffsets);
2975 auto diagOffsetsHost = Kokkos::create_mirror_view (diagOffsets);
2979 auto vals_host_out = val_.getHostView(Access::ReadOnly);
2983 size_t rowOffset = 0;
2985 LO bs = getBlockSize();
2986 for(
size_t r=0; r<getLocalNumRows(); r++)
2989 offset = rowOffset + diagOffsetsHost(r)*bs*bs;
2990 for(
int b=0; b<bs; b++)
2995 rowOffset += getNumEntriesInLocalRow(r)*bs*bs;
3001 template<
class Scalar,
class LO,
class GO,
class Node>
3006 TEUCHOS_TEST_FOR_EXCEPTION(
3007 true, std::logic_error,
"Tpetra::BlockCrsMatrix::leftScale: "
3008 "not implemented.");
3012 template<
class Scalar,
class LO,
class GO,
class Node>
3017 TEUCHOS_TEST_FOR_EXCEPTION(
3018 true, std::logic_error,
"Tpetra::BlockCrsMatrix::rightScale: "
3019 "not implemented.");
3023 template<
class Scalar,
class LO,
class GO,
class Node>
3024 Teuchos::RCP<const ::Tpetra::RowGraph<LO, GO, Node> >
3031 template<
class Scalar,
class LO,
class GO,
class Node>
3032 typename ::Tpetra::RowMatrix<Scalar, LO, GO, Node>::mag_type
3036 TEUCHOS_TEST_FOR_EXCEPTION(
3037 true, std::logic_error,
"Tpetra::BlockCrsMatrix::getFrobeniusNorm: "
3038 "not implemented.");
3048 #define TPETRA_BLOCKCRSMATRIX_INSTANT(S,LO,GO,NODE) \
3049 template class BlockCrsMatrix< S, LO, GO, NODE >;
3051 #endif // TPETRA_BLOCKCRSMATRIX_DEF_HPP
virtual void getLocalRowCopy(LO LocalRow, nonconst_local_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const override
Not implemented.
size_t getNumEntriesInLocalRow(const LO localRowInd) const override
Return the number of entries in the given row on the calling process.
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
virtual size_t getLocalNumEntries() const override
The local number of stored (structurally nonzero) entries.
virtual global_size_t getGlobalNumCols() const override
The global number of columns of this matrix.
typename BMV::impl_scalar_type impl_scalar_type
The implementation type of entries in the matrix.
KokkosSparse::StaticCrsGraph< local_ordinal_type, Kokkos::LayoutLeft, device_type, void, size_t > local_graph_device_type
The type of the part of the sparse graph on each MPI process.
void getLocalDiagCopy(const Kokkos::View< impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &diag, const Kokkos::View< const size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Variant of getLocalDiagCopy() that uses precomputed offsets and puts diagonal blocks in a 3-D Kokkos:...
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel) const override
Print a description of this object to the given output stream.
Kokkos::View< impl_scalar_type **, Impl::BlockCrsMatrixLittleBlockArrayLayout, device_type, Kokkos::MemoryTraits< Kokkos::Unmanaged > > little_block_type
The type used to access nonconst matrix blocks.
virtual typename::Tpetra::RowMatrix< Scalar, LO, GO, Node >::mag_type getFrobeniusNorm() const override
The Frobenius norm of the matrix.
LO replaceLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Replace values at the given (mesh, i.e., block) column indices, in the given (mesh, i.e., block) row.
virtual void leftScale(const ::Tpetra::Vector< Scalar, LO, GO, Node > &x) override
Scale the RowMatrix on the left with the given Vector x.
Teuchos::RCP< const map_type > getColMap() const override
Returns the Map that describes the column distribution in this graph.
LO replaceLocalValuesByOffsets(const LO localRowInd, const ptrdiff_t offsets[], const Scalar vals[], const LO numOffsets) const
Like replaceLocalValues, but avoids computing row offsets.
std::string description() const override
One-line description of this object.
Sparse matrix whose entries are small dense square blocks, all of the same dimensions.
typename DistObject< Scalar, LO, GO, Node >::buffer_device_type buffer_device_type
Kokkos::Device specialization for communication buffers.
void exportAndFillComplete(Teuchos::RCP< BlockCrsMatrix< Scalar, LO, GO, Node > > &destMatrix, const Export< LO, GO, Node > &exporter) const
Import from this to the given destination matrix, and make the result fill complete.
virtual bool hasColMap() const override
Whether this matrix has a well-defined column Map.
virtual bool isLocallyIndexed() const override
Whether matrix indices are locally indexed.
LO local_ordinal_type
The type of local indices.
static KOKKOS_INLINE_FUNCTION size_t unpackValue(LO &outVal, const char inBuf[])
Unpack the given value from the given output buffer.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
void getLocalRowViewNonConst(LO LocalRow, local_inds_host_view_type &indices, nonconst_values_host_view_type &values) const
size_t getNumEntriesInLocalRow(local_ordinal_type localRow) const override
Get the number of entries in the given row (local index).
bool isConstantStride() const
Whether this multivector has constant stride between columns.
void setAllToScalar(const Scalar &alpha)
Set all matrix entries equal to alpha.
One or more distributed dense vectors.
Teuchos::RCP< const map_type > getDomainMap() const override
Returns the Map associated with the domain of this graph.
Linear algebra kernels for small dense matrices and vectors.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
Scalar scalar_type
The type of entries in the matrix (that is, of each entry in each block).
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
size_t getLocalNumRows() const override
get the local number of block rows
virtual Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
The communicator over which this matrix is distributed.
Kokkos::LayoutRight BlockCrsMatrixLittleBlockArrayLayout
give an option to use layoutleft
virtual void rightScale(const ::Tpetra::Vector< Scalar, LO, GO, Node > &x) override
Scale the RowMatrix on the right with the given Vector x.
void getLocalRowView(LO LocalRow, local_inds_host_view_type &indices, values_host_view_type &values) const override
Get a view of the (mesh, i.e., block) row, using local (mesh, i.e., block) indices.
static map_type makePointMap(const map_type &meshMap, const LO blockSize)
Create and return the point Map corresponding to the given mesh Map and block size.
local_ordinal_type getMinLocalIndex() const
The minimum local index.
MultiVector for multiple degrees of freedom per mesh point.
virtual void copyAndPermute(const SrcDistObject &sourceObj, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM) override
KOKKOS_INLINE_FUNCTION void absMax(const ViewType2 &Y, const ViewType1 &X)
Implementation of Tpetra's ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix, and the small dense vectors in BlockMultiVector and BlockVector.
Teuchos::RCP< const map_type > getColMap() const override
get the (mesh) map for the columns of this block matrix.
virtual size_t getGlobalMaxNumRowEntries() const override
The maximum number of entries in any row over all processes in the matrix's communicator.
size_t global_size_t
Global size_t object.
virtual void getGlobalRowView(GO GlobalRow, global_inds_host_view_type &indices, values_host_view_type &values) const override
Get a constant, nonpersisting, globally indexed view of the given row of the matrix.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
BlockCrsMatrix()
Default constructor: Makes an empty block matrix.
LO absMaxLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Variant of getLocalDiagCopy() that uses precomputed offsets and puts diagonal blocks in a 3-D Kokkos:...
LO getBlockSize() const
Get the number of degrees of freedom per mesh point.
Teuchos::RCP< const map_type > getDomainMap() const override
Get the (point) domain Map of this matrix.
Teuchos::RCP< const map_type > getRowMap() const override
get the (mesh) map for the rows of this block matrix.
static KOKKOS_INLINE_FUNCTION size_t packValue(char outBuf[], const LO &inVal)
Pack the given value of type value_type into the given output buffer of bytes (char).
Insert new values that don't currently exist.
bool isSorted() const
Whether graph indices in all rows are known to be sorted.
LO sumIntoLocalValuesByOffsets(const LO localRowInd, const ptrdiff_t offsets[], const Scalar vals[], const LO numOffsets) const
Like sumIntoLocalValues, but avoids computing row offsets.
global_size_t getGlobalNumRows() const override
get the global number of block rows
global_ordinal_type getGlobalElement(local_ordinal_type localIndex) const
The global index corresponding to the given local index.
void replaceLocalValue(const LocalOrdinal myRow, const Scalar &value)
Replace current value at the specified location with specified values.
local_ordinal_type getMaxLocalIndex() const
The maximum local index on the calling process.
bool isNodeLocalElement(local_ordinal_type localIndex) const
Whether the given local index is valid for this Map on the calling process.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
virtual Teuchos::RCP< const ::Tpetra::RowGraph< LO, GO, Node > > getGraph() const override
Get the (mesh) graph.
virtual GO getIndexBase() const override
The index base for global indices in this matrix.
void applyBlock(const BlockMultiVector< Scalar, LO, GO, Node > &X, BlockMultiVector< Scalar, LO, GO, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, const Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), const Scalar beta=Teuchos::ScalarTraits< Scalar >::zero())
Version of apply() that takes BlockMultiVector input and output.
CombineMode
Rule for combining data in an Import or Export.
virtual bool isGloballyIndexed() const override
Whether matrix indices are globally indexed.
LO getLocalRowOffsets(const LO localRowInd, ptrdiff_t offsets[], const LO colInds[], const LO numColInds) const
Get relative offsets corresponding to the given rows, given by local row index.
size_t getLocalMaxNumRowEntries() const override
Maximum number of entries in any row of the matrix, on this process.
virtual global_size_t getGlobalNumEntries() const override
The global number of stored (structurally nonzero) entries.
Replace old value with maximum of magnitudes of old and new values.
void apply(const mv_type &X, mv_type &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const override
For this matrix A, compute Y := beta * Y + alpha * Op(A) * X.
Replace existing values with new values.
device_type::execution_space execution_space
The Kokkos execution space that this class uses.
Teuchos::RCP< const map_type > getRangeMap() const override
Returns the Map associated with the domain of this graph.
void enableWDVTracking()
Enable WrappedDualView reference-count tracking and syncing. Call this after exiting a host-parallel ...
Replace old values with zero.
Teuchos::RCP< const map_type > getRangeMap() const override
Get the (point) range Map of this matrix.
KOKKOS_INLINE_FUNCTION void COPY(const ViewType1 &x, const ViewType2 &y)
Deep copy x into y, where x and y are either rank 1 (vectors) or rank 2 (matrices) with the same dime...
std::string dualViewStatusToString(const DualViewType &dv, const char name[])
Return the status of the given Kokkos::DualView, as a human-readable string.
void getLocalDiagOffsets(const Kokkos::View< size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Get offsets of the diagonal entries in the matrix.
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const LO &)
Number of bytes required to pack or unpack the given value of type value_type.
char packet_type
Implementation detail; tells.
local_matrix_device_type getLocalMatrixDevice() const
void disableWDVTracking()
Disable WrappedDualView reference-count tracking and syncing. Call this before entering a host-parall...
local_ordinal_type getLocalElement(global_ordinal_type globalIndex) const
The local index corresponding to the given global index.
A distributed dense vector.
virtual bool supportsRowViews() const override
Whether this object implements getLocalRowView() and getGlobalRowView().
virtual void packAndPrepare(const SrcDistObject &sourceObj, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &exportLIDs, Kokkos::DualView< packet_type *, buffer_device_type > &exports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, size_t &constantNumPackets) override
virtual void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< packet_type *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode) override
virtual void getGlobalRowCopy(GO GlobalRow, nonconst_global_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const override
Get a copy of the given global row's entries.
bool locallySameAs(const Map< local_ordinal_type, global_ordinal_type, node_type > &map) const
Is this Map locally the same as the input Map?
virtual bool isFillComplete() const override
Whether fillComplete() has been called.
impl_scalar_type_dualview::t_host getValuesHostNonConst() const
Get the host or device View of the matrix's values (val_).
Base class for distributed Tpetra objects that support data redistribution.
LO sumIntoLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Sum into values at the given (mesh, i.e., block) column indices, in the given (mesh, i.e., block) row.
virtual size_t getLocalNumCols() const override
The number of columns needed to apply the forward operator on this node.
void importAndFillComplete(Teuchos::RCP< BlockCrsMatrix< Scalar, LO, GO, Node > > &destMatrix, const Import< LO, GO, Node > &importer) const
Import from this to the given destination matrix, and make the result fill complete.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
virtual size_t getNumEntriesInGlobalRow(GO globalRow) const override
The current number of entries on the calling process in the specified global row. ...