12 #ifndef TPETRA_BLOCKCRSMATRIX_DEF_HPP
13 #define TPETRA_BLOCKCRSMATRIX_DEF_HPP
21 #include "Tpetra_BlockMultiVector.hpp"
24 #include "Teuchos_TimeMonitor.hpp"
25 #ifdef HAVE_TPETRA_DEBUG
27 #endif // HAVE_TPETRA_DEBUG
29 #include "KokkosSparse_spmv.hpp"
36 struct BlockCrsRowStruct {
37 T totalNumEntries, totalNumBytes, maxRowLength;
38 KOKKOS_DEFAULTED_FUNCTION BlockCrsRowStruct() =
default;
39 KOKKOS_DEFAULTED_FUNCTION BlockCrsRowStruct(
const BlockCrsRowStruct &b) =
default;
40 KOKKOS_INLINE_FUNCTION BlockCrsRowStruct(
const T& numEnt,
const T& numBytes,
const T& rowLength)
41 : totalNumEntries(numEnt), totalNumBytes(numBytes), maxRowLength(rowLength) {}
46 KOKKOS_INLINE_FUNCTION
47 void operator+=(BlockCrsRowStruct<T> &a,
const BlockCrsRowStruct<T> &b) {
48 a.totalNumEntries += b.totalNumEntries;
49 a.totalNumBytes += b.totalNumBytes;
50 a.maxRowLength = a.maxRowLength > b.maxRowLength ? a.maxRowLength : b.maxRowLength;
53 template<
typename T,
typename ExecSpace>
54 struct BlockCrsReducer {
55 typedef BlockCrsReducer reducer;
57 typedef Kokkos::View<value_type,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type;
60 KOKKOS_INLINE_FUNCTION
61 BlockCrsReducer(value_type &val) : value(&val) {}
63 KOKKOS_INLINE_FUNCTION
void join(value_type &dst, value_type &src)
const { dst += src; }
64 KOKKOS_INLINE_FUNCTION
void join(value_type &dst,
const value_type &src)
const { dst += src; }
65 KOKKOS_INLINE_FUNCTION
void init(value_type &val)
const { val = value_type(); }
66 KOKKOS_INLINE_FUNCTION value_type& reference() {
return *value; }
67 KOKKOS_INLINE_FUNCTION result_view_type view()
const {
return result_view_type(value); }
76 template<
class Scalar,
class LO,
class GO,
class Node>
77 class GetLocalDiagCopy {
79 typedef typename Node::device_type device_type;
80 typedef size_t diag_offset_type;
81 typedef Kokkos::View<
const size_t*, device_type,
82 Kokkos::MemoryUnmanaged> diag_offsets_type;
83 typedef typename ::Tpetra::CrsGraph<LO, GO, Node> global_graph_type;
85 typedef typename local_graph_device_type::row_map_type row_offsets_type;
86 typedef typename ::Tpetra::BlockMultiVector<Scalar, LO, GO, Node>::impl_scalar_type IST;
87 typedef Kokkos::View<IST***, device_type, Kokkos::MemoryUnmanaged> diag_type;
88 typedef Kokkos::View<const IST*, device_type, Kokkos::MemoryUnmanaged> values_type;
91 GetLocalDiagCopy (
const diag_type& diag,
92 const values_type& val,
93 const diag_offsets_type& diagOffsets,
94 const row_offsets_type& ptr,
97 diagOffsets_ (diagOffsets),
99 blockSize_ (blockSize),
100 offsetPerBlock_ (blockSize_*blockSize_),
104 KOKKOS_INLINE_FUNCTION
void
105 operator() (
const LO& lclRowInd)
const
110 const size_t absOffset = ptr_[lclRowInd];
113 const size_t relOffset = diagOffsets_[lclRowInd];
116 const size_t pointOffset = (absOffset+relOffset)*offsetPerBlock_;
121 device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
122 const_little_block_type;
123 const_little_block_type D_in (val_.data () + pointOffset,
124 blockSize_, blockSize_);
125 auto D_out = Kokkos::subview (diag_, lclRowInd, ALL (), ALL ());
131 diag_offsets_type diagOffsets_;
132 row_offsets_type ptr_;
139 template<
class Scalar,
class LO,
class GO,
class Node>
141 BlockCrsMatrix<Scalar, LO, GO, Node>::
142 markLocalErrorAndGetStream ()
144 * (this->localError_) =
true;
145 if ((*errs_).is_null ()) {
146 *errs_ = Teuchos::rcp (
new std::ostringstream ());
151 template<
class Scalar,
class LO,
class GO,
class Node>
155 graph_ (Teuchos::rcp (new
map_type ()), 0),
156 blockSize_ (static_cast<LO> (0)),
157 X_colMap_ (new Teuchos::RCP<
BMV> ()),
158 Y_rowMap_ (new Teuchos::RCP<
BMV> ()),
159 pointImporter_ (new Teuchos::RCP<typename
crs_graph_type::import_type> ()),
161 localError_ (new bool (false)),
162 errs_ (new Teuchos::RCP<std::ostringstream> ())
166 template<
class Scalar,
class LO,
class GO,
class Node>
169 const LO blockSize) :
172 rowMeshMap_ (* (graph.getRowMap ())),
173 blockSize_ (blockSize),
174 X_colMap_ (new Teuchos::RCP<
BMV> ()),
175 Y_rowMap_ (new Teuchos::RCP<
BMV> ()),
176 pointImporter_ (new Teuchos::RCP<typename
crs_graph_type::import_type> ()),
177 offsetPerBlock_ (blockSize * blockSize),
178 localError_ (new bool (false)),
179 errs_ (new Teuchos::RCP<std::ostringstream> ())
183 TEUCHOS_TEST_FOR_EXCEPTION(
184 ! graph_.
isSorted (), std::invalid_argument,
"Tpetra::"
185 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted "
186 "rows (isSorted() is false). This class assumes sorted rows.");
188 graphRCP_ = Teuchos::rcpFromRef(graph_);
194 const bool blockSizeIsNonpositive = (blockSize + 1 <= 1);
195 TEUCHOS_TEST_FOR_EXCEPTION(
196 blockSizeIsNonpositive, std::invalid_argument,
"Tpetra::"
197 "BlockCrsMatrix constructor: The input blockSize = " << blockSize <<
198 " <= 0. The block size must be positive.");
206 const auto colPointMap = Teuchos::rcp
208 *pointImporter_ = Teuchos::rcp
212 auto local_graph_h = graph.getLocalGraphHost ();
213 auto ptr_h = local_graph_h.row_map;
214 ptrHost_ = decltype(ptrHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph row offset"), ptr_h.extent(0));
217 auto ind_h = local_graph_h.entries;
218 indHost_ = decltype(indHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph column indices"), ind_h.extent(0));
222 const auto numValEnt = ind_h.extent(0) * offsetPerBlock ();
223 val_ = decltype (val_) (impl_scalar_type_dualview(
"val", numValEnt));
227 template<
class Scalar,
class LO,
class GO,
class Node>
230 const typename local_matrix_device_type::values_type& values,
231 const LO blockSize) :
234 rowMeshMap_ (* (graph.getRowMap ())),
235 blockSize_ (blockSize),
236 X_colMap_ (new Teuchos::RCP<
BMV> ()),
237 Y_rowMap_ (new Teuchos::RCP<
BMV> ()),
238 pointImporter_ (new Teuchos::RCP<typename
crs_graph_type::import_type> ()),
239 offsetPerBlock_ (blockSize * blockSize),
240 localError_ (new bool (false)),
241 errs_ (new Teuchos::RCP<std::ostringstream> ())
244 TEUCHOS_TEST_FOR_EXCEPTION(
245 ! graph_.
isSorted (), std::invalid_argument,
"Tpetra::"
246 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted "
247 "rows (isSorted() is false). This class assumes sorted rows.");
249 graphRCP_ = Teuchos::rcpFromRef(graph_);
255 const bool blockSizeIsNonpositive = (blockSize + 1 <= 1);
256 TEUCHOS_TEST_FOR_EXCEPTION(
257 blockSizeIsNonpositive, std::invalid_argument,
"Tpetra::"
258 "BlockCrsMatrix constructor: The input blockSize = " << blockSize <<
259 " <= 0. The block size must be positive.");
267 const auto colPointMap = Teuchos::rcp
269 *pointImporter_ = Teuchos::rcp
273 auto local_graph_h = graph.getLocalGraphHost ();
274 auto ptr_h = local_graph_h.row_map;
275 ptrHost_ = decltype(ptrHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph row offset"), ptr_h.extent(0));
278 auto ind_h = local_graph_h.entries;
279 indHost_ = decltype(indHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph column indices"), ind_h.extent(0));
282 const auto numValEnt = ind_h.extent(0) * offsetPerBlock ();
283 TEUCHOS_ASSERT_EQUALITY(numValEnt, values.size());
284 val_ = decltype (val_) (values);
288 template<
class Scalar,
class LO,
class GO,
class Node>
293 const LO blockSize) :
296 rowMeshMap_ (* (graph.getRowMap ())),
297 domainPointMap_ (domainPointMap),
298 rangePointMap_ (rangePointMap),
299 blockSize_ (blockSize),
300 X_colMap_ (new Teuchos::RCP<
BMV> ()),
301 Y_rowMap_ (new Teuchos::RCP<
BMV> ()),
302 pointImporter_ (new Teuchos::RCP<typename
crs_graph_type::import_type> ()),
303 offsetPerBlock_ (blockSize * blockSize),
304 localError_ (new bool (false)),
305 errs_ (new Teuchos::RCP<std::ostringstream> ())
307 TEUCHOS_TEST_FOR_EXCEPTION(
308 ! graph_.
isSorted (), std::invalid_argument,
"Tpetra::"
309 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted "
310 "rows (isSorted() is false). This class assumes sorted rows.");
312 graphRCP_ = Teuchos::rcpFromRef(graph_);
318 const bool blockSizeIsNonpositive = (blockSize + 1 <= 1);
319 TEUCHOS_TEST_FOR_EXCEPTION(
320 blockSizeIsNonpositive, std::invalid_argument,
"Tpetra::"
321 "BlockCrsMatrix constructor: The input blockSize = " << blockSize <<
322 " <= 0. The block size must be positive.");
326 const auto colPointMap = Teuchos::rcp
328 *pointImporter_ = Teuchos::rcp
332 auto local_graph_h = graph.getLocalGraphHost ();
333 auto ptr_h = local_graph_h.row_map;
334 ptrHost_ = decltype(ptrHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph row offset"), ptr_h.extent(0));
338 auto ind_h = local_graph_h.entries;
339 indHost_ = decltype(indHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph column indices"), ind_h.extent(0));
343 const auto numValEnt = ind_h.extent(0) * offsetPerBlock ();
344 val_ = decltype (val_) (impl_scalar_type_dualview(
"val", numValEnt));
349 template<
class Scalar,
class LO,
class GO,
class Node>
350 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
355 return Teuchos::rcp (
new map_type (domainPointMap_));
358 template<
class Scalar,
class LO,
class GO,
class Node>
359 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
364 return Teuchos::rcp (
new map_type (rangePointMap_));
367 template<
class Scalar,
class LO,
class GO,
class Node>
368 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
372 return graph_.getRowMap();
375 template<
class Scalar,
class LO,
class GO,
class Node>
376 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
380 return graph_.getColMap();
383 template<
class Scalar,
class LO,
class GO,
class Node>
388 return graph_.getGlobalNumRows();
391 template<
class Scalar,
class LO,
class GO,
class Node>
396 return graph_.getLocalNumRows();
399 template<
class Scalar,
class LO,
class GO,
class Node>
404 return graph_.getLocalMaxNumRowEntries();
407 template<
class Scalar,
class LO,
class GO,
class Node>
412 Teuchos::ETransp mode,
417 TEUCHOS_TEST_FOR_EXCEPTION(
418 mode != Teuchos::NO_TRANS && mode != Teuchos::TRANS && mode != Teuchos::CONJ_TRANS,
419 std::invalid_argument,
"Tpetra::BlockCrsMatrix::apply: "
420 "Invalid 'mode' argument. Valid values are Teuchos::NO_TRANS, "
421 "Teuchos::TRANS, and Teuchos::CONJ_TRANS.");
423 TEUCHOS_TEST_FOR_EXCEPTION(
425 std::invalid_argument,
"Tpetra::BlockCrsMatrix::apply: "
426 "X and Y must both be constant stride");
430 const LO blockSize = getBlockSize ();
432 X_view =
BMV (X, * (graph_.getDomainMap ()), blockSize);
433 Y_view =
BMV (Y, * (graph_.getRangeMap ()), blockSize);
435 catch (std::invalid_argument& e) {
436 TEUCHOS_TEST_FOR_EXCEPTION(
437 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
438 "apply: Either the input MultiVector X or the output MultiVector Y "
439 "cannot be viewed as a BlockMultiVector, given this BlockCrsMatrix's "
440 "graph. BlockMultiVector's constructor threw the following exception: "
449 const_cast<this_BCRS_type&
> (*this).applyBlock (X_view, Y_view, mode, alpha, beta);
450 }
catch (std::invalid_argument& e) {
451 TEUCHOS_TEST_FOR_EXCEPTION(
452 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
453 "apply: The implementation method applyBlock complained about having "
454 "an invalid argument. It reported the following: " << e.what ());
455 }
catch (std::logic_error& e) {
456 TEUCHOS_TEST_FOR_EXCEPTION(
457 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
458 "apply: The implementation method applyBlock complained about a "
459 "possible bug in its implementation. It reported the following: "
461 }
catch (std::exception& e) {
462 TEUCHOS_TEST_FOR_EXCEPTION(
463 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
464 "apply: The implementation method applyBlock threw an exception which "
465 "is neither std::invalid_argument nor std::logic_error, but is a "
466 "subclass of std::exception. It reported the following: "
469 TEUCHOS_TEST_FOR_EXCEPTION(
470 true, std::logic_error,
"Tpetra::BlockCrsMatrix::"
471 "apply: The implementation method applyBlock threw an exception which "
472 "is not an instance of a subclass of std::exception. This probably "
473 "indicates a bug. Please report this to the Tpetra developers.");
477 template<
class Scalar,
class LO,
class GO,
class Node>
482 Teuchos::ETransp mode,
486 TEUCHOS_TEST_FOR_EXCEPTION(
488 "Tpetra::BlockCrsMatrix::applyBlock: "
489 "X and Y have different block sizes. X.getBlockSize() = "
493 if (mode == Teuchos::NO_TRANS) {
494 applyBlockNoTrans (X, Y, alpha, beta);
495 }
else if (mode == Teuchos::TRANS || mode == Teuchos::CONJ_TRANS) {
496 applyBlockTrans (X, Y, mode, alpha, beta);
498 TEUCHOS_TEST_FOR_EXCEPTION(
499 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
500 "applyBlock: Invalid 'mode' argument. Valid values are "
501 "Teuchos::NO_TRANS, Teuchos::TRANS, and Teuchos::CONJ_TRANS.");
505 template<
class Scalar,
class LO,
class GO,
class Node>
516 TEUCHOS_TEST_FOR_EXCEPTION(!destMatrix.is_null(), std::invalid_argument,
517 "destMatrix is required to be null.");
521 RCP<crs_graph_type> srcGraph = rcp (
new crs_graph_type(this->getCrsGraph()));
522 RCP<crs_graph_type> destGraph = importAndFillCompleteCrsGraph<crs_graph_type>(srcGraph, importer,
523 srcGraph->getDomainMap(),
524 srcGraph->getRangeMap());
528 destMatrix = rcp (
new this_BCRS_type (*destGraph, getBlockSize()));
533 template<
class Scalar,
class LO,
class GO,
class Node>
544 TEUCHOS_TEST_FOR_EXCEPTION(!destMatrix.is_null(), std::invalid_argument,
545 "destMatrix is required to be null.");
549 RCP<crs_graph_type> srcGraph = rcp (
new crs_graph_type(this->getCrsGraph()));
550 RCP<crs_graph_type> destGraph = exportAndFillCompleteCrsGraph<crs_graph_type>(srcGraph, exporter,
551 srcGraph->getDomainMap(),
552 srcGraph->getRangeMap());
556 destMatrix = rcp (
new this_BCRS_type (*destGraph, getBlockSize()));
561 template<
class Scalar,
class LO,
class GO,
class Node>
566 auto val_d = val_.getDeviceView(Access::OverwriteAll);
570 template<
class Scalar,
class LO,
class GO,
class Node>
576 const LO numColInds)
const
578 std::vector<ptrdiff_t> offsets(numColInds);
579 const LO numOffsets = this->getLocalRowOffsets(localRowInd, offsets.data(), colInds, numColInds);
580 const LO validCount = this->replaceLocalValuesByOffsets(localRowInd, offsets.data(), vals, numOffsets);
584 template <
class Scalar,
class LO,
class GO,
class Node>
588 Kokkos::MemoryUnmanaged>& offsets)
const
590 graph_.getLocalDiagOffsets (offsets);
593 template <
class Scalar,
class LO,
class GO,
class Node>
597 Kokkos::MemoryUnmanaged>& diag,
598 const Kokkos::View<
const size_t*, device_type,
599 Kokkos::MemoryUnmanaged>& offsets)
const
601 using Kokkos::parallel_for;
602 const char prefix[] =
"Tpetra::BlockCrsMatrix::getLocalDiagCopy (2-arg): ";
604 const LO lclNumMeshRows =
static_cast<LO
> (rowMeshMap_.getLocalNumElements ());
605 const LO blockSize = this->getBlockSize ();
606 TEUCHOS_TEST_FOR_EXCEPTION
607 (static_cast<LO> (diag.extent (0)) < lclNumMeshRows ||
608 static_cast<LO> (diag.extent (1)) < blockSize ||
609 static_cast<LO> (diag.extent (2)) < blockSize,
610 std::invalid_argument, prefix <<
611 "The input Kokkos::View is not big enough to hold all the data.");
612 TEUCHOS_TEST_FOR_EXCEPTION
613 (static_cast<LO> (offsets.size ()) < lclNumMeshRows, std::invalid_argument,
614 prefix <<
"offsets.size() = " << offsets.size () <<
" < local number of "
615 "diagonal blocks " << lclNumMeshRows <<
".");
617 typedef Kokkos::RangePolicy<execution_space, LO> policy_type;
618 typedef GetLocalDiagCopy<Scalar, LO, GO, Node> functor_type;
624 auto val_d = val_.getDeviceView(Access::ReadOnly);
625 parallel_for (policy_type (0, lclNumMeshRows),
626 functor_type (diag, val_d, offsets,
627 graph_.getLocalGraphDevice ().row_map, blockSize_));
630 template<
class Scalar,
class LO,
class GO,
class Node>
636 const LO numColInds)
const
638 std::vector<ptrdiff_t> offsets(numColInds);
639 const LO numOffsets = this->getLocalRowOffsets(localRowInd, offsets.data(), colInds, numColInds);
640 const LO validCount = this->absMaxLocalValuesByOffsets(localRowInd, offsets.data(), vals, numOffsets);
645 template<
class Scalar,
class LO,
class GO,
class Node>
651 const LO numColInds)
const
653 std::vector<ptrdiff_t> offsets(numColInds);
654 const LO numOffsets = this->getLocalRowOffsets(localRowInd, offsets.data(), colInds, numColInds);
655 const LO validCount = this->sumIntoLocalValuesByOffsets(localRowInd, offsets.data(), vals, numOffsets);
658 template<
class Scalar,
class LO,
class GO,
class Node>
662 nonconst_local_inds_host_view_type &Indices,
663 nonconst_values_host_view_type &Values,
664 size_t& NumEntries)
const
666 auto vals = getValuesHost(LocalRow);
667 const LO numInds = ptrHost_(LocalRow+1) - ptrHost_(LocalRow);
668 if (numInds > (LO)Indices.extent(0) || numInds*blockSize_*blockSize_ > (LO)Values.extent(0)) {
669 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error,
670 "Tpetra::BlockCrsMatrix::getLocalRowCopy : Column and/or values array is not large enough to hold "
671 << numInds <<
" row entries");
673 const LO * colInds = indHost_.data() + ptrHost_(LocalRow);
674 for (LO i=0; i<numInds; ++i) {
675 Indices[i] = colInds[i];
677 for (LO i=0; i<numInds*blockSize_*blockSize_; ++i) {
680 NumEntries = numInds;
683 template<
class Scalar,
class LO,
class GO,
class Node>
689 const LO numColInds)
const
691 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
696 return static_cast<LO
> (0);
699 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
703 for (LO k = 0; k < numColInds; ++k) {
704 const LO relBlockOffset =
705 this->findRelOffsetOfColumnIndex (localRowInd, colInds[k], hint);
706 if (relBlockOffset != LINV) {
707 offsets[k] =
static_cast<ptrdiff_t
> (relBlockOffset);
708 hint = relBlockOffset + 1;
716 template<
class Scalar,
class LO,
class GO,
class Node>
720 const ptrdiff_t offsets[],
722 const LO numOffsets)
const
724 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
729 return static_cast<LO
> (0);
736 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
737 const ptrdiff_t STINV = Teuchos::OrdinalTraits<ptrdiff_t>::invalid ();
738 size_t pointOffset = 0;
741 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
742 const size_t blockOffset = offsets[k]*perBlockSize;
743 if (offsets[k] != STINV) {
745 const_little_block_type A_new = getConstLocalBlockFromInput (vIn, pointOffset);
754 template<
class Scalar,
class LO,
class GO,
class Node>
758 const ptrdiff_t offsets[],
760 const LO numOffsets)
const
762 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
767 return static_cast<LO
> (0);
769 const impl_scalar_type*
const vIn =
reinterpret_cast<const impl_scalar_type*
> (vals);
770 auto val_out = getValuesHost(localRowInd);
771 impl_scalar_type* vOut =
const_cast<impl_scalar_type*
>(val_out.data());
773 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
774 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
775 size_t pointOffset = 0;
778 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
779 const size_t blockOffset = offsets[k]*perBlockSize;
780 if (blockOffset != STINV) {
781 little_block_type A_old = getNonConstLocalBlockFromInput (vOut, blockOffset);
782 const_little_block_type A_new = getConstLocalBlockFromInput (vIn, pointOffset);
791 template<
class Scalar,
class LO,
class GO,
class Node>
795 const ptrdiff_t offsets[],
797 const LO numOffsets)
const
799 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
804 return static_cast<LO
> (0);
812 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
813 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
814 size_t pointOffset = 0;
817 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
818 const size_t blockOffset = offsets[k]*perBlockSize;
819 if (blockOffset != STINV) {
821 const_little_block_type A_new = getConstLocalBlockFromInput (vIn, pointOffset);
822 AXPY (ONE, A_new, A_old);
829 template<
class Scalar,
class LO,
class GO,
class Node>
831 impl_scalar_type_dualview::t_host::const_type
835 return val_.getHostView(Access::ReadOnly);
838 template<
class Scalar,
class LO,
class GO,
class Node>
839 typename BlockCrsMatrix<Scalar, LO, GO, Node>::
840 impl_scalar_type_dualview::t_dev::const_type
841 BlockCrsMatrix<Scalar, LO, GO, Node>::
842 getValuesDevice ()
const
844 return val_.getDeviceView(Access::ReadOnly);
847 template<
class Scalar,
class LO,
class GO,
class Node>
848 typename BlockCrsMatrix<Scalar, LO, GO, Node>::
849 impl_scalar_type_dualview::t_host
853 return val_.getHostView(Access::ReadWrite);
856 template<
class Scalar,
class LO,
class GO,
class Node>
858 impl_scalar_type_dualview::t_dev
862 return val_.getDeviceView(Access::ReadWrite);
865 template<
class Scalar,
class LO,
class GO,
class Node>
866 typename BlockCrsMatrix<Scalar, LO, GO, Node>::
867 impl_scalar_type_dualview::t_host::const_type
868 BlockCrsMatrix<Scalar, LO, GO, Node>::
869 getValuesHost (
const LO& lclRow)
const
871 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
872 auto val = val_.getHostView(Access::ReadOnly);
873 auto r_val = Kokkos::subview(val, Kokkos::pair<LO,LO>(ptrHost_(lclRow)*perBlockSize, ptrHost_(lclRow+1)*perBlockSize));
877 template<
class Scalar,
class LO,
class GO,
class Node>
879 impl_scalar_type_dualview::t_dev::const_type
883 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
884 auto val = val_.getDeviceView(Access::ReadOnly);
885 auto r_val = Kokkos::subview(val, Kokkos::pair<LO,LO>(ptrHost_(lclRow)*perBlockSize, ptrHost_(lclRow+1)*perBlockSize));
889 template<
class Scalar,
class LO,
class GO,
class Node>
894 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
895 auto val = val_.getHostView(Access::ReadWrite);
896 auto r_val = Kokkos::subview(val, Kokkos::pair<LO,LO>(ptrHost_(lclRow)*perBlockSize, ptrHost_(lclRow+1)*perBlockSize));
900 template<
class Scalar,
class LO,
class GO,
class Node>
905 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
906 auto val = val_.getDeviceView(Access::ReadWrite);
907 auto r_val = Kokkos::subview(val, Kokkos::pair<LO,LO>(ptrHost_(lclRow)*perBlockSize, ptrHost_(lclRow+1)*perBlockSize));
911 template<
class Scalar,
class LO,
class GO,
class Node>
916 const size_t numEntInGraph = graph_.getNumEntriesInLocalRow (localRowInd);
917 if (numEntInGraph == Teuchos::OrdinalTraits<size_t>::invalid ()) {
918 return static_cast<LO
> (0);
920 return static_cast<LO
> (numEntInGraph);
924 template<
class Scalar,
class LO,
class GO,
class Node>
925 typename BlockCrsMatrix<Scalar, LO, GO, Node>::local_matrix_device_type
929 auto numCols = this->graph_.getColMap()->getLocalNumElements();
930 auto val = val_.getDeviceView(Access::ReadWrite);
931 const LO blockSize = this->getBlockSize ();
932 const auto graph = this->graph_.getLocalGraphDevice ();
934 return local_matrix_device_type(
"Tpetra::BlockCrsMatrix::lclMatrixDevice",
941 template<
class Scalar,
class LO,
class GO,
class Node>
946 const Teuchos::ETransp mode,
956 TEUCHOS_TEST_FOR_EXCEPTION(
957 true, std::logic_error,
"Tpetra::BlockCrsMatrix::apply: "
958 "transpose and conjugate transpose modes are not yet implemented.");
961 template<
class Scalar,
class LO,
class GO,
class Node>
963 BlockCrsMatrix<Scalar, LO, GO, Node>::
964 applyBlockNoTrans (
const BlockMultiVector<Scalar, LO, GO, Node>& X,
965 BlockMultiVector<Scalar, LO, GO, Node>& Y,
971 typedef ::Tpetra::Import<LO, GO, Node> import_type;
972 typedef ::Tpetra::Export<LO, GO, Node> export_type;
973 const Scalar zero = STS::zero ();
974 const Scalar one = STS::one ();
975 RCP<const import_type>
import = graph_.getImporter ();
977 RCP<const export_type> theExport = graph_.getExporter ();
978 const char prefix[] =
"Tpetra::BlockCrsMatrix::applyBlockNoTrans: ";
984 else if (beta != one) {
989 const BMV* X_colMap = NULL;
990 if (
import.is_null ()) {
994 catch (std::exception& e) {
995 TEUCHOS_TEST_FOR_EXCEPTION
996 (
true, std::logic_error, prefix <<
"Tpetra::MultiVector::"
997 "operator= threw an exception: " << std::endl << e.what ());
1007 if ((*X_colMap_).is_null () ||
1008 (**X_colMap_).getNumVectors () != X.getNumVectors () ||
1009 (**X_colMap_).getBlockSize () != X.getBlockSize ()) {
1010 *X_colMap_ = rcp (
new BMV (* (graph_.getColMap ()), getBlockSize (),
1011 static_cast<LO
> (X.getNumVectors ())));
1013 (*X_colMap_)->getMultiVectorView().doImport (X.getMultiVectorView (),
1017 X_colMap = &(**X_colMap_);
1019 catch (std::exception& e) {
1020 TEUCHOS_TEST_FOR_EXCEPTION
1021 (
true, std::logic_error, prefix <<
"Tpetra::MultiVector::"
1022 "operator= threw an exception: " << std::endl << e.what ());
1026 BMV* Y_rowMap = NULL;
1027 if (theExport.is_null ()) {
1030 else if ((*Y_rowMap_).is_null () ||
1031 (**Y_rowMap_).getNumVectors () != Y.getNumVectors () ||
1032 (**Y_rowMap_).getBlockSize () != Y.getBlockSize ()) {
1033 *Y_rowMap_ = rcp (
new BMV (* (graph_.getRowMap ()), getBlockSize (),
1034 static_cast<LO
> (X.getNumVectors ())));
1036 Y_rowMap = &(**Y_rowMap_);
1038 catch (std::exception& e) {
1039 TEUCHOS_TEST_FOR_EXCEPTION(
1040 true, std::logic_error, prefix <<
"Tpetra::MultiVector::"
1041 "operator= threw an exception: " << std::endl << e.what ());
1046 localApplyBlockNoTrans (*X_colMap, *Y_rowMap, alpha, beta);
1048 catch (std::exception& e) {
1049 TEUCHOS_TEST_FOR_EXCEPTION
1050 (
true, std::runtime_error, prefix <<
"localApplyBlockNoTrans threw "
1051 "an exception: " << e.what ());
1054 TEUCHOS_TEST_FOR_EXCEPTION
1055 (
true, std::runtime_error, prefix <<
"localApplyBlockNoTrans threw "
1056 "an exception not a subclass of std::exception.");
1059 if (! theExport.is_null ()) {
1066 template <
class Scalar,
class LO,
class GO,
class Node>
1067 void BlockCrsMatrix<Scalar, LO, GO, Node>::localApplyBlockNoTrans(
1068 const BlockMultiVector<Scalar, LO, GO, Node> &X,
1069 BlockMultiVector<Scalar, LO, GO, Node> &Y,
const Scalar alpha,
1070 const Scalar beta) {
1072 "Tpetra::BlockCrsMatrix::localApplyBlockNoTrans");
1073 const impl_scalar_type alpha_impl = alpha;
1074 const auto graph = this->graph_.getLocalGraphDevice();
1076 mv_type X_mv = X.getMultiVectorView();
1077 mv_type Y_mv = Y.getMultiVectorView();
1078 auto X_lcl = X_mv.getLocalViewDevice(Access::ReadOnly);
1079 auto Y_lcl = Y_mv.getLocalViewDevice(Access::ReadWrite);
1081 #if KOKKOSKERNELS_VERSION >= 40299
1082 auto A_lcl = getLocalMatrixDevice();
1083 if(!applyHelper.get()) {
1085 applyHelper = std::make_shared<ApplyHelper>(A_lcl.nnz(), A_lcl.graph.row_map);
1087 if(applyHelper->shouldUseIntRowptrs())
1089 auto A_lcl_int_rowptrs = applyHelper->getIntRowptrMatrix(A_lcl);
1091 &applyHelper->handle_int, KokkosSparse::NoTranspose,
1092 alpha_impl, A_lcl_int_rowptrs, X_lcl, beta, Y_lcl);
1097 &applyHelper->handle, KokkosSparse::NoTranspose,
1098 alpha_impl, A_lcl, X_lcl, beta, Y_lcl);
1101 auto A_lcl = getLocalMatrixDevice();
1102 KokkosSparse::spmv(KokkosSparse::NoTranspose, alpha_impl, A_lcl, X_lcl, beta,
1108 template<
class Scalar,
class LO,
class GO,
class Node>
1110 BlockCrsMatrix<Scalar, LO, GO, Node>::
1111 findRelOffsetOfColumnIndex (
const LO localRowIndex,
1112 const LO colIndexToFind,
1113 const LO hint)
const
1115 const size_t absStartOffset = ptrHost_[localRowIndex];
1116 const size_t absEndOffset = ptrHost_[localRowIndex+1];
1117 const LO numEntriesInRow =
static_cast<LO
> (absEndOffset - absStartOffset);
1119 const LO*
const curInd = indHost_.data () + absStartOffset;
1122 if (hint < numEntriesInRow && curInd[hint] == colIndexToFind) {
1129 LO relOffset = Teuchos::OrdinalTraits<LO>::invalid ();
1134 const LO maxNumEntriesForLinearSearch = 32;
1135 if (numEntriesInRow > maxNumEntriesForLinearSearch) {
1140 const LO* beg = curInd;
1141 const LO* end = curInd + numEntriesInRow;
1142 std::pair<const LO*, const LO*> p =
1143 std::equal_range (beg, end, colIndexToFind);
1144 if (p.first != p.second) {
1146 relOffset =
static_cast<LO
> (p.first - beg);
1150 for (LO k = 0; k < numEntriesInRow; ++k) {
1151 if (colIndexToFind == curInd[k]) {
1161 template<
class Scalar,
class LO,
class GO,
class Node>
1163 BlockCrsMatrix<Scalar, LO, GO, Node>::
1164 offsetPerBlock ()
const
1166 return offsetPerBlock_;
1169 template<
class Scalar,
class LO,
class GO,
class Node>
1170 typename BlockCrsMatrix<Scalar, LO, GO, Node>::const_little_block_type
1171 BlockCrsMatrix<Scalar, LO, GO, Node>::
1172 getConstLocalBlockFromInput (
const impl_scalar_type* val,
1173 const size_t pointOffset)
const
1176 const LO rowStride = blockSize_;
1177 return const_little_block_type (val + pointOffset, blockSize_, rowStride);
1180 template<
class Scalar,
class LO,
class GO,
class Node>
1181 typename BlockCrsMatrix<Scalar, LO, GO, Node>::little_block_type
1182 BlockCrsMatrix<Scalar, LO, GO, Node>::
1183 getNonConstLocalBlockFromInput (impl_scalar_type* val,
1184 const size_t pointOffset)
const
1187 const LO rowStride = blockSize_;
1188 return little_block_type (val + pointOffset, blockSize_, rowStride);
1191 template<
class Scalar,
class LO,
class GO,
class Node>
1192 typename BlockCrsMatrix<Scalar, LO, GO, Node>::little_block_host_type
1193 BlockCrsMatrix<Scalar, LO, GO, Node>::
1194 getNonConstLocalBlockFromInputHost (impl_scalar_type* val,
1195 const size_t pointOffset)
const
1198 const LO rowStride = blockSize_;
1199 const size_t bs2 = blockSize_ * blockSize_;
1200 return little_block_host_type (val + bs2 * pointOffset, blockSize_, rowStride);
1203 template<
class Scalar,
class LO,
class GO,
class Node>
1204 typename BlockCrsMatrix<Scalar, LO, GO, Node>::little_block_type
1205 BlockCrsMatrix<Scalar, LO, GO, Node>::
1206 getLocalBlockDeviceNonConst (
const LO localRowInd,
const LO localColInd)
const
1208 using this_BCRS_type = BlockCrsMatrix<Scalar, LO, GO, Node>;
1210 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1211 const LO relBlockOffset = this->findRelOffsetOfColumnIndex (localRowInd, localColInd);
1212 if (relBlockOffset != Teuchos::OrdinalTraits<LO>::invalid ()) {
1213 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1214 auto vals =
const_cast<this_BCRS_type&
>(*this).getValuesDeviceNonConst();
1215 auto r_val = getNonConstLocalBlockFromInput (vals.data(), absBlockOffset);
1219 return little_block_type ();
1223 template<
class Scalar,
class LO,
class GO,
class Node>
1224 typename BlockCrsMatrix<Scalar, LO, GO, Node>::little_block_host_type
1225 BlockCrsMatrix<Scalar, LO, GO, Node>::
1226 getLocalBlockHostNonConst (
const LO localRowInd,
const LO localColInd)
const
1228 using this_BCRS_type = BlockCrsMatrix<Scalar, LO, GO, Node>;
1230 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1231 const LO relBlockOffset = this->findRelOffsetOfColumnIndex (localRowInd, localColInd);
1232 if (relBlockOffset != Teuchos::OrdinalTraits<LO>::invalid ()) {
1233 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1234 auto vals =
const_cast<this_BCRS_type&
>(*this).getValuesHostNonConst();
1235 auto r_val = getNonConstLocalBlockFromInputHost (vals.data(), absBlockOffset);
1239 return little_block_host_type ();
1244 template<
class Scalar,
class LO,
class GO,
class Node>
1246 BlockCrsMatrix<Scalar, LO, GO, Node>::
1247 checkSizes (const ::Tpetra::SrcDistObject& source)
1250 typedef BlockCrsMatrix<Scalar, LO, GO, Node> this_BCRS_type;
1251 const this_BCRS_type* src =
dynamic_cast<const this_BCRS_type*
> (&source);
1254 std::ostream& err = markLocalErrorAndGetStream ();
1255 err <<
"checkSizes: The source object of the Import or Export "
1256 "must be a BlockCrsMatrix with the same template parameters as the "
1257 "target object." << endl;
1262 if (src->getBlockSize () != this->getBlockSize ()) {
1263 std::ostream& err = markLocalErrorAndGetStream ();
1264 err <<
"checkSizes: The source and target objects of the Import or "
1265 <<
"Export must have the same block sizes. The source's block "
1266 <<
"size = " << src->getBlockSize () <<
" != the target's block "
1267 <<
"size = " << this->getBlockSize () <<
"." << endl;
1269 if (! src->graph_.isFillComplete ()) {
1270 std::ostream& err = markLocalErrorAndGetStream ();
1271 err <<
"checkSizes: The source object of the Import or Export is "
1272 "not fill complete. Both source and target objects must be fill "
1273 "complete." << endl;
1275 if (! this->graph_.isFillComplete ()) {
1276 std::ostream& err = markLocalErrorAndGetStream ();
1277 err <<
"checkSizes: The target object of the Import or Export is "
1278 "not fill complete. Both source and target objects must be fill "
1279 "complete." << endl;
1281 if (src->graph_.getColMap ().is_null ()) {
1282 std::ostream& err = markLocalErrorAndGetStream ();
1283 err <<
"checkSizes: The source object of the Import or Export does "
1284 "not have a column Map. Both source and target objects must have "
1285 "column Maps." << endl;
1287 if (this->graph_.getColMap ().is_null ()) {
1288 std::ostream& err = markLocalErrorAndGetStream ();
1289 err <<
"checkSizes: The target object of the Import or Export does "
1290 "not have a column Map. Both source and target objects must have "
1291 "column Maps." << endl;
1295 return ! (* (this->localError_));
1298 template<
class Scalar,
class LO,
class GO,
class Node>
1302 (const ::Tpetra::SrcDistObject& source,
1303 const size_t numSameIDs,
1310 using ::Tpetra::Details::Behavior;
1312 using ::Tpetra::Details::ProfilingRegion;
1316 ProfilingRegion profile_region(
"Tpetra::BlockCrsMatrix::copyAndPermute");
1317 const bool debug = Behavior::debug();
1318 const bool verbose = Behavior::verbose();
1323 std::ostringstream os;
1324 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1325 os <<
"Proc " << myRank <<
": BlockCrsMatrix::copyAndPermute : " << endl;
1330 if (* (this->localError_)) {
1331 std::ostream& err = this->markLocalErrorAndGetStream ();
1333 <<
"The target object of the Import or Export is already in an error state."
1342 std::ostringstream os;
1343 os << prefix << endl
1346 std::cerr << os.str ();
1352 if (permuteToLIDs.extent (0) != permuteFromLIDs.extent (0)) {
1353 std::ostream& err = this->markLocalErrorAndGetStream ();
1355 <<
"permuteToLIDs.extent(0) = " << permuteToLIDs.extent (0)
1356 <<
" != permuteFromLIDs.extent(0) = " << permuteFromLIDs.extent(0)
1360 if (permuteToLIDs.need_sync_host () || permuteFromLIDs.need_sync_host ()) {
1361 std::ostream& err = this->markLocalErrorAndGetStream ();
1363 <<
"Both permuteToLIDs and permuteFromLIDs must be sync'd to host."
1368 const this_BCRS_type* src =
dynamic_cast<const this_BCRS_type*
> (&source);
1370 std::ostream& err = this->markLocalErrorAndGetStream ();
1372 <<
"The source (input) object of the Import or "
1373 "Export is either not a BlockCrsMatrix, or does not have the right "
1374 "template parameters. checkSizes() should have caught this. "
1375 "Please report this bug to the Tpetra developers." << endl;
1379 bool lclErr =
false;
1380 #ifdef HAVE_TPETRA_DEBUG
1381 std::set<LO> invalidSrcCopyRows;
1382 std::set<LO> invalidDstCopyRows;
1383 std::set<LO> invalidDstCopyCols;
1384 std::set<LO> invalidDstPermuteCols;
1385 std::set<LO> invalidPermuteFromRows;
1386 #endif // HAVE_TPETRA_DEBUG
1395 #ifdef HAVE_TPETRA_DEBUG
1396 const map_type& srcRowMap = * (src->graph_.getRowMap ());
1397 #endif // HAVE_TPETRA_DEBUG
1398 const map_type& dstRowMap = * (this->graph_.getRowMap ());
1399 const map_type& srcColMap = * (src->graph_.getColMap ());
1400 const map_type& dstColMap = * (this->graph_.getColMap ());
1401 const bool canUseLocalColumnIndices = srcColMap.
locallySameAs (dstColMap);
1403 const size_t numPermute =
static_cast<size_t> (permuteFromLIDs.extent(0));
1405 std::ostringstream os;
1407 <<
"canUseLocalColumnIndices: "
1408 << (canUseLocalColumnIndices ?
"true" :
"false")
1409 <<
", numPermute: " << numPermute
1411 std::cerr << os.str ();
1414 const auto permuteToLIDsHost = permuteToLIDs.view_host();
1415 const auto permuteFromLIDsHost = permuteFromLIDs.view_host();
1417 if (canUseLocalColumnIndices) {
1419 for (LO localRow = 0; localRow < static_cast<LO> (numSameIDs); ++localRow) {
1420 #ifdef HAVE_TPETRA_DEBUG
1423 invalidSrcCopyRows.insert (localRow);
1426 #endif // HAVE_TPETRA_DEBUG
1428 local_inds_host_view_type lclSrcCols;
1429 values_host_view_type vals;
1433 src->getLocalRowView (localRow, lclSrcCols, vals); numEntries = lclSrcCols.extent(0);
1434 if (numEntries > 0) {
1435 LO err = this->replaceLocalValues (localRow, lclSrcCols.data(),
reinterpret_cast<const scalar_type*
>(vals.data()), numEntries);
1436 if (err != numEntries) {
1439 #ifdef HAVE_TPETRA_DEBUG
1440 invalidDstCopyRows.insert (localRow);
1441 #endif // HAVE_TPETRA_DEBUG
1449 for (LO k = 0; k < numEntries; ++k) {
1452 #ifdef HAVE_TPETRA_DEBUG
1453 (void) invalidDstCopyCols.insert (lclSrcCols[k]);
1454 #endif // HAVE_TPETRA_DEBUG
1463 for (
size_t k = 0; k < numPermute; ++k) {
1464 const LO srcLclRow =
static_cast<LO
> (permuteFromLIDsHost(k));
1465 const LO dstLclRow =
static_cast<LO
> (permuteToLIDsHost(k));
1467 local_inds_host_view_type lclSrcCols;
1468 values_host_view_type vals;
1470 src->getLocalRowView (srcLclRow, lclSrcCols, vals); numEntries = lclSrcCols.extent(0);
1471 if (numEntries > 0) {
1472 LO err = this->replaceLocalValues (dstLclRow, lclSrcCols.data(),
reinterpret_cast<const scalar_type*
>(vals.data()), numEntries);
1473 if (err != numEntries) {
1475 #ifdef HAVE_TPETRA_DEBUG
1476 for (LO c = 0; c < numEntries; ++c) {
1478 invalidDstPermuteCols.insert (lclSrcCols[c]);
1481 #endif // HAVE_TPETRA_DEBUG
1488 const size_t maxNumEnt = src->graph_.getLocalMaxNumRowEntries ();
1489 Teuchos::Array<LO> lclDstCols (maxNumEnt);
1492 for (LO localRow = 0; localRow < static_cast<LO> (numSameIDs); ++localRow) {
1493 local_inds_host_view_type lclSrcCols;
1494 values_host_view_type vals;
1500 src->getLocalRowView (localRow, lclSrcCols, vals); numEntries = lclSrcCols.extent(0);
1501 }
catch (std::exception& e) {
1503 std::ostringstream os;
1504 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1505 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" localRow "
1506 << localRow <<
", src->getLocalRowView() threw an exception: "
1508 std::cerr << os.str ();
1513 if (numEntries > 0) {
1514 if (static_cast<size_t> (numEntries) > static_cast<size_t> (lclDstCols.size ())) {
1517 std::ostringstream os;
1518 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1519 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" localRow "
1520 << localRow <<
", numEntries = " << numEntries <<
" > maxNumEnt = "
1521 << maxNumEnt << endl;
1522 std::cerr << os.str ();
1528 Teuchos::ArrayView<LO> lclDstColsView = lclDstCols.view (0, numEntries);
1529 for (LO j = 0; j < numEntries; ++j) {
1531 if (lclDstColsView[j] == Teuchos::OrdinalTraits<LO>::invalid ()) {
1533 #ifdef HAVE_TPETRA_DEBUG
1534 invalidDstCopyCols.insert (lclDstColsView[j]);
1535 #endif // HAVE_TPETRA_DEBUG
1540 err = this->replaceLocalValues (localRow, lclDstColsView.getRawPtr (),
reinterpret_cast<const scalar_type*
>(vals.data()), numEntries);
1541 }
catch (std::exception& e) {
1543 std::ostringstream os;
1544 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1545 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" localRow "
1546 << localRow <<
", this->replaceLocalValues() threw an exception: "
1548 std::cerr << os.str ();
1552 if (err != numEntries) {
1555 std::ostringstream os;
1556 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1557 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" "
1558 "localRow " << localRow <<
", this->replaceLocalValues "
1559 "returned " << err <<
" instead of numEntries = "
1560 << numEntries << endl;
1561 std::cerr << os.str ();
1569 for (
size_t k = 0; k < numPermute; ++k) {
1570 const LO srcLclRow =
static_cast<LO
> (permuteFromLIDsHost(k));
1571 const LO dstLclRow =
static_cast<LO
> (permuteToLIDsHost(k));
1573 local_inds_host_view_type lclSrcCols;
1574 values_host_view_type vals;
1578 src->getLocalRowView (srcLclRow, lclSrcCols, vals); numEntries = lclSrcCols.extent(0);
1579 }
catch (std::exception& e) {
1581 std::ostringstream os;
1582 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1583 os <<
"Proc " << myRank <<
": copyAndPermute: At \"permute\" "
1584 "srcLclRow " << srcLclRow <<
" and dstLclRow " << dstLclRow
1585 <<
", src->getLocalRowView() threw an exception: " << e.what ();
1586 std::cerr << os.str ();
1591 if (numEntries > 0) {
1592 if (static_cast<size_t> (numEntries) > static_cast<size_t> (lclDstCols.size ())) {
1598 Teuchos::ArrayView<LO> lclDstColsView = lclDstCols.view (0, numEntries);
1599 for (LO j = 0; j < numEntries; ++j) {
1601 if (lclDstColsView[j] == Teuchos::OrdinalTraits<LO>::invalid ()) {
1603 #ifdef HAVE_TPETRA_DEBUG
1604 invalidDstPermuteCols.insert (lclDstColsView[j]);
1605 #endif // HAVE_TPETRA_DEBUG
1608 LO err = this->replaceLocalValues (dstLclRow, lclDstColsView.getRawPtr (),
reinterpret_cast<const scalar_type*
>(vals.data()), numEntries);
1609 if (err != numEntries) {
1618 std::ostream& err = this->markLocalErrorAndGetStream ();
1619 #ifdef HAVE_TPETRA_DEBUG
1620 err <<
"copyAndPermute: The graph structure of the source of the "
1621 "Import or Export must be a subset of the graph structure of the "
1623 err <<
"invalidSrcCopyRows = [";
1624 for (
typename std::set<LO>::const_iterator it = invalidSrcCopyRows.begin ();
1625 it != invalidSrcCopyRows.end (); ++it) {
1627 typename std::set<LO>::const_iterator itp1 = it;
1629 if (itp1 != invalidSrcCopyRows.end ()) {
1633 err <<
"], invalidDstCopyRows = [";
1634 for (
typename std::set<LO>::const_iterator it = invalidDstCopyRows.begin ();
1635 it != invalidDstCopyRows.end (); ++it) {
1637 typename std::set<LO>::const_iterator itp1 = it;
1639 if (itp1 != invalidDstCopyRows.end ()) {
1643 err <<
"], invalidDstCopyCols = [";
1644 for (
typename std::set<LO>::const_iterator it = invalidDstCopyCols.begin ();
1645 it != invalidDstCopyCols.end (); ++it) {
1647 typename std::set<LO>::const_iterator itp1 = it;
1649 if (itp1 != invalidDstCopyCols.end ()) {
1653 err <<
"], invalidDstPermuteCols = [";
1654 for (
typename std::set<LO>::const_iterator it = invalidDstPermuteCols.begin ();
1655 it != invalidDstPermuteCols.end (); ++it) {
1657 typename std::set<LO>::const_iterator itp1 = it;
1659 if (itp1 != invalidDstPermuteCols.end ()) {
1663 err <<
"], invalidPermuteFromRows = [";
1664 for (
typename std::set<LO>::const_iterator it = invalidPermuteFromRows.begin ();
1665 it != invalidPermuteFromRows.end (); ++it) {
1667 typename std::set<LO>::const_iterator itp1 = it;
1669 if (itp1 != invalidPermuteFromRows.end ()) {
1675 err <<
"copyAndPermute: The graph structure of the source of the "
1676 "Import or Export must be a subset of the graph structure of the "
1678 #endif // HAVE_TPETRA_DEBUG
1682 std::ostringstream os;
1683 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1684 const bool lclSuccess = ! (* (this->localError_));
1685 os <<
"*** Proc " << myRank <<
": copyAndPermute "
1686 << (lclSuccess ?
"succeeded" :
"FAILED");
1690 os <<
": error messages: " << this->errorMessages ();
1692 std::cerr << os.str ();
1716 template<
class LO,
class GO>
1718 packRowCount (
const size_t numEnt,
1719 const size_t numBytesPerValue,
1720 const size_t blkSize)
1722 using ::Tpetra::Details::PackTraits;
1733 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
1734 const size_t valsLen = numEnt * numBytesPerValue * blkSize * blkSize;
1735 return numEntLen + gidsLen + valsLen;
1749 template<
class ST,
class LO,
class GO>
1751 unpackRowCount (
const typename ::Tpetra::Details::PackTraits<LO>::input_buffer_type& imports,
1752 const size_t offset,
1753 const size_t numBytes,
1756 using Kokkos::subview;
1757 using ::Tpetra::Details::PackTraits;
1759 if (numBytes == 0) {
1761 return static_cast<size_t> (0);
1766 TEUCHOS_TEST_FOR_EXCEPTION
1767 (theNumBytes > numBytes, std::logic_error,
"unpackRowCount: "
1768 "theNumBytes = " << theNumBytes <<
" < numBytes = " << numBytes
1770 const char*
const inBuf = imports.data () + offset;
1772 TEUCHOS_TEST_FOR_EXCEPTION
1773 (actualNumBytes > numBytes, std::logic_error,
"unpackRowCount: "
1774 "actualNumBytes = " << actualNumBytes <<
" < numBytes = " << numBytes
1776 return static_cast<size_t> (numEntLO);
1783 template<
class ST,
class LO,
class GO>
1785 packRowForBlockCrs (
const typename ::Tpetra::Details::PackTraits<LO>::output_buffer_type exports,
1786 const size_t offset,
1787 const size_t numEnt,
1788 const typename ::Tpetra::Details::PackTraits<GO>::input_array_type& gidsIn,
1789 const typename ::Tpetra::Details::PackTraits<ST>::input_array_type& valsIn,
1790 const size_t numBytesPerValue,
1791 const size_t blockSize)
1793 using Kokkos::subview;
1794 using ::Tpetra::Details::PackTraits;
1800 const size_t numScalarEnt = numEnt * blockSize * blockSize;
1803 const LO numEntLO =
static_cast<size_t> (numEnt);
1805 const size_t numEntBeg = offset;
1807 const size_t gidsBeg = numEntBeg + numEntLen;
1808 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
1809 const size_t valsBeg = gidsBeg + gidsLen;
1810 const size_t valsLen = numScalarEnt * numBytesPerValue;
1812 char*
const numEntOut = exports.data () + numEntBeg;
1813 char*
const gidsOut = exports.data () + gidsBeg;
1814 char*
const valsOut = exports.data () + valsBeg;
1816 size_t numBytesOut = 0;
1821 Kokkos::pair<int, size_t> p;
1822 p = PackTraits<GO>::packArray (gidsOut, gidsIn.data (), numEnt);
1823 errorCode += p.first;
1824 numBytesOut += p.second;
1826 p = PackTraits<ST>::packArray (valsOut, valsIn.data (), numScalarEnt);
1827 errorCode += p.first;
1828 numBytesOut += p.second;
1831 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
1832 TEUCHOS_TEST_FOR_EXCEPTION
1833 (numBytesOut != expectedNumBytes, std::logic_error,
1834 "packRowForBlockCrs: numBytesOut = " << numBytesOut
1835 <<
" != expectedNumBytes = " << expectedNumBytes <<
".");
1837 TEUCHOS_TEST_FOR_EXCEPTION
1838 (errorCode != 0, std::runtime_error,
"packRowForBlockCrs: "
1839 "PackTraits::packArray returned a nonzero error code");
1845 template<
class ST,
class LO,
class GO>
1847 unpackRowForBlockCrs (
const typename ::Tpetra::Details::PackTraits<GO>::output_array_type& gidsOut,
1848 const typename ::Tpetra::Details::PackTraits<ST>::output_array_type& valsOut,
1849 const typename ::Tpetra::Details::PackTraits<int>::input_buffer_type& imports,
1850 const size_t offset,
1851 const size_t numBytes,
1852 const size_t numEnt,
1853 const size_t numBytesPerValue,
1854 const size_t blockSize)
1856 using ::Tpetra::Details::PackTraits;
1858 if (numBytes == 0) {
1862 const size_t numScalarEnt = numEnt * blockSize * blockSize;
1863 TEUCHOS_TEST_FOR_EXCEPTION
1864 (static_cast<size_t> (imports.extent (0)) <= offset,
1865 std::logic_error,
"unpackRowForBlockCrs: imports.extent(0) = "
1866 << imports.extent (0) <<
" <= offset = " << offset <<
".");
1867 TEUCHOS_TEST_FOR_EXCEPTION
1868 (static_cast<size_t> (imports.extent (0)) < offset + numBytes,
1869 std::logic_error,
"unpackRowForBlockCrs: imports.extent(0) = "
1870 << imports.extent (0) <<
" < offset + numBytes = "
1871 << (offset + numBytes) <<
".");
1876 const size_t numEntBeg = offset;
1878 const size_t gidsBeg = numEntBeg + numEntLen;
1879 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
1880 const size_t valsBeg = gidsBeg + gidsLen;
1881 const size_t valsLen = numScalarEnt * numBytesPerValue;
1883 const char*
const numEntIn = imports.data () + numEntBeg;
1884 const char*
const gidsIn = imports.data () + gidsBeg;
1885 const char*
const valsIn = imports.data () + valsBeg;
1887 size_t numBytesOut = 0;
1891 TEUCHOS_TEST_FOR_EXCEPTION
1892 (static_cast<size_t> (numEntOut) != numEnt, std::logic_error,
1893 "unpackRowForBlockCrs: Expected number of entries " << numEnt
1894 <<
" != actual number of entries " << numEntOut <<
".");
1897 Kokkos::pair<int, size_t> p;
1898 p = PackTraits<GO>::unpackArray (gidsOut.data (), gidsIn, numEnt);
1899 errorCode += p.first;
1900 numBytesOut += p.second;
1902 p = PackTraits<ST>::unpackArray (valsOut.data (), valsIn, numScalarEnt);
1903 errorCode += p.first;
1904 numBytesOut += p.second;
1907 TEUCHOS_TEST_FOR_EXCEPTION
1908 (numBytesOut != numBytes, std::logic_error,
1909 "unpackRowForBlockCrs: numBytesOut = " << numBytesOut
1910 <<
" != numBytes = " << numBytes <<
".");
1912 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
1913 TEUCHOS_TEST_FOR_EXCEPTION
1914 (numBytesOut != expectedNumBytes, std::logic_error,
1915 "unpackRowForBlockCrs: numBytesOut = " << numBytesOut
1916 <<
" != expectedNumBytes = " << expectedNumBytes <<
".");
1918 TEUCHOS_TEST_FOR_EXCEPTION
1919 (errorCode != 0, std::runtime_error,
"unpackRowForBlockCrs: "
1920 "PackTraits::unpackArray returned a nonzero error code");
1926 template<
class Scalar,
class LO,
class GO,
class Node>
1930 (const ::Tpetra::SrcDistObject& source,
1935 Kokkos::DualView<
size_t*,
1937 size_t& constantNumPackets)
1939 using ::Tpetra::Details::Behavior;
1941 using ::Tpetra::Details::ProfilingRegion;
1942 using ::Tpetra::Details::PackTraits;
1944 typedef typename Kokkos::View<int*, device_type>::HostMirror::execution_space host_exec;
1948 ProfilingRegion profile_region(
"Tpetra::BlockCrsMatrix::packAndPrepare");
1950 const bool debug = Behavior::debug();
1951 const bool verbose = Behavior::verbose();
1956 std::ostringstream os;
1957 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1958 os <<
"Proc " << myRank <<
": BlockCrsMatrix::packAndPrepare: " << std::endl;
1963 if (* (this->localError_)) {
1964 std::ostream& err = this->markLocalErrorAndGetStream ();
1966 <<
"The target object of the Import or Export is already in an error state."
1975 std::ostringstream os;
1976 os << prefix << std::endl
1980 std::cerr << os.str ();
1986 if (exportLIDs.extent (0) != numPacketsPerLID.extent (0)) {
1987 std::ostream& err = this->markLocalErrorAndGetStream ();
1989 <<
"exportLIDs.extent(0) = " << exportLIDs.extent (0)
1990 <<
" != numPacketsPerLID.extent(0) = " << numPacketsPerLID.extent(0)
1991 <<
"." << std::endl;
1994 if (exportLIDs.need_sync_host ()) {
1995 std::ostream& err = this->markLocalErrorAndGetStream ();
1996 err << prefix <<
"exportLIDs be sync'd to host." << std::endl;
2000 const this_BCRS_type* src =
dynamic_cast<const this_BCRS_type*
> (&source);
2002 std::ostream& err = this->markLocalErrorAndGetStream ();
2004 <<
"The source (input) object of the Import or "
2005 "Export is either not a BlockCrsMatrix, or does not have the right "
2006 "template parameters. checkSizes() should have caught this. "
2007 "Please report this bug to the Tpetra developers." << std::endl;
2019 constantNumPackets = 0;
2023 const size_t blockSize =
static_cast<size_t> (src->getBlockSize ());
2024 const size_t numExportLIDs = exportLIDs.extent (0);
2025 size_t numBytesPerValue(0);
2027 auto val_host = val_.getHostView(Access::ReadOnly);
2029 PackTraits<impl_scalar_type>
2037 Impl::BlockCrsRowStruct<size_t> rowReducerStruct;
2040 auto exportLIDsHost = exportLIDs.view_host();
2041 auto numPacketsPerLIDHost = numPacketsPerLID.view_host();
2042 numPacketsPerLID.modify_host ();
2044 rowReducerStruct = Impl::BlockCrsRowStruct<size_t>();
2045 for (
size_t i = 0; i < numExportLIDs; ++i) {
2046 const LO lclRow = exportLIDsHost(i);
2048 numEnt = (numEnt == Teuchos::OrdinalTraits<size_t>::invalid () ? 0 : numEnt);
2050 const size_t numBytes =
2051 packRowCount<LO, GO> (numEnt, numBytesPerValue, blockSize);
2052 numPacketsPerLIDHost(i) = numBytes;
2053 rowReducerStruct += Impl::BlockCrsRowStruct<size_t>(numEnt, numBytes, numEnt);
2060 const size_t totalNumBytes = rowReducerStruct.totalNumBytes;
2061 const size_t totalNumEntries = rowReducerStruct.totalNumEntries;
2062 const size_t maxRowLength = rowReducerStruct.maxRowLength;
2065 std::ostringstream os;
2067 <<
"totalNumBytes = " << totalNumBytes <<
", totalNumEntries = " << totalNumEntries
2069 std::cerr << os.str ();
2075 if(exports.extent(0) != totalNumBytes)
2077 const std::string oldLabel = exports.d_view.label ();
2078 const std::string newLabel = (oldLabel ==
"") ?
"exports" : oldLabel;
2079 exports = Kokkos::DualView<packet_type*, buffer_device_type>(newLabel, totalNumBytes);
2081 if (totalNumEntries > 0) {
2083 Kokkos::View<size_t*, host_exec> offset(
"offset", numExportLIDs+1);
2085 const auto policy = Kokkos::RangePolicy<host_exec>(size_t(0), numExportLIDs+1);
2086 Kokkos::parallel_scan
2088 [=](
const size_t &i,
size_t &update,
const bool &
final) {
2089 if (
final) offset(i) = update;
2090 update += (i == numExportLIDs ? 0 : numPacketsPerLIDHost(i));
2093 if (offset(numExportLIDs) != totalNumBytes) {
2094 std::ostream& err = this->markLocalErrorAndGetStream ();
2096 <<
"At end of method, the final offset (in bytes) "
2097 << offset(numExportLIDs) <<
" does not equal the total number of bytes packed "
2098 << totalNumBytes <<
". "
2099 <<
"Please report this bug to the Tpetra developers." << std::endl;
2113 exports.modify_host();
2115 typedef Kokkos::TeamPolicy<host_exec> policy_type;
2117 policy_type(numExportLIDs, 1, 1)
2118 .set_scratch_size(0, Kokkos::PerTeam(
sizeof(GO)*maxRowLength));
2123 Kokkos::parallel_for
2125 [=](
const typename policy_type::member_type &member) {
2126 const size_t i = member.league_rank();
2127 Kokkos::View<GO*, typename host_exec::scratch_memory_space>
2128 gblColInds(member.team_scratch(0), maxRowLength);
2130 const LO lclRowInd = exportLIDsHost(i);
2131 local_inds_host_view_type lclColInds;
2132 values_host_view_type vals;
2137 src->getLocalRowView (lclRowInd, lclColInds, vals);
2138 const size_t numEnt = lclColInds.extent(0);
2141 for (
size_t j = 0; j < numEnt; ++j)
2148 const size_t numBytes =
2149 packRowForBlockCrs<impl_scalar_type, LO, GO>
2150 (exports.view_host(),
2153 Kokkos::View<const GO*, host_exec>(gblColInds.data(), maxRowLength),
2160 const size_t offsetDiff = offset(i+1) - offset(i);
2161 if (numBytes != offsetDiff) {
2162 std::ostringstream os;
2164 <<
"numBytes computed from packRowForBlockCrs is different from "
2165 <<
"precomputed offset values, LID = " << i << std::endl;
2166 std::cerr << os.str ();
2175 std::ostringstream os;
2176 const bool lclSuccess = ! (* (this->localError_));
2178 << (lclSuccess ?
"succeeded" :
"FAILED")
2180 std::cerr << os.str ();
2184 template<
class Scalar,
class LO,
class GO,
class Node>
2192 Kokkos::DualView<
size_t*,
2197 using ::Tpetra::Details::Behavior;
2199 using ::Tpetra::Details::ProfilingRegion;
2200 using ::Tpetra::Details::PackTraits;
2203 typename Kokkos::View<int*, device_type>::HostMirror::execution_space;
2205 ProfilingRegion profile_region(
"Tpetra::BlockCrsMatrix::unpackAndCombine");
2206 const bool verbose = Behavior::verbose ();
2211 std::ostringstream os;
2212 auto map = this->graph_.getRowMap ();
2213 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
2214 const int myRank = comm.is_null () ? -1 : comm->getRank ();
2215 os <<
"Proc " << myRank <<
": Tpetra::BlockCrsMatrix::unpackAndCombine: ";
2218 os <<
"Start" << endl;
2219 std::cerr << os.str ();
2224 if (* (this->localError_)) {
2225 std::ostream& err = this->markLocalErrorAndGetStream ();
2226 std::ostringstream os;
2227 os << prefix <<
"{Im/Ex}port target is already in error." << endl;
2229 std::cerr << os.str ();
2238 if (importLIDs.extent (0) != numPacketsPerLID.extent (0)) {
2239 std::ostream& err = this->markLocalErrorAndGetStream ();
2240 std::ostringstream os;
2241 os << prefix <<
"importLIDs.extent(0) = " << importLIDs.extent (0)
2242 <<
" != numPacketsPerLID.extent(0) = " << numPacketsPerLID.extent(0)
2245 std::cerr << os.str ();
2251 if (combineMode !=
ADD && combineMode !=
INSERT &&
2253 combineMode !=
ZERO) {
2254 std::ostream& err = this->markLocalErrorAndGetStream ();
2255 std::ostringstream os;
2257 <<
"Invalid CombineMode value " << combineMode <<
". Valid "
2258 <<
"values include ADD, INSERT, REPLACE, ABSMAX, and ZERO."
2261 std::cerr << os.str ();
2267 if (this->graph_.getColMap ().is_null ()) {
2268 std::ostream& err = this->markLocalErrorAndGetStream ();
2269 std::ostringstream os;
2270 os << prefix <<
"Target matrix's column Map is null." << endl;
2272 std::cerr << os.str ();
2281 const map_type& tgtColMap = * (this->graph_.getColMap ());
2284 const size_t blockSize = this->getBlockSize ();
2285 const size_t numImportLIDs = importLIDs.extent(0);
2292 size_t numBytesPerValue(0);
2294 auto val_host = val_.getHostView(Access::ReadOnly);
2296 PackTraits<impl_scalar_type>::packValueCount
2299 const size_t maxRowNumEnt = graph_.getLocalMaxNumRowEntries ();
2300 const size_t maxRowNumScalarEnt = maxRowNumEnt * blockSize * blockSize;
2303 std::ostringstream os;
2304 os << prefix <<
"combineMode: "
2305 << ::Tpetra::combineModeToString (combineMode)
2306 <<
", blockSize: " << blockSize
2307 <<
", numImportLIDs: " << numImportLIDs
2308 <<
", numBytesPerValue: " << numBytesPerValue
2309 <<
", maxRowNumEnt: " << maxRowNumEnt
2310 <<
", maxRowNumScalarEnt: " << maxRowNumScalarEnt << endl;
2311 std::cerr << os.str ();
2314 if (combineMode ==
ZERO || numImportLIDs == 0) {
2316 std::ostringstream os;
2317 os << prefix <<
"Nothing to unpack. Done!" << std::endl;
2318 std::cerr << os.str ();
2325 if (imports.need_sync_host ()) {
2326 imports.sync_host ();
2336 if (imports.need_sync_host () || numPacketsPerLID.need_sync_host () ||
2337 importLIDs.need_sync_host ()) {
2338 std::ostream& err = this->markLocalErrorAndGetStream ();
2339 std::ostringstream os;
2340 os << prefix <<
"All input DualViews must be sync'd to host by now. "
2341 <<
"imports_nc.need_sync_host()="
2342 << (imports.need_sync_host () ?
"true" :
"false")
2343 <<
", numPacketsPerLID.need_sync_host()="
2344 << (numPacketsPerLID.need_sync_host () ?
"true" :
"false")
2345 <<
", importLIDs.need_sync_host()="
2346 << (importLIDs.need_sync_host () ?
"true" :
"false")
2349 std::cerr << os.str ();
2355 const auto importLIDsHost = importLIDs.view_host ();
2356 const auto numPacketsPerLIDHost = numPacketsPerLID.view_host ();
2362 Kokkos::View<size_t*, host_exec> offset (
"offset", numImportLIDs+1);
2364 const auto policy = Kokkos::RangePolicy<host_exec>(size_t(0), numImportLIDs+1);
2365 Kokkos::parallel_scan
2366 (
"Tpetra::BlockCrsMatrix::unpackAndCombine: offsets", policy,
2367 [=] (
const size_t &i,
size_t &update,
const bool &
final) {
2368 if (
final) offset(i) = update;
2369 update += (i == numImportLIDs ? 0 : numPacketsPerLIDHost(i));
2379 Kokkos::View<int, host_exec, Kokkos::MemoryTraits<Kokkos::Atomic> >
2380 errorDuringUnpack (
"errorDuringUnpack");
2381 errorDuringUnpack () = 0;
2383 using policy_type = Kokkos::TeamPolicy<host_exec>;
2384 size_t scratch_per_row =
sizeof(GO) * maxRowNumEnt +
sizeof (LO) * maxRowNumEnt + numBytesPerValue * maxRowNumScalarEnt
2387 const auto policy = policy_type (numImportLIDs, 1, 1)
2388 .set_scratch_size (0, Kokkos::PerTeam (scratch_per_row));
2389 using host_scratch_space =
typename host_exec::scratch_memory_space;
2391 using pair_type = Kokkos::pair<size_t, size_t>;
2394 getValuesHostNonConst();
2396 Kokkos::parallel_for
2397 (
"Tpetra::BlockCrsMatrix::unpackAndCombine: unpack", policy,
2398 [=] (
const typename policy_type::member_type& member) {
2399 const size_t i = member.league_rank();
2400 Kokkos::View<GO*, host_scratch_space> gblColInds
2401 (member.team_scratch (0), maxRowNumEnt);
2402 Kokkos::View<LO*, host_scratch_space> lclColInds
2403 (member.team_scratch (0), maxRowNumEnt);
2404 Kokkos::View<impl_scalar_type*, host_scratch_space> vals
2405 (member.team_scratch (0), maxRowNumScalarEnt);
2408 const size_t offval = offset(i);
2409 const LO lclRow = importLIDsHost(i);
2410 const size_t numBytes = numPacketsPerLIDHost(i);
2411 const size_t numEnt =
2412 unpackRowCount<impl_scalar_type, LO, GO>
2413 (imports.view_host (), offval, numBytes, numBytesPerValue);
2416 if (numEnt > maxRowNumEnt) {
2417 errorDuringUnpack() = 1;
2419 std::ostringstream os;
2421 <<
"At i = " << i <<
", numEnt = " << numEnt
2422 <<
" > maxRowNumEnt = " << maxRowNumEnt
2424 std::cerr << os.str ();
2429 const size_t numScalarEnt = numEnt*blockSize*blockSize;
2430 auto gidsOut = Kokkos::subview(gblColInds, pair_type(0, numEnt));
2431 auto lidsOut = Kokkos::subview(lclColInds, pair_type(0, numEnt));
2432 auto valsOut = Kokkos::subview(vals, pair_type(0, numScalarEnt));
2437 size_t numBytesOut = 0;
2440 unpackRowForBlockCrs<impl_scalar_type, LO, GO>
2441 (Kokkos::View<GO*,host_exec>(gidsOut.data(), numEnt),
2442 Kokkos::View<impl_scalar_type*,host_exec>(valsOut.data(), numScalarEnt),
2443 imports.view_host(),
2444 offval, numBytes, numEnt,
2445 numBytesPerValue, blockSize);
2447 catch (std::exception& e) {
2448 errorDuringUnpack() = 1;
2450 std::ostringstream os;
2451 os << prefix <<
"At i = " << i <<
", unpackRowForBlockCrs threw: "
2452 << e.what () << endl;
2453 std::cerr << os.str ();
2458 if (numBytes != numBytesOut) {
2459 errorDuringUnpack() = 1;
2461 std::ostringstream os;
2463 <<
"At i = " << i <<
", numBytes = " << numBytes
2464 <<
" != numBytesOut = " << numBytesOut <<
"."
2466 std::cerr << os.str();
2472 for (
size_t k = 0; k < numEnt; ++k) {
2474 if (lidsOut(k) == Teuchos::OrdinalTraits<LO>::invalid ()) {
2476 std::ostringstream os;
2478 <<
"At i = " << i <<
", GID " << gidsOut(k)
2479 <<
" is not owned by the calling process."
2481 std::cerr << os.str();
2489 const LO*
const lidsRaw =
const_cast<const LO*
> (lidsOut.data ());
2490 const Scalar*
const valsRaw =
reinterpret_cast<const Scalar*
>
2492 if (combineMode ==
ADD) {
2493 numCombd = this->sumIntoLocalValues (lclRow, lidsRaw, valsRaw, numEnt);
2494 }
else if (combineMode ==
INSERT || combineMode ==
REPLACE) {
2495 numCombd = this->replaceLocalValues (lclRow, lidsRaw, valsRaw, numEnt);
2496 }
else if (combineMode ==
ABSMAX) {
2497 numCombd = this->absMaxLocalValues (lclRow, lidsRaw, valsRaw, numEnt);
2500 if (static_cast<LO> (numEnt) != numCombd) {
2501 errorDuringUnpack() = 1;
2503 std::ostringstream os;
2504 os << prefix <<
"At i = " << i <<
", numEnt = " << numEnt
2505 <<
" != numCombd = " << numCombd <<
"."
2507 std::cerr << os.str ();
2515 if (errorDuringUnpack () != 0) {
2516 std::ostream& err = this->markLocalErrorAndGetStream ();
2517 err << prefix <<
"Unpacking failed.";
2519 err <<
" Please run again with the environment variable setting "
2520 "TPETRA_VERBOSE=1 to get more verbose diagnostic output.";
2526 std::ostringstream os;
2527 const bool lclSuccess = ! (* (this->localError_));
2529 << (lclSuccess ?
"succeeded" :
"FAILED")
2531 std::cerr << os.str ();
2535 template<
class Scalar,
class LO,
class GO,
class Node>
2539 using Teuchos::TypeNameTraits;
2540 std::ostringstream os;
2541 os <<
"\"Tpetra::BlockCrsMatrix\": { "
2542 <<
"Template parameters: { "
2543 <<
"Scalar: " << TypeNameTraits<Scalar>::name ()
2544 <<
"LO: " << TypeNameTraits<LO>::name ()
2545 <<
"GO: " << TypeNameTraits<GO>::name ()
2546 <<
"Node: " << TypeNameTraits<Node>::name ()
2548 <<
", Label: \"" << this->getObjectLabel () <<
"\""
2549 <<
", Global dimensions: ["
2550 << graph_.getDomainMap ()->getGlobalNumElements () <<
", "
2551 << graph_.getRangeMap ()->getGlobalNumElements () <<
"]"
2552 <<
", Block size: " << getBlockSize ()
2558 template<
class Scalar,
class LO,
class GO,
class Node>
2562 const Teuchos::EVerbosityLevel verbLevel)
const
2564 using Teuchos::ArrayRCP;
2565 using Teuchos::CommRequest;
2566 using Teuchos::FancyOStream;
2567 using Teuchos::getFancyOStream;
2568 using Teuchos::ireceive;
2569 using Teuchos::isend;
2570 using Teuchos::outArg;
2571 using Teuchos::TypeNameTraits;
2572 using Teuchos::VERB_DEFAULT;
2573 using Teuchos::VERB_NONE;
2574 using Teuchos::VERB_LOW;
2575 using Teuchos::VERB_MEDIUM;
2577 using Teuchos::VERB_EXTREME;
2579 using Teuchos::wait;
2583 const Teuchos::EVerbosityLevel vl =
2584 (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
2586 if (vl == VERB_NONE) {
2591 Teuchos::OSTab tab0 (out);
2593 out <<
"\"Tpetra::BlockCrsMatrix\":" << endl;
2594 Teuchos::OSTab tab1 (out);
2596 out <<
"Template parameters:" << endl;
2598 Teuchos::OSTab tab2 (out);
2599 out <<
"Scalar: " << TypeNameTraits<Scalar>::name () << endl
2600 <<
"LO: " << TypeNameTraits<LO>::name () << endl
2601 <<
"GO: " << TypeNameTraits<GO>::name () << endl
2602 <<
"Node: " << TypeNameTraits<Node>::name () << endl;
2604 out <<
"Label: \"" << this->getObjectLabel () <<
"\"" << endl
2605 <<
"Global dimensions: ["
2606 << graph_.getDomainMap ()->getGlobalNumElements () <<
", "
2607 << graph_.getRangeMap ()->getGlobalNumElements () <<
"]" << endl;
2609 const LO blockSize = getBlockSize ();
2610 out <<
"Block size: " << blockSize << endl;
2613 if (vl >= VERB_MEDIUM) {
2614 const Teuchos::Comm<int>& comm = * (graph_.getMap ()->getComm ());
2615 const int myRank = comm.getRank ();
2617 out <<
"Row Map:" << endl;
2619 getRowMap()->describe (out, vl);
2621 if (! getColMap ().is_null ()) {
2622 if (getColMap() == getRowMap()) {
2624 out <<
"Column Map: same as row Map" << endl;
2629 out <<
"Column Map:" << endl;
2631 getColMap ()->describe (out, vl);
2634 if (! getDomainMap ().is_null ()) {
2635 if (getDomainMap () == getRowMap ()) {
2637 out <<
"Domain Map: same as row Map" << endl;
2640 else if (getDomainMap () == getColMap ()) {
2642 out <<
"Domain Map: same as column Map" << endl;
2647 out <<
"Domain Map:" << endl;
2649 getDomainMap ()->describe (out, vl);
2652 if (! getRangeMap ().is_null ()) {
2653 if (getRangeMap () == getDomainMap ()) {
2655 out <<
"Range Map: same as domain Map" << endl;
2658 else if (getRangeMap () == getRowMap ()) {
2660 out <<
"Range Map: same as row Map" << endl;
2665 out <<
"Range Map: " << endl;
2667 getRangeMap ()->describe (out, vl);
2672 if (vl >= VERB_EXTREME) {
2673 const Teuchos::Comm<int>& comm = * (graph_.getMap ()->getComm ());
2674 const int myRank = comm.getRank ();
2675 const int numProcs = comm.getSize ();
2678 RCP<std::ostringstream> lclOutStrPtr (
new std::ostringstream ());
2679 RCP<FancyOStream> osPtr = getFancyOStream (lclOutStrPtr);
2680 FancyOStream& os = *osPtr;
2681 os <<
"Process " << myRank <<
":" << endl;
2682 Teuchos::OSTab tab2 (os);
2684 const map_type& meshRowMap = * (graph_.getRowMap ());
2685 const map_type& meshColMap = * (graph_.getColMap ());
2690 os <<
"Row " << meshGblRow <<
": {";
2692 local_inds_host_view_type lclColInds;
2693 values_host_view_type vals;
2695 this->getLocalRowView (meshLclRow, lclColInds, vals); numInds = lclColInds.extent(0);
2697 for (LO k = 0; k < numInds; ++k) {
2700 os <<
"Col " << gblCol <<
": [";
2701 for (LO i = 0; i < blockSize; ++i) {
2702 for (LO j = 0; j < blockSize; ++j) {
2703 os << vals[blockSize*blockSize*k + i*blockSize + j];
2704 if (j + 1 < blockSize) {
2708 if (i + 1 < blockSize) {
2713 if (k + 1 < numInds) {
2723 out << lclOutStrPtr->str ();
2724 lclOutStrPtr = Teuchos::null;
2727 const int sizeTag = 1337;
2728 const int dataTag = 1338;
2730 ArrayRCP<char> recvDataBuf;
2734 for (
int p = 1; p < numProcs; ++p) {
2737 ArrayRCP<size_t> recvSize (1);
2739 RCP<CommRequest<int> > recvSizeReq =
2740 ireceive<int, size_t> (recvSize, p, sizeTag, comm);
2741 wait<int> (comm, outArg (recvSizeReq));
2742 const size_t numCharsToRecv = recvSize[0];
2749 if (static_cast<size_t>(recvDataBuf.size()) < numCharsToRecv + 1) {
2750 recvDataBuf.resize (numCharsToRecv + 1);
2752 ArrayRCP<char> recvData = recvDataBuf.persistingView (0, numCharsToRecv);
2754 RCP<CommRequest<int> > recvDataReq =
2755 ireceive<int, char> (recvData, p, dataTag, comm);
2756 wait<int> (comm, outArg (recvDataReq));
2761 recvDataBuf[numCharsToRecv] =
'\0';
2762 out << recvDataBuf.getRawPtr ();
2764 else if (myRank == p) {
2768 const std::string stringToSend = lclOutStrPtr->str ();
2769 lclOutStrPtr = Teuchos::null;
2772 const size_t numCharsToSend = stringToSend.size ();
2773 ArrayRCP<size_t> sendSize (1);
2774 sendSize[0] = numCharsToSend;
2775 RCP<CommRequest<int> > sendSizeReq =
2776 isend<int, size_t> (sendSize, 0, sizeTag, comm);
2777 wait<int> (comm, outArg (sendSizeReq));
2785 ArrayRCP<const char> sendData (&stringToSend[0], 0, numCharsToSend,
false);
2786 RCP<CommRequest<int> > sendDataReq =
2787 isend<int, char> (sendData, 0, dataTag, comm);
2788 wait<int> (comm, outArg (sendDataReq));
2794 template<
class Scalar,
class LO,
class GO,
class Node>
2795 Teuchos::RCP<const Teuchos::Comm<int> >
2799 return graph_.getComm();
2803 template<
class Scalar,
class LO,
class GO,
class Node>
2808 return graph_.getGlobalNumCols();
2812 template<
class Scalar,
class LO,
class GO,
class Node>
2817 return graph_.getLocalNumCols();
2820 template<
class Scalar,
class LO,
class GO,
class Node>
2825 return graph_.getIndexBase();
2828 template<
class Scalar,
class LO,
class GO,
class Node>
2833 return graph_.getGlobalNumEntries();
2836 template<
class Scalar,
class LO,
class GO,
class Node>
2841 return graph_.getLocalNumEntries();
2844 template<
class Scalar,
class LO,
class GO,
class Node>
2849 return graph_.getNumEntriesInGlobalRow(globalRow);
2853 template<
class Scalar,
class LO,
class GO,
class Node>
2858 return graph_.getGlobalMaxNumRowEntries();
2861 template<
class Scalar,
class LO,
class GO,
class Node>
2866 return graph_.hasColMap();
2870 template<
class Scalar,
class LO,
class GO,
class Node>
2875 return graph_.isLocallyIndexed();
2878 template<
class Scalar,
class LO,
class GO,
class Node>
2883 return graph_.isGloballyIndexed();
2886 template<
class Scalar,
class LO,
class GO,
class Node>
2891 return graph_.isFillComplete ();
2894 template<
class Scalar,
class LO,
class GO,
class Node>
2902 template<
class Scalar,
class LO,
class GO,
class Node>
2906 nonconst_global_inds_host_view_type &,
2907 nonconst_values_host_view_type &,
2910 TEUCHOS_TEST_FOR_EXCEPTION(
2911 true, std::logic_error,
"Tpetra::BlockCrsMatrix::getGlobalRowCopy: "
2912 "This class doesn't support global matrix indexing.");
2916 template<
class Scalar,
class LO,
class GO,
class Node>
2920 global_inds_host_view_type &,
2921 values_host_view_type &)
const
2923 TEUCHOS_TEST_FOR_EXCEPTION(
2924 true, std::logic_error,
"Tpetra::BlockCrsMatrix::getGlobalRowView: "
2925 "This class doesn't support global matrix indexing.");
2929 template<
class Scalar,
class LO,
class GO,
class Node>
2933 local_inds_host_view_type &colInds,
2934 values_host_view_type &vals)
const
2936 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
2937 colInds = local_inds_host_view_type();
2938 vals = values_host_view_type();
2941 const size_t absBlockOffsetStart = ptrHost_[localRowInd];
2942 const size_t numInds = ptrHost_[localRowInd + 1] - absBlockOffsetStart;
2943 colInds = local_inds_host_view_type(indHost_.data()+absBlockOffsetStart, numInds);
2945 vals = getValuesHost (localRowInd);
2949 template<
class Scalar,
class LO,
class GO,
class Node>
2953 local_inds_host_view_type &colInds,
2954 nonconst_values_host_view_type &vals)
const
2956 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
2957 colInds = nonconst_local_inds_host_view_type();
2958 vals = nonconst_values_host_view_type();
2961 const size_t absBlockOffsetStart = ptrHost_[localRowInd];
2962 const size_t numInds = ptrHost_[localRowInd + 1] - absBlockOffsetStart;
2963 colInds = local_inds_host_view_type(indHost_.data()+absBlockOffsetStart, numInds);
2970 template<
class Scalar,
class LO,
class GO,
class Node>
2975 const size_t lclNumMeshRows = graph_.getLocalNumRows ();
2977 Kokkos::View<size_t*, device_type> diagOffsets (
"diagOffsets", lclNumMeshRows);
2978 graph_.getLocalDiagOffsets (diagOffsets);
2981 auto diagOffsetsHost = Kokkos::create_mirror_view (diagOffsets);
2985 auto vals_host_out = val_.getHostView(Access::ReadOnly);
2989 size_t rowOffset = 0;
2991 LO bs = getBlockSize();
2992 for(
size_t r=0; r<getLocalNumRows(); r++)
2995 offset = rowOffset + diagOffsetsHost(r)*bs*bs;
2996 for(
int b=0; b<bs; b++)
3001 rowOffset += getNumEntriesInLocalRow(r)*bs*bs;
3007 template<
class Scalar,
class LO,
class GO,
class Node>
3012 TEUCHOS_TEST_FOR_EXCEPTION(
3013 true, std::logic_error,
"Tpetra::BlockCrsMatrix::leftScale: "
3014 "not implemented.");
3018 template<
class Scalar,
class LO,
class GO,
class Node>
3023 TEUCHOS_TEST_FOR_EXCEPTION(
3024 true, std::logic_error,
"Tpetra::BlockCrsMatrix::rightScale: "
3025 "not implemented.");
3029 template<
class Scalar,
class LO,
class GO,
class Node>
3030 Teuchos::RCP<const ::Tpetra::RowGraph<LO, GO, Node> >
3037 template<
class Scalar,
class LO,
class GO,
class Node>
3038 typename ::Tpetra::RowMatrix<Scalar, LO, GO, Node>::mag_type
3042 TEUCHOS_TEST_FOR_EXCEPTION(
3043 true, std::logic_error,
"Tpetra::BlockCrsMatrix::getFrobeniusNorm: "
3044 "not implemented.");
3054 #define TPETRA_BLOCKCRSMATRIX_INSTANT(S,LO,GO,NODE) \
3055 template class BlockCrsMatrix< S, LO, GO, NODE >;
3057 #endif // TPETRA_BLOCKCRSMATRIX_DEF_HPP
virtual void getLocalRowCopy(LO LocalRow, nonconst_local_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const override
Not implemented.
size_t getNumEntriesInLocalRow(const LO localRowInd) const override
Return the number of entries in the given row on the calling process.
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
virtual size_t getLocalNumEntries() const override
The local number of stored (structurally nonzero) entries.
virtual global_size_t getGlobalNumCols() const override
The global number of columns of this matrix.
typename BMV::impl_scalar_type impl_scalar_type
The implementation type of entries in the matrix.
void getLocalDiagCopy(const Kokkos::View< impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &diag, const Kokkos::View< const size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Variant of getLocalDiagCopy() that uses precomputed offsets and puts diagonal blocks in a 3-D Kokkos:...
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel) const override
Print a description of this object to the given output stream.
Kokkos::View< impl_scalar_type **, Impl::BlockCrsMatrixLittleBlockArrayLayout, device_type, Kokkos::MemoryTraits< Kokkos::Unmanaged > > little_block_type
The type used to access nonconst matrix blocks.
virtual typename::Tpetra::RowMatrix< Scalar, LO, GO, Node >::mag_type getFrobeniusNorm() const override
The Frobenius norm of the matrix.
LO replaceLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Replace values at the given (mesh, i.e., block) column indices, in the given (mesh, i.e., block) row.
virtual void leftScale(const ::Tpetra::Vector< Scalar, LO, GO, Node > &x) override
Scale the RowMatrix on the left with the given Vector x.
Teuchos::RCP< const map_type > getColMap() const override
Returns the Map that describes the column distribution in this graph.
LO replaceLocalValuesByOffsets(const LO localRowInd, const ptrdiff_t offsets[], const Scalar vals[], const LO numOffsets) const
Like replaceLocalValues, but avoids computing row offsets.
std::string description() const override
One-line description of this object.
Sparse matrix whose entries are small dense square blocks, all of the same dimensions.
typename DistObject< Scalar, LO, GO, Node >::buffer_device_type buffer_device_type
Kokkos::Device specialization for communication buffers.
void exportAndFillComplete(Teuchos::RCP< BlockCrsMatrix< Scalar, LO, GO, Node > > &destMatrix, const Export< LO, GO, Node > &exporter) const
Import from this to the given destination matrix, and make the result fill complete.
virtual bool hasColMap() const override
Whether this matrix has a well-defined column Map.
virtual bool isLocallyIndexed() const override
Whether matrix indices are locally indexed.
LO local_ordinal_type
The type of local indices.
static KOKKOS_INLINE_FUNCTION size_t unpackValue(LO &outVal, const char inBuf[])
Unpack the given value from the given output buffer.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
void getLocalRowViewNonConst(LO LocalRow, local_inds_host_view_type &indices, nonconst_values_host_view_type &values) const
size_t getNumEntriesInLocalRow(local_ordinal_type localRow) const override
Get the number of entries in the given row (local index).
bool isConstantStride() const
Whether this multivector has constant stride between columns.
void setAllToScalar(const Scalar &alpha)
Set all matrix entries equal to alpha.
One or more distributed dense vectors.
Kokkos::StaticCrsGraph< local_ordinal_type, Kokkos::LayoutLeft, device_type, void, size_t > local_graph_device_type
The type of the part of the sparse graph on each MPI process.
Teuchos::RCP< const map_type > getDomainMap() const override
Returns the Map associated with the domain of this graph.
Linear algebra kernels for small dense matrices and vectors.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
Scalar scalar_type
The type of entries in the matrix (that is, of each entry in each block).
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
size_t getLocalNumRows() const override
get the local number of block rows
virtual Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
The communicator over which this matrix is distributed.
Kokkos::LayoutRight BlockCrsMatrixLittleBlockArrayLayout
give an option to use layoutleft
virtual void rightScale(const ::Tpetra::Vector< Scalar, LO, GO, Node > &x) override
Scale the RowMatrix on the right with the given Vector x.
void getLocalRowView(LO LocalRow, local_inds_host_view_type &indices, values_host_view_type &values) const override
Get a view of the (mesh, i.e., block) row, using local (mesh, i.e., block) indices.
static map_type makePointMap(const map_type &meshMap, const LO blockSize)
Create and return the point Map corresponding to the given mesh Map and block size.
local_ordinal_type getMinLocalIndex() const
The minimum local index.
MultiVector for multiple degrees of freedom per mesh point.
virtual void copyAndPermute(const SrcDistObject &sourceObj, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM) override
KOKKOS_INLINE_FUNCTION void absMax(const ViewType2 &Y, const ViewType1 &X)
Implementation of Tpetra's ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix, and the small dense vectors in BlockMultiVector and BlockVector.
Teuchos::RCP< const map_type > getColMap() const override
get the (mesh) map for the columns of this block matrix.
virtual size_t getGlobalMaxNumRowEntries() const override
The maximum number of entries in any row over all processes in the matrix's communicator.
size_t global_size_t
Global size_t object.
virtual void getGlobalRowView(GO GlobalRow, global_inds_host_view_type &indices, values_host_view_type &values) const override
Get a constant, nonpersisting, globally indexed view of the given row of the matrix.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
BlockCrsMatrix()
Default constructor: Makes an empty block matrix.
LO absMaxLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Variant of getLocalDiagCopy() that uses precomputed offsets and puts diagonal blocks in a 3-D Kokkos:...
LO getBlockSize() const
Get the number of degrees of freedom per mesh point.
Teuchos::RCP< const map_type > getDomainMap() const override
Get the (point) domain Map of this matrix.
Teuchos::RCP< const map_type > getRowMap() const override
get the (mesh) map for the rows of this block matrix.
static KOKKOS_INLINE_FUNCTION size_t packValue(char outBuf[], const LO &inVal)
Pack the given value of type value_type into the given output buffer of bytes (char).
Insert new values that don't currently exist.
bool isSorted() const
Whether graph indices in all rows are known to be sorted.
LO sumIntoLocalValuesByOffsets(const LO localRowInd, const ptrdiff_t offsets[], const Scalar vals[], const LO numOffsets) const
Like sumIntoLocalValues, but avoids computing row offsets.
global_size_t getGlobalNumRows() const override
get the global number of block rows
global_ordinal_type getGlobalElement(local_ordinal_type localIndex) const
The global index corresponding to the given local index.
void replaceLocalValue(const LocalOrdinal myRow, const Scalar &value)
Replace current value at the specified location with specified values.
local_ordinal_type getMaxLocalIndex() const
The maximum local index on the calling process.
bool isNodeLocalElement(local_ordinal_type localIndex) const
Whether the given local index is valid for this Map on the calling process.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
virtual Teuchos::RCP< const ::Tpetra::RowGraph< LO, GO, Node > > getGraph() const override
Get the (mesh) graph.
virtual GO getIndexBase() const override
The index base for global indices in this matrix.
void applyBlock(const BlockMultiVector< Scalar, LO, GO, Node > &X, BlockMultiVector< Scalar, LO, GO, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, const Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), const Scalar beta=Teuchos::ScalarTraits< Scalar >::zero())
Version of apply() that takes BlockMultiVector input and output.
CombineMode
Rule for combining data in an Import or Export.
virtual bool isGloballyIndexed() const override
Whether matrix indices are globally indexed.
LO getLocalRowOffsets(const LO localRowInd, ptrdiff_t offsets[], const LO colInds[], const LO numColInds) const
Get relative offsets corresponding to the given rows, given by local row index.
size_t getLocalMaxNumRowEntries() const override
Maximum number of entries in any row of the matrix, on this process.
virtual global_size_t getGlobalNumEntries() const override
The global number of stored (structurally nonzero) entries.
Replace old value with maximum of magnitudes of old and new values.
void apply(const mv_type &X, mv_type &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const override
For this matrix A, compute Y := beta * Y + alpha * Op(A) * X.
Replace existing values with new values.
device_type::execution_space execution_space
The Kokkos execution space that this class uses.
Teuchos::RCP< const map_type > getRangeMap() const override
Returns the Map associated with the domain of this graph.
void enableWDVTracking()
Enable WrappedDualView reference-count tracking and syncing. Call this after exiting a host-parallel ...
Replace old values with zero.
Teuchos::RCP< const map_type > getRangeMap() const override
Get the (point) range Map of this matrix.
KOKKOS_INLINE_FUNCTION void COPY(const ViewType1 &x, const ViewType2 &y)
Deep copy x into y, where x and y are either rank 1 (vectors) or rank 2 (matrices) with the same dime...
std::string dualViewStatusToString(const DualViewType &dv, const char name[])
Return the status of the given Kokkos::DualView, as a human-readable string.
void getLocalDiagOffsets(const Kokkos::View< size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Get offsets of the diagonal entries in the matrix.
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const LO &)
Number of bytes required to pack or unpack the given value of type value_type.
char packet_type
Implementation detail; tells.
local_matrix_device_type getLocalMatrixDevice() const
void disableWDVTracking()
Disable WrappedDualView reference-count tracking and syncing. Call this before entering a host-parall...
local_ordinal_type getLocalElement(global_ordinal_type globalIndex) const
The local index corresponding to the given global index.
A distributed dense vector.
virtual bool supportsRowViews() const override
Whether this object implements getLocalRowView() and getGlobalRowView().
virtual void packAndPrepare(const SrcDistObject &sourceObj, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &exportLIDs, Kokkos::DualView< packet_type *, buffer_device_type > &exports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, size_t &constantNumPackets) override
virtual void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< packet_type *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode) override
virtual void getGlobalRowCopy(GO GlobalRow, nonconst_global_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const override
Get a copy of the given global row's entries.
bool locallySameAs(const Map< local_ordinal_type, global_ordinal_type, node_type > &map) const
Is this Map locally the same as the input Map?
virtual bool isFillComplete() const override
Whether fillComplete() has been called.
impl_scalar_type_dualview::t_host getValuesHostNonConst() const
Get the host or device View of the matrix's values (val_).
Base class for distributed Tpetra objects that support data redistribution.
LO sumIntoLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Sum into values at the given (mesh, i.e., block) column indices, in the given (mesh, i.e., block) row.
virtual size_t getLocalNumCols() const override
The number of columns needed to apply the forward operator on this node.
void importAndFillComplete(Teuchos::RCP< BlockCrsMatrix< Scalar, LO, GO, Node > > &destMatrix, const Import< LO, GO, Node > &importer) const
Import from this to the given destination matrix, and make the result fill complete.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
virtual size_t getNumEntriesInGlobalRow(GO globalRow) const override
The current number of entries on the calling process in the specified global row. ...