10 #ifndef TPETRA_BLOCKCRSMATRIX_DEF_HPP
11 #define TPETRA_BLOCKCRSMATRIX_DEF_HPP
19 #include "Tpetra_BlockMultiVector.hpp"
22 #include "Teuchos_TimeMonitor.hpp"
23 #ifdef HAVE_TPETRA_DEBUG
25 #endif // HAVE_TPETRA_DEBUG
27 #include "KokkosSparse_spmv.hpp"
34 struct BlockCrsRowStruct {
35 T totalNumEntries, totalNumBytes, maxRowLength;
36 KOKKOS_DEFAULTED_FUNCTION BlockCrsRowStruct() =
default;
37 KOKKOS_DEFAULTED_FUNCTION BlockCrsRowStruct(
const BlockCrsRowStruct& b) =
default;
38 KOKKOS_INLINE_FUNCTION BlockCrsRowStruct(
const T& numEnt,
const T& numBytes,
const T& rowLength)
39 : totalNumEntries(numEnt)
40 , totalNumBytes(numBytes)
41 , maxRowLength(rowLength) {}
45 static KOKKOS_INLINE_FUNCTION
void operator+=(BlockCrsRowStruct<T>& a,
const BlockCrsRowStruct<T>& b) {
46 a.totalNumEntries += b.totalNumEntries;
47 a.totalNumBytes += b.totalNumBytes;
48 a.maxRowLength = a.maxRowLength > b.maxRowLength ? a.maxRowLength : b.maxRowLength;
51 template <
typename T,
typename ExecSpace>
52 struct BlockCrsReducer {
53 typedef BlockCrsReducer reducer;
55 typedef Kokkos::View<value_type, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type;
58 KOKKOS_INLINE_FUNCTION
59 BlockCrsReducer(value_type& val)
62 KOKKOS_INLINE_FUNCTION
void join(value_type& dst, value_type& src)
const { dst += src; }
63 KOKKOS_INLINE_FUNCTION
void join(value_type& dst,
const value_type& src)
const { dst += src; }
64 KOKKOS_INLINE_FUNCTION
void init(value_type& val)
const { val = value_type(); }
65 KOKKOS_INLINE_FUNCTION value_type& reference() {
return *value; }
66 KOKKOS_INLINE_FUNCTION result_view_type view()
const {
return result_view_type(value); }
75 template <
class Scalar,
class LO,
class GO,
class Node>
76 class GetLocalDiagCopy {
78 typedef typename Node::device_type device_type;
79 typedef size_t diag_offset_type;
80 typedef Kokkos::View<
const size_t*, device_type,
81 Kokkos::MemoryUnmanaged>
83 typedef typename ::Tpetra::CrsGraph<LO, GO, Node> global_graph_type;
85 typedef typename local_graph_device_type::row_map_type row_offsets_type;
86 typedef typename ::Tpetra::BlockMultiVector<Scalar, LO, GO, Node>::impl_scalar_type IST;
87 typedef Kokkos::View<IST***, device_type, Kokkos::MemoryUnmanaged> diag_type;
88 typedef Kokkos::View<const IST*, device_type, Kokkos::MemoryUnmanaged> values_type;
91 GetLocalDiagCopy(
const diag_type& diag,
92 const values_type& val,
93 const diag_offsets_type& diagOffsets,
94 const row_offsets_type& ptr,
97 , diagOffsets_(diagOffsets)
99 , blockSize_(blockSize)
100 , offsetPerBlock_(blockSize_ * blockSize_)
103 KOKKOS_INLINE_FUNCTION
void
104 operator()(
const LO& lclRowInd)
const {
108 const size_t absOffset = ptr_[lclRowInd];
111 const size_t relOffset = diagOffsets_[lclRowInd];
114 const size_t pointOffset = (absOffset + relOffset) * offsetPerBlock_;
119 device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
120 const_little_block_type;
121 const_little_block_type D_in(val_.data() + pointOffset,
122 blockSize_, blockSize_);
123 auto D_out = Kokkos::subview(diag_, lclRowInd, ALL(), ALL());
129 diag_offsets_type diagOffsets_;
130 row_offsets_type ptr_;
137 template <
class Scalar,
class LO,
class GO,
class Node>
139 BlockCrsMatrix<Scalar, LO, GO, Node>::
140 markLocalErrorAndGetStream() {
141 *(this->localError_) =
true;
142 if ((*errs_).is_null()) {
143 *errs_ = Teuchos::rcp(
new std::ostringstream());
148 template <
class Scalar,
class LO,
class GO,
class Node>
153 graph_(Teuchos::rcp(new
map_type()), 0)
155 blockSize_(static_cast<LO>(0))
156 , X_colMap_(new Teuchos::RCP<
BMV>())
158 Y_rowMap_(new Teuchos::RCP<
BMV>())
160 pointImporter_(new Teuchos::RCP<typename
crs_graph_type::import_type>())
162 , localError_(new bool(false))
163 , errs_(new Teuchos::RCP<std::ostringstream>())
167 template <
class Scalar,
class LO,
class GO,
class Node>
173 , rowMeshMap_(*(graph.getRowMap()))
174 , blockSize_(blockSize)
175 , X_colMap_(new Teuchos::RCP<
BMV>())
177 Y_rowMap_(new Teuchos::RCP<
BMV>())
179 pointImporter_(new Teuchos::RCP<typename
crs_graph_type::import_type>())
180 , offsetPerBlock_(blockSize * blockSize)
181 , localError_(new bool(false))
182 , errs_(new Teuchos::RCP<std::ostringstream>())
185 TEUCHOS_TEST_FOR_EXCEPTION(
186 !graph_.
isSorted(), std::invalid_argument,
188 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted "
189 "rows (isSorted() is false). This class assumes sorted rows.");
191 graphRCP_ = Teuchos::rcpFromRef(graph_);
197 const bool blockSizeIsNonpositive = (blockSize + 1 <= 1);
198 TEUCHOS_TEST_FOR_EXCEPTION(
199 blockSizeIsNonpositive, std::invalid_argument,
201 "BlockCrsMatrix constructor: The input blockSize = "
202 << blockSize <<
" <= 0. The block size must be positive.");
214 auto local_graph_h = graph.getLocalGraphHost();
215 auto ptr_h = local_graph_h.row_map;
216 ptrHost_ = decltype(ptrHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph row offset"), ptr_h.extent(0));
219 auto ind_h = local_graph_h.entries;
220 indHost_ = decltype(indHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph column indices"), ind_h.extent(0));
224 const auto numValEnt = ind_h.extent(0) * offsetPerBlock();
225 val_ = decltype(val_)(impl_scalar_type_dualview(
"val", numValEnt));
229 template <
class Scalar,
class LO,
class GO,
class Node>
232 const typename local_matrix_device_type::values_type& values,
236 , rowMeshMap_(*(graph.getRowMap()))
237 , blockSize_(blockSize)
238 , X_colMap_(new Teuchos::RCP<
BMV>())
240 Y_rowMap_(new Teuchos::RCP<
BMV>())
242 pointImporter_(new Teuchos::RCP<typename
crs_graph_type::import_type>())
243 , offsetPerBlock_(blockSize * blockSize)
244 , localError_(new bool(false))
245 , errs_(new Teuchos::RCP<std::ostringstream>())
248 TEUCHOS_TEST_FOR_EXCEPTION(
249 !graph_.
isSorted(), std::invalid_argument,
251 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted "
252 "rows (isSorted() is false). This class assumes sorted rows.");
254 graphRCP_ = Teuchos::rcpFromRef(graph_);
260 const bool blockSizeIsNonpositive = (blockSize + 1 <= 1);
261 TEUCHOS_TEST_FOR_EXCEPTION(
262 blockSizeIsNonpositive, std::invalid_argument,
264 "BlockCrsMatrix constructor: The input blockSize = "
265 << blockSize <<
" <= 0. The block size must be positive.");
277 auto local_graph_h = graph.getLocalGraphHost();
278 auto ptr_h = local_graph_h.row_map;
279 ptrHost_ = decltype(ptrHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph row offset"), ptr_h.extent(0));
282 auto ind_h = local_graph_h.entries;
283 indHost_ = decltype(indHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph column indices"), ind_h.extent(0));
286 const auto numValEnt = ind_h.extent(0) * offsetPerBlock();
287 TEUCHOS_ASSERT_EQUALITY(numValEnt, values.size());
288 val_ = decltype(val_)(values);
292 template <
class Scalar,
class LO,
class GO,
class Node>
300 , rowMeshMap_(*(graph.getRowMap()))
301 , domainPointMap_(domainPointMap)
302 , rangePointMap_(rangePointMap)
303 , blockSize_(blockSize)
304 , X_colMap_(new Teuchos::RCP<
BMV>())
306 Y_rowMap_(new Teuchos::RCP<
BMV>())
308 pointImporter_(new Teuchos::RCP<typename
crs_graph_type::import_type>())
309 , offsetPerBlock_(blockSize * blockSize)
310 , localError_(new bool(false))
311 , errs_(new Teuchos::RCP<std::ostringstream>())
313 TEUCHOS_TEST_FOR_EXCEPTION(
314 !graph_.
isSorted(), std::invalid_argument,
316 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted "
317 "rows (isSorted() is false). This class assumes sorted rows.");
319 graphRCP_ = Teuchos::rcpFromRef(graph_);
325 const bool blockSizeIsNonpositive = (blockSize + 1 <= 1);
326 TEUCHOS_TEST_FOR_EXCEPTION(
327 blockSizeIsNonpositive, std::invalid_argument,
329 "BlockCrsMatrix constructor: The input blockSize = "
330 << blockSize <<
" <= 0. The block size must be positive.");
338 auto local_graph_h = graph.getLocalGraphHost();
339 auto ptr_h = local_graph_h.row_map;
340 ptrHost_ = decltype(ptrHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph row offset"), ptr_h.extent(0));
344 auto ind_h = local_graph_h.entries;
345 indHost_ = decltype(indHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph column indices"), ind_h.extent(0));
349 const auto numValEnt = ind_h.extent(0) * offsetPerBlock();
350 val_ = decltype(val_)(impl_scalar_type_dualview(
"val", numValEnt));
354 template <
class Scalar,
class LO,
class GO,
class Node>
355 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
359 return Teuchos::rcp(
new map_type(domainPointMap_));
362 template <
class Scalar,
class LO,
class GO,
class Node>
363 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
367 return Teuchos::rcp(
new map_type(rangePointMap_));
370 template <
class Scalar,
class LO,
class GO,
class Node>
371 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
374 return graph_.getRowMap();
377 template <
class Scalar,
class LO,
class GO,
class Node>
378 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
381 return graph_.getColMap();
384 template <
class Scalar,
class LO,
class GO,
class Node>
388 return graph_.getGlobalNumRows();
391 template <
class Scalar,
class LO,
class GO,
class Node>
395 return graph_.getLocalNumRows();
398 template <
class Scalar,
class LO,
class GO,
class Node>
402 return graph_.getLocalMaxNumRowEntries();
405 template <
class Scalar,
class LO,
class GO,
class Node>
409 Teuchos::ETransp mode,
413 TEUCHOS_TEST_FOR_EXCEPTION(
414 mode != Teuchos::NO_TRANS && mode != Teuchos::TRANS && mode != Teuchos::CONJ_TRANS,
415 std::invalid_argument,
416 "Tpetra::BlockCrsMatrix::apply: "
417 "Invalid 'mode' argument. Valid values are Teuchos::NO_TRANS, "
418 "Teuchos::TRANS, and Teuchos::CONJ_TRANS.");
420 TEUCHOS_TEST_FOR_EXCEPTION(
422 std::invalid_argument,
423 "Tpetra::BlockCrsMatrix::apply: "
424 "X and Y must both be constant stride");
428 const LO blockSize = getBlockSize();
430 X_view =
BMV(X, *(graph_.getDomainMap()), blockSize);
431 Y_view =
BMV(Y, *(graph_.getRangeMap()), blockSize);
432 }
catch (std::invalid_argument& e) {
433 TEUCHOS_TEST_FOR_EXCEPTION(
434 true, std::invalid_argument,
435 "Tpetra::BlockCrsMatrix::"
436 "apply: Either the input MultiVector X or the output MultiVector Y "
437 "cannot be viewed as a BlockMultiVector, given this BlockCrsMatrix's "
438 "graph. BlockMultiVector's constructor threw the following exception: "
447 const_cast<this_BCRS_type&
>(*this).applyBlock(X_view, Y_view, mode, alpha, beta);
448 }
catch (std::invalid_argument& e) {
449 TEUCHOS_TEST_FOR_EXCEPTION(
450 true, std::invalid_argument,
451 "Tpetra::BlockCrsMatrix::"
452 "apply: The implementation method applyBlock complained about having "
453 "an invalid argument. It reported the following: "
455 }
catch (std::logic_error& e) {
456 TEUCHOS_TEST_FOR_EXCEPTION(
457 true, std::invalid_argument,
458 "Tpetra::BlockCrsMatrix::"
459 "apply: The implementation method applyBlock complained about a "
460 "possible bug in its implementation. It reported the following: "
462 }
catch (std::exception& e) {
463 TEUCHOS_TEST_FOR_EXCEPTION(
464 true, std::invalid_argument,
465 "Tpetra::BlockCrsMatrix::"
466 "apply: The implementation method applyBlock threw an exception which "
467 "is neither std::invalid_argument nor std::logic_error, but is a "
468 "subclass of std::exception. It reported the following: "
471 TEUCHOS_TEST_FOR_EXCEPTION(
472 true, std::logic_error,
473 "Tpetra::BlockCrsMatrix::"
474 "apply: The implementation method applyBlock threw an exception which "
475 "is not an instance of a subclass of std::exception. This probably "
476 "indicates a bug. Please report this to the Tpetra developers.");
480 template <
class Scalar,
class LO,
class GO,
class Node>
484 Teuchos::ETransp mode,
487 TEUCHOS_TEST_FOR_EXCEPTION(
489 "Tpetra::BlockCrsMatrix::applyBlock: "
490 "X and Y have different block sizes. X.getBlockSize() = "
494 if (mode == Teuchos::NO_TRANS) {
495 applyBlockNoTrans(X, Y, alpha, beta);
496 }
else if (mode == Teuchos::TRANS || mode == Teuchos::CONJ_TRANS) {
497 applyBlockTrans(X, Y, mode, alpha, beta);
499 TEUCHOS_TEST_FOR_EXCEPTION(
500 true, std::invalid_argument,
501 "Tpetra::BlockCrsMatrix::"
502 "applyBlock: Invalid 'mode' argument. Valid values are "
503 "Teuchos::NO_TRANS, Teuchos::TRANS, and Teuchos::CONJ_TRANS.");
507 template <
class Scalar,
class LO,
class GO,
class Node>
516 TEUCHOS_TEST_FOR_EXCEPTION(!destMatrix.is_null(), std::invalid_argument,
517 "destMatrix is required to be null.");
521 RCP<crs_graph_type> srcGraph = rcp(
new crs_graph_type(this->getCrsGraph()));
522 RCP<crs_graph_type> destGraph = importAndFillCompleteCrsGraph<crs_graph_type>(srcGraph, importer,
523 srcGraph->getDomainMap(),
524 srcGraph->getRangeMap());
527 destMatrix = rcp(
new this_BCRS_type(*destGraph, getBlockSize()));
531 template <
class Scalar,
class LO,
class GO,
class Node>
540 TEUCHOS_TEST_FOR_EXCEPTION(!destMatrix.is_null(), std::invalid_argument,
541 "destMatrix is required to be null.");
545 RCP<crs_graph_type> srcGraph = rcp(
new crs_graph_type(this->getCrsGraph()));
546 RCP<crs_graph_type> destGraph = exportAndFillCompleteCrsGraph<crs_graph_type>(srcGraph, exporter,
547 srcGraph->getDomainMap(),
548 srcGraph->getRangeMap());
551 destMatrix = rcp(
new this_BCRS_type(*destGraph, getBlockSize()));
555 template <
class Scalar,
class LO,
class GO,
class Node>
558 auto val_d = val_.getDeviceView(Access::OverwriteAll);
562 template <
class Scalar,
class LO,
class GO,
class Node>
567 const LO numColInds)
const {
568 std::vector<ptrdiff_t> offsets(numColInds);
569 const LO numOffsets = this->getLocalRowOffsets(localRowInd, offsets.data(), colInds, numColInds);
570 const LO validCount = this->replaceLocalValuesByOffsets(localRowInd, offsets.data(), vals, numOffsets);
574 template <
class Scalar,
class LO,
class GO,
class Node>
577 Kokkos::MemoryUnmanaged>& offsets)
const {
578 graph_.getLocalDiagOffsets(offsets);
581 template <
class Scalar,
class LO,
class GO,
class Node>
584 Kokkos::MemoryUnmanaged>& diag,
585 const Kokkos::View<
const size_t*, device_type,
586 Kokkos::MemoryUnmanaged>& offsets)
const {
587 using Kokkos::parallel_for;
588 const char prefix[] =
"Tpetra::BlockCrsMatrix::getLocalDiagCopy (2-arg): ";
590 const LO lclNumMeshRows =
static_cast<LO
>(rowMeshMap_.getLocalNumElements());
591 const LO blockSize = this->getBlockSize();
592 TEUCHOS_TEST_FOR_EXCEPTION(static_cast<LO>(diag.extent(0)) < lclNumMeshRows ||
593 static_cast<LO>(diag.extent(1)) < blockSize ||
594 static_cast<LO>(diag.extent(2)) < blockSize,
595 std::invalid_argument, prefix <<
"The input Kokkos::View is not big enough to hold all the data.");
596 TEUCHOS_TEST_FOR_EXCEPTION(static_cast<LO>(offsets.size()) < lclNumMeshRows, std::invalid_argument,
597 prefix <<
"offsets.size() = " << offsets.size() <<
" < local number of "
599 << lclNumMeshRows <<
".");
601 typedef Kokkos::RangePolicy<execution_space, LO> policy_type;
602 typedef GetLocalDiagCopy<Scalar, LO, GO, Node> functor_type;
608 auto val_d = val_.getDeviceView(Access::ReadOnly);
609 parallel_for(policy_type(0, lclNumMeshRows),
610 functor_type(diag, val_d, offsets,
611 graph_.getLocalGraphDevice().row_map, blockSize_));
614 template <
class Scalar,
class LO,
class GO,
class Node>
619 const LO numColInds)
const {
620 std::vector<ptrdiff_t> offsets(numColInds);
621 const LO numOffsets = this->getLocalRowOffsets(localRowInd, offsets.data(), colInds, numColInds);
622 const LO validCount = this->absMaxLocalValuesByOffsets(localRowInd, offsets.data(), vals, numOffsets);
626 template <
class Scalar,
class LO,
class GO,
class Node>
631 const LO numColInds)
const {
632 std::vector<ptrdiff_t> offsets(numColInds);
633 const LO numOffsets = this->getLocalRowOffsets(localRowInd, offsets.data(), colInds, numColInds);
634 const LO validCount = this->sumIntoLocalValuesByOffsets(localRowInd, offsets.data(), vals, numOffsets);
637 template <
class Scalar,
class LO,
class GO,
class Node>
640 nonconst_local_inds_host_view_type& Indices,
641 nonconst_values_host_view_type& Values,
642 size_t& NumEntries)
const {
643 auto vals = getValuesHost(LocalRow);
644 const LO numInds = ptrHost_(LocalRow + 1) - ptrHost_(LocalRow);
645 if (numInds > (LO)Indices.extent(0) || numInds * blockSize_ * blockSize_ > (LO)Values.extent(0)) {
646 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error,
647 "Tpetra::BlockCrsMatrix::getLocalRowCopy : Column and/or values array is not large enough to hold "
648 << numInds <<
" row entries");
650 const LO* colInds = indHost_.data() + ptrHost_(LocalRow);
651 for (LO i = 0; i < numInds; ++i) {
652 Indices[i] = colInds[i];
654 for (LO i = 0; i < numInds * blockSize_ * blockSize_; ++i) {
657 NumEntries = numInds;
660 template <
class Scalar,
class LO,
class GO,
class Node>
665 const LO numColInds)
const {
666 if (!rowMeshMap_.isNodeLocalElement(localRowInd)) {
671 return static_cast<LO
>(0);
674 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid();
678 for (LO k = 0; k < numColInds; ++k) {
679 const LO relBlockOffset =
680 this->findRelOffsetOfColumnIndex(localRowInd, colInds[k], hint);
681 if (relBlockOffset != LINV) {
682 offsets[k] =
static_cast<ptrdiff_t
>(relBlockOffset);
683 hint = relBlockOffset + 1;
690 template <
class Scalar,
class LO,
class GO,
class Node>
693 const ptrdiff_t offsets[],
695 const LO numOffsets)
const {
696 if (!rowMeshMap_.isNodeLocalElement(localRowInd)) {
701 return static_cast<LO
>(0);
708 const size_t perBlockSize =
static_cast<LO
>(offsetPerBlock());
709 const ptrdiff_t STINV = Teuchos::OrdinalTraits<ptrdiff_t>::invalid();
710 size_t pointOffset = 0;
713 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
714 const size_t blockOffset = offsets[k] * perBlockSize;
715 if (offsets[k] != STINV) {
717 const_little_block_type A_new = getConstLocalBlockFromInput(vIn, pointOffset);
725 template <
class Scalar,
class LO,
class GO,
class Node>
728 const ptrdiff_t offsets[],
730 const LO numOffsets)
const {
731 if (!rowMeshMap_.isNodeLocalElement(localRowInd)) {
736 return static_cast<LO
>(0);
738 const impl_scalar_type*
const vIn =
reinterpret_cast<const impl_scalar_type*
>(vals);
739 auto val_out = getValuesHost(localRowInd);
740 impl_scalar_type* vOut =
const_cast<impl_scalar_type*
>(val_out.data());
742 const size_t perBlockSize =
static_cast<LO
>(offsetPerBlock());
743 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid();
744 size_t pointOffset = 0;
747 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
748 const size_t blockOffset = offsets[k] * perBlockSize;
749 if (blockOffset != STINV) {
750 little_block_type A_old = getNonConstLocalBlockFromInput(vOut, blockOffset);
751 const_little_block_type A_new = getConstLocalBlockFromInput(vIn, pointOffset);
759 template <
class Scalar,
class LO,
class GO,
class Node>
762 const ptrdiff_t offsets[],
764 const LO numOffsets)
const {
765 if (!rowMeshMap_.isNodeLocalElement(localRowInd)) {
770 return static_cast<LO
>(0);
778 const size_t perBlockSize =
static_cast<LO
>(offsetPerBlock());
779 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid();
780 size_t pointOffset = 0;
783 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
784 const size_t blockOffset = offsets[k] * perBlockSize;
785 if (blockOffset != STINV) {
787 const_little_block_type A_new = getConstLocalBlockFromInput(vIn, pointOffset);
788 AXPY(ONE, A_new, A_old);
795 template <
class Scalar,
class LO,
class GO,
class Node>
797 impl_scalar_type_dualview::t_host::const_type
800 return val_.getHostView(Access::ReadOnly);
803 template <
class Scalar,
class LO,
class GO,
class Node>
804 typename BlockCrsMatrix<Scalar, LO, GO, Node>::
805 impl_scalar_type_dualview::t_dev::const_type
806 BlockCrsMatrix<Scalar, LO, GO, Node>::
807 getValuesDevice()
const {
808 return val_.getDeviceView(Access::ReadOnly);
811 template <
class Scalar,
class LO,
class GO,
class Node>
812 typename BlockCrsMatrix<Scalar, LO, GO, Node>::
813 impl_scalar_type_dualview::t_host
816 return val_.getHostView(Access::ReadWrite);
819 template <
class Scalar,
class LO,
class GO,
class Node>
821 impl_scalar_type_dualview::t_dev
824 return val_.getDeviceView(Access::ReadWrite);
827 template <
class Scalar,
class LO,
class GO,
class Node>
828 typename BlockCrsMatrix<Scalar, LO, GO, Node>::
829 impl_scalar_type_dualview::t_host::const_type
830 BlockCrsMatrix<Scalar, LO, GO, Node>::
831 getValuesHost(
const LO& lclRow)
const {
832 const size_t perBlockSize =
static_cast<LO
>(offsetPerBlock());
833 auto val = val_.getHostView(Access::ReadOnly);
834 auto r_val = Kokkos::subview(val, Kokkos::pair<LO, LO>(ptrHost_(lclRow) * perBlockSize, ptrHost_(lclRow + 1) * perBlockSize));
838 template <
class Scalar,
class LO,
class GO,
class Node>
840 impl_scalar_type_dualview::t_dev::const_type
843 const size_t perBlockSize =
static_cast<LO
>(offsetPerBlock());
844 auto val = val_.getDeviceView(Access::ReadOnly);
845 auto r_val = Kokkos::subview(val, Kokkos::pair<LO, LO>(ptrHost_(lclRow) * perBlockSize, ptrHost_(lclRow + 1) * perBlockSize));
849 template <
class Scalar,
class LO,
class GO,
class Node>
853 const size_t perBlockSize =
static_cast<LO
>(offsetPerBlock());
854 auto val = val_.getHostView(Access::ReadWrite);
855 auto r_val = Kokkos::subview(val, Kokkos::pair<LO, LO>(ptrHost_(lclRow) * perBlockSize, ptrHost_(lclRow + 1) * perBlockSize));
859 template <
class Scalar,
class LO,
class GO,
class Node>
863 const size_t perBlockSize =
static_cast<LO
>(offsetPerBlock());
864 auto val = val_.getDeviceView(Access::ReadWrite);
865 auto r_val = Kokkos::subview(val, Kokkos::pair<LO, LO>(ptrHost_(lclRow) * perBlockSize, ptrHost_(lclRow + 1) * perBlockSize));
869 template <
class Scalar,
class LO,
class GO,
class Node>
873 const size_t numEntInGraph = graph_.getNumEntriesInLocalRow(localRowInd);
874 if (numEntInGraph == Teuchos::OrdinalTraits<size_t>::invalid()) {
875 return static_cast<LO
>(0);
877 return static_cast<LO
>(numEntInGraph);
881 template <
class Scalar,
class LO,
class GO,
class Node>
882 typename BlockCrsMatrix<Scalar, LO, GO, Node>::local_matrix_device_type
885 auto numCols = this->graph_.getColMap()->getLocalNumElements();
886 auto val = val_.getDeviceView(Access::ReadWrite);
887 const LO blockSize = this->getBlockSize();
888 const auto graph = this->graph_.getLocalGraphDevice();
890 return local_matrix_device_type(
"Tpetra::BlockCrsMatrix::lclMatrixDevice",
897 template <
class Scalar,
class LO,
class GO,
class Node>
901 const Teuchos::ETransp mode,
910 TEUCHOS_TEST_FOR_EXCEPTION(
911 true, std::logic_error,
912 "Tpetra::BlockCrsMatrix::apply: "
913 "transpose and conjugate transpose modes are not yet implemented.");
916 template <
class Scalar,
class LO,
class GO,
class Node>
917 void BlockCrsMatrix<Scalar, LO, GO, Node>::
918 applyBlockNoTrans(
const BlockMultiVector<Scalar, LO, GO, Node>& X,
919 BlockMultiVector<Scalar, LO, GO, Node>& Y,
924 typedef ::Tpetra::Import<LO, GO, Node> import_type;
925 typedef ::Tpetra::Export<LO, GO, Node> export_type;
926 const Scalar zero = STS::zero();
927 const Scalar one = STS::one();
928 RCP<const import_type>
import = graph_.getImporter();
930 RCP<const export_type> theExport = graph_.getExporter();
931 const char prefix[] =
"Tpetra::BlockCrsMatrix::applyBlockNoTrans: ";
936 }
else if (beta != one) {
940 const BMV* X_colMap = NULL;
941 if (
import.is_null()) {
944 }
catch (std::exception& e) {
945 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::logic_error, prefix <<
"Tpetra::MultiVector::"
946 "operator= threw an exception: "
957 if ((*X_colMap_).is_null() ||
958 (**X_colMap_).getNumVectors() != X.getNumVectors() ||
959 (**X_colMap_).getBlockSize() != X.getBlockSize()) {
960 *X_colMap_ = rcp(
new BMV(*(graph_.getColMap()), getBlockSize(),
961 static_cast<LO
>(X.getNumVectors())));
963 (*X_colMap_)->getMultiVectorView().doImport(X.getMultiVectorView(), **pointImporter_,
::Tpetra::REPLACE);
965 X_colMap = &(**X_colMap_);
966 }
catch (std::exception& e) {
967 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::logic_error, prefix <<
"Tpetra::MultiVector::"
968 "operator= threw an exception: "
974 BMV* Y_rowMap = NULL;
975 if (theExport.is_null()) {
977 }
else if ((*Y_rowMap_).is_null() ||
978 (**Y_rowMap_).getNumVectors() != Y.getNumVectors() ||
979 (**Y_rowMap_).getBlockSize() != Y.getBlockSize()) {
980 *Y_rowMap_ = rcp(
new BMV(*(graph_.getRowMap()), getBlockSize(),
981 static_cast<LO
>(X.getNumVectors())));
983 Y_rowMap = &(**Y_rowMap_);
984 }
catch (std::exception& e) {
985 TEUCHOS_TEST_FOR_EXCEPTION(
986 true, std::logic_error, prefix <<
"Tpetra::MultiVector::"
987 "operator= threw an exception: "
994 localApplyBlockNoTrans(*X_colMap, *Y_rowMap, alpha, beta);
995 }
catch (std::exception& e) {
996 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error, prefix <<
"localApplyBlockNoTrans threw "
1000 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error, prefix <<
"localApplyBlockNoTrans threw "
1001 "an exception not a subclass of std::exception.");
1004 if (!theExport.is_null()) {
1010 template <
class Scalar,
class LO,
class GO,
class Node>
1011 void BlockCrsMatrix<Scalar, LO, GO, Node>::localApplyBlockNoTrans(
1012 const BlockMultiVector<Scalar, LO, GO, Node>& X,
1013 BlockMultiVector<Scalar, LO, GO, Node>& Y,
const Scalar alpha,
1014 const Scalar beta) {
1016 "Tpetra::BlockCrsMatrix::localApplyBlockNoTrans");
1017 const impl_scalar_type alpha_impl = alpha;
1018 const auto graph = this->graph_.getLocalGraphDevice();
1020 mv_type X_mv = X.getMultiVectorView();
1021 mv_type Y_mv = Y.getMultiVectorView();
1022 auto X_lcl = X_mv.getLocalViewDevice(Access::ReadOnly);
1023 auto Y_lcl = Y_mv.getLocalViewDevice(Access::ReadWrite);
1025 auto A_lcl = getLocalMatrixDevice();
1026 if (!applyHelper.get()) {
1028 applyHelper = std::make_shared<ApplyHelper>(A_lcl.nnz(), A_lcl.graph.row_map);
1030 if (applyHelper->shouldUseIntRowptrs()) {
1031 auto A_lcl_int_rowptrs = applyHelper->getIntRowptrMatrix(A_lcl);
1033 &applyHelper->handle_int, KokkosSparse::NoTranspose,
1034 alpha_impl, A_lcl_int_rowptrs, X_lcl, beta, Y_lcl);
1037 &applyHelper->handle, KokkosSparse::NoTranspose,
1038 alpha_impl, A_lcl, X_lcl, beta, Y_lcl);
1042 template <
class Scalar,
class LO,
class GO,
class Node>
1043 LO BlockCrsMatrix<Scalar, LO, GO, Node>::
1044 findRelOffsetOfColumnIndex(
const LO localRowIndex,
1045 const LO colIndexToFind,
1046 const LO hint)
const {
1047 const size_t absStartOffset = ptrHost_[localRowIndex];
1048 const size_t absEndOffset = ptrHost_[localRowIndex + 1];
1049 const LO numEntriesInRow =
static_cast<LO
>(absEndOffset - absStartOffset);
1051 const LO*
const curInd = indHost_.data() + absStartOffset;
1054 if (hint < numEntriesInRow && curInd[hint] == colIndexToFind) {
1061 LO relOffset = Teuchos::OrdinalTraits<LO>::invalid();
1066 const LO maxNumEntriesForLinearSearch = 32;
1067 if (numEntriesInRow > maxNumEntriesForLinearSearch) {
1072 const LO* beg = curInd;
1073 const LO* end = curInd + numEntriesInRow;
1074 std::pair<const LO*, const LO*> p =
1075 std::equal_range(beg, end, colIndexToFind);
1076 if (p.first != p.second) {
1078 relOffset =
static_cast<LO
>(p.first - beg);
1081 for (LO k = 0; k < numEntriesInRow; ++k) {
1082 if (colIndexToFind == curInd[k]) {
1092 template <
class Scalar,
class LO,
class GO,
class Node>
1093 LO BlockCrsMatrix<Scalar, LO, GO, Node>::
1094 offsetPerBlock()
const {
1095 return offsetPerBlock_;
1098 template <
class Scalar,
class LO,
class GO,
class Node>
1099 typename BlockCrsMatrix<Scalar, LO, GO, Node>::const_little_block_type
1100 BlockCrsMatrix<Scalar, LO, GO, Node>::
1101 getConstLocalBlockFromInput(
const impl_scalar_type* val,
1102 const size_t pointOffset)
const {
1104 const LO rowStride = blockSize_;
1105 return const_little_block_type(val + pointOffset, blockSize_, rowStride);
1108 template <
class Scalar,
class LO,
class GO,
class Node>
1109 typename BlockCrsMatrix<Scalar, LO, GO, Node>::little_block_type
1110 BlockCrsMatrix<Scalar, LO, GO, Node>::
1111 getNonConstLocalBlockFromInput(impl_scalar_type* val,
1112 const size_t pointOffset)
const {
1114 const LO rowStride = blockSize_;
1115 return little_block_type(val + pointOffset, blockSize_, rowStride);
1118 template <
class Scalar,
class LO,
class GO,
class Node>
1119 typename BlockCrsMatrix<Scalar, LO, GO, Node>::little_block_host_type
1120 BlockCrsMatrix<Scalar, LO, GO, Node>::
1121 getNonConstLocalBlockFromInputHost(impl_scalar_type* val,
1122 const size_t pointOffset)
const {
1124 const LO rowStride = blockSize_;
1125 const size_t bs2 = blockSize_ * blockSize_;
1126 return little_block_host_type(val + bs2 * pointOffset, blockSize_, rowStride);
1129 template <
class Scalar,
class LO,
class GO,
class Node>
1130 typename BlockCrsMatrix<Scalar, LO, GO, Node>::little_block_type
1131 BlockCrsMatrix<Scalar, LO, GO, Node>::
1132 getLocalBlockDeviceNonConst(
const LO localRowInd,
const LO localColInd)
const {
1133 using this_BCRS_type = BlockCrsMatrix<Scalar, LO, GO, Node>;
1135 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1136 const LO relBlockOffset = this->findRelOffsetOfColumnIndex(localRowInd, localColInd);
1137 if (relBlockOffset != Teuchos::OrdinalTraits<LO>::invalid()) {
1138 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1139 auto vals =
const_cast<this_BCRS_type&
>(*this).getValuesDeviceNonConst();
1140 auto r_val = getNonConstLocalBlockFromInput(vals.data(), absBlockOffset);
1143 return little_block_type();
1147 template <
class Scalar,
class LO,
class GO,
class Node>
1148 typename BlockCrsMatrix<Scalar, LO, GO, Node>::little_block_host_type
1149 BlockCrsMatrix<Scalar, LO, GO, Node>::
1150 getLocalBlockHostNonConst(
const LO localRowInd,
const LO localColInd)
const {
1151 using this_BCRS_type = BlockCrsMatrix<Scalar, LO, GO, Node>;
1153 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1154 const LO relBlockOffset = this->findRelOffsetOfColumnIndex(localRowInd, localColInd);
1155 if (relBlockOffset != Teuchos::OrdinalTraits<LO>::invalid()) {
1156 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1157 auto vals =
const_cast<this_BCRS_type&
>(*this).getValuesHostNonConst();
1158 auto r_val = getNonConstLocalBlockFromInputHost(vals.data(), absBlockOffset);
1161 return little_block_host_type();
1165 template <
class Scalar,
class LO,
class GO,
class Node>
1166 bool BlockCrsMatrix<Scalar, LO, GO, Node>::
1167 checkSizes(const ::Tpetra::SrcDistObject& source) {
1169 typedef BlockCrsMatrix<Scalar, LO, GO, Node> this_BCRS_type;
1170 const this_BCRS_type* src =
dynamic_cast<const this_BCRS_type*
>(&source);
1173 std::ostream& err = markLocalErrorAndGetStream();
1174 err <<
"checkSizes: The source object of the Import or Export "
1175 "must be a BlockCrsMatrix with the same template parameters as the "
1181 if (src->getBlockSize() != this->getBlockSize()) {
1182 std::ostream& err = markLocalErrorAndGetStream();
1183 err <<
"checkSizes: The source and target objects of the Import or "
1184 <<
"Export must have the same block sizes. The source's block "
1185 <<
"size = " << src->getBlockSize() <<
" != the target's block "
1186 <<
"size = " << this->getBlockSize() <<
"." << endl;
1188 if (!src->graph_.isFillComplete()) {
1189 std::ostream& err = markLocalErrorAndGetStream();
1190 err <<
"checkSizes: The source object of the Import or Export is "
1191 "not fill complete. Both source and target objects must be fill "
1195 if (!this->graph_.isFillComplete()) {
1196 std::ostream& err = markLocalErrorAndGetStream();
1197 err <<
"checkSizes: The target object of the Import or Export is "
1198 "not fill complete. Both source and target objects must be fill "
1202 if (src->graph_.getColMap().is_null()) {
1203 std::ostream& err = markLocalErrorAndGetStream();
1204 err <<
"checkSizes: The source object of the Import or Export does "
1205 "not have a column Map. Both source and target objects must have "
1209 if (this->graph_.getColMap().is_null()) {
1210 std::ostream& err = markLocalErrorAndGetStream();
1211 err <<
"checkSizes: The target object of the Import or Export does "
1212 "not have a column Map. Both source and target objects must have "
1218 return !(*(this->localError_));
1221 template <
class Scalar,
class LO,
class GO,
class Node>
1224 const size_t numSameIDs,
1231 using ::Tpetra::Details::Behavior;
1233 using ::Tpetra::Details::ProfilingRegion;
1236 ProfilingRegion profile_region(
"Tpetra::BlockCrsMatrix::copyAndPermute");
1237 const bool debug = Behavior::debug();
1238 const bool verbose = Behavior::verbose();
1243 std::ostringstream os;
1244 const int myRank = this->graph_.getRowMap()->getComm()->getRank();
1245 os <<
"Proc " << myRank <<
": BlockCrsMatrix::copyAndPermute : " << endl;
1250 if (*(this->localError_)) {
1251 std::ostream& err = this->markLocalErrorAndGetStream();
1253 <<
"The target object of the Import or Export is already in an error state."
1262 std::ostringstream os;
1263 os << prefix << endl
1266 std::cerr << os.str();
1272 if (permuteToLIDs.extent(0) != permuteFromLIDs.extent(0)) {
1273 std::ostream& err = this->markLocalErrorAndGetStream();
1275 <<
"permuteToLIDs.extent(0) = " << permuteToLIDs.extent(0)
1276 <<
" != permuteFromLIDs.extent(0) = " << permuteFromLIDs.extent(0)
1280 if (permuteToLIDs.need_sync_host() || permuteFromLIDs.need_sync_host()) {
1281 std::ostream& err = this->markLocalErrorAndGetStream();
1283 <<
"Both permuteToLIDs and permuteFromLIDs must be sync'd to host."
1288 const this_BCRS_type* src =
dynamic_cast<const this_BCRS_type*
>(&source);
1290 std::ostream& err = this->markLocalErrorAndGetStream();
1292 <<
"The source (input) object of the Import or "
1293 "Export is either not a BlockCrsMatrix, or does not have the right "
1294 "template parameters. checkSizes() should have caught this. "
1295 "Please report this bug to the Tpetra developers."
1300 bool lclErr =
false;
1301 #ifdef HAVE_TPETRA_DEBUG
1302 std::set<LO> invalidSrcCopyRows;
1303 std::set<LO> invalidDstCopyRows;
1304 std::set<LO> invalidDstCopyCols;
1305 std::set<LO> invalidDstPermuteCols;
1306 std::set<LO> invalidPermuteFromRows;
1307 #endif // HAVE_TPETRA_DEBUG
1316 #ifdef HAVE_TPETRA_DEBUG
1317 const map_type& srcRowMap = *(src->graph_.getRowMap());
1318 #endif // HAVE_TPETRA_DEBUG
1319 const map_type& dstRowMap = *(this->graph_.getRowMap());
1320 const map_type& srcColMap = *(src->graph_.getColMap());
1321 const map_type& dstColMap = *(this->graph_.getColMap());
1322 const bool canUseLocalColumnIndices = srcColMap.
locallySameAs(dstColMap);
1324 const size_t numPermute =
static_cast<size_t>(permuteFromLIDs.extent(0));
1326 std::ostringstream os;
1328 <<
"canUseLocalColumnIndices: "
1329 << (canUseLocalColumnIndices ?
"true" :
"false")
1330 <<
", numPermute: " << numPermute
1332 std::cerr << os.str();
1335 const auto permuteToLIDsHost = permuteToLIDs.view_host();
1336 const auto permuteFromLIDsHost = permuteFromLIDs.view_host();
1338 if (canUseLocalColumnIndices) {
1340 for (LO localRow = 0; localRow < static_cast<LO>(numSameIDs); ++localRow) {
1341 #ifdef HAVE_TPETRA_DEBUG
1344 invalidSrcCopyRows.insert(localRow);
1347 #endif // HAVE_TPETRA_DEBUG
1349 local_inds_host_view_type lclSrcCols;
1350 values_host_view_type vals;
1354 src->getLocalRowView(localRow, lclSrcCols, vals);
1355 numEntries = lclSrcCols.extent(0);
1356 if (numEntries > 0) {
1357 LO err = this->replaceLocalValues(localRow, lclSrcCols.data(),
reinterpret_cast<const scalar_type*
>(vals.data()), numEntries);
1358 if (err != numEntries) {
1361 #ifdef HAVE_TPETRA_DEBUG
1362 invalidDstCopyRows.insert(localRow);
1363 #endif // HAVE_TPETRA_DEBUG
1370 for (LO k = 0; k < numEntries; ++k) {
1373 #ifdef HAVE_TPETRA_DEBUG
1374 (void)invalidDstCopyCols.insert(lclSrcCols[k]);
1375 #endif // HAVE_TPETRA_DEBUG
1384 for (
size_t k = 0; k < numPermute; ++k) {
1385 const LO srcLclRow =
static_cast<LO
>(permuteFromLIDsHost(k));
1386 const LO dstLclRow =
static_cast<LO
>(permuteToLIDsHost(k));
1388 local_inds_host_view_type lclSrcCols;
1389 values_host_view_type vals;
1391 src->getLocalRowView(srcLclRow, lclSrcCols, vals);
1392 numEntries = lclSrcCols.extent(0);
1393 if (numEntries > 0) {
1394 LO err = this->replaceLocalValues(dstLclRow, lclSrcCols.data(),
reinterpret_cast<const scalar_type*
>(vals.data()), numEntries);
1395 if (err != numEntries) {
1397 #ifdef HAVE_TPETRA_DEBUG
1398 for (LO c = 0; c < numEntries; ++c) {
1400 invalidDstPermuteCols.insert(lclSrcCols[c]);
1403 #endif // HAVE_TPETRA_DEBUG
1409 const size_t maxNumEnt = src->graph_.getLocalMaxNumRowEntries();
1410 Teuchos::Array<LO> lclDstCols(maxNumEnt);
1413 for (LO localRow = 0; localRow < static_cast<LO>(numSameIDs); ++localRow) {
1414 local_inds_host_view_type lclSrcCols;
1415 values_host_view_type vals;
1421 src->getLocalRowView(localRow, lclSrcCols, vals);
1422 numEntries = lclSrcCols.extent(0);
1423 }
catch (std::exception& e) {
1425 std::ostringstream os;
1426 const int myRank = this->graph_.getRowMap()->getComm()->getRank();
1427 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" localRow "
1428 << localRow <<
", src->getLocalRowView() threw an exception: "
1430 std::cerr << os.str();
1435 if (numEntries > 0) {
1436 if (static_cast<size_t>(numEntries) > static_cast<size_t>(lclDstCols.size())) {
1439 std::ostringstream os;
1440 const int myRank = this->graph_.getRowMap()->getComm()->getRank();
1441 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" localRow "
1442 << localRow <<
", numEntries = " << numEntries <<
" > maxNumEnt = "
1443 << maxNumEnt << endl;
1444 std::cerr << os.str();
1449 Teuchos::ArrayView<LO> lclDstColsView = lclDstCols.view(0, numEntries);
1450 for (LO j = 0; j < numEntries; ++j) {
1452 if (lclDstColsView[j] == Teuchos::OrdinalTraits<LO>::invalid()) {
1454 #ifdef HAVE_TPETRA_DEBUG
1455 invalidDstCopyCols.insert(lclDstColsView[j]);
1456 #endif // HAVE_TPETRA_DEBUG
1461 err = this->replaceLocalValues(localRow, lclDstColsView.getRawPtr(),
reinterpret_cast<const scalar_type*
>(vals.data()), numEntries);
1462 }
catch (std::exception& e) {
1464 std::ostringstream os;
1465 const int myRank = this->graph_.getRowMap()->getComm()->getRank();
1466 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" localRow "
1467 << localRow <<
", this->replaceLocalValues() threw an exception: "
1469 std::cerr << os.str();
1473 if (err != numEntries) {
1476 std::ostringstream os;
1477 const int myRank = this->graph_.getRowMap()->getComm()->getRank();
1478 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" "
1480 << localRow <<
", this->replaceLocalValues "
1482 << err <<
" instead of numEntries = "
1483 << numEntries << endl;
1484 std::cerr << os.str();
1492 for (
size_t k = 0; k < numPermute; ++k) {
1493 const LO srcLclRow =
static_cast<LO
>(permuteFromLIDsHost(k));
1494 const LO dstLclRow =
static_cast<LO
>(permuteToLIDsHost(k));
1496 local_inds_host_view_type lclSrcCols;
1497 values_host_view_type vals;
1501 src->getLocalRowView(srcLclRow, lclSrcCols, vals);
1502 numEntries = lclSrcCols.extent(0);
1503 }
catch (std::exception& e) {
1505 std::ostringstream os;
1506 const int myRank = this->graph_.getRowMap()->getComm()->getRank();
1507 os <<
"Proc " << myRank <<
": copyAndPermute: At \"permute\" "
1509 << srcLclRow <<
" and dstLclRow " << dstLclRow
1510 <<
", src->getLocalRowView() threw an exception: " << e.what();
1511 std::cerr << os.str();
1516 if (numEntries > 0) {
1517 if (static_cast<size_t>(numEntries) > static_cast<size_t>(lclDstCols.size())) {
1522 Teuchos::ArrayView<LO> lclDstColsView = lclDstCols.view(0, numEntries);
1523 for (LO j = 0; j < numEntries; ++j) {
1525 if (lclDstColsView[j] == Teuchos::OrdinalTraits<LO>::invalid()) {
1527 #ifdef HAVE_TPETRA_DEBUG
1528 invalidDstPermuteCols.insert(lclDstColsView[j]);
1529 #endif // HAVE_TPETRA_DEBUG
1532 LO err = this->replaceLocalValues(dstLclRow, lclDstColsView.getRawPtr(),
reinterpret_cast<const scalar_type*
>(vals.data()), numEntries);
1533 if (err != numEntries) {
1542 std::ostream& err = this->markLocalErrorAndGetStream();
1543 #ifdef HAVE_TPETRA_DEBUG
1544 err <<
"copyAndPermute: The graph structure of the source of the "
1545 "Import or Export must be a subset of the graph structure of the "
1547 err <<
"invalidSrcCopyRows = [";
1548 for (
typename std::set<LO>::const_iterator it = invalidSrcCopyRows.begin();
1549 it != invalidSrcCopyRows.end(); ++it) {
1551 typename std::set<LO>::const_iterator itp1 = it;
1553 if (itp1 != invalidSrcCopyRows.end()) {
1557 err <<
"], invalidDstCopyRows = [";
1558 for (
typename std::set<LO>::const_iterator it = invalidDstCopyRows.begin();
1559 it != invalidDstCopyRows.end(); ++it) {
1561 typename std::set<LO>::const_iterator itp1 = it;
1563 if (itp1 != invalidDstCopyRows.end()) {
1567 err <<
"], invalidDstCopyCols = [";
1568 for (
typename std::set<LO>::const_iterator it = invalidDstCopyCols.begin();
1569 it != invalidDstCopyCols.end(); ++it) {
1571 typename std::set<LO>::const_iterator itp1 = it;
1573 if (itp1 != invalidDstCopyCols.end()) {
1577 err <<
"], invalidDstPermuteCols = [";
1578 for (
typename std::set<LO>::const_iterator it = invalidDstPermuteCols.begin();
1579 it != invalidDstPermuteCols.end(); ++it) {
1581 typename std::set<LO>::const_iterator itp1 = it;
1583 if (itp1 != invalidDstPermuteCols.end()) {
1587 err <<
"], invalidPermuteFromRows = [";
1588 for (
typename std::set<LO>::const_iterator it = invalidPermuteFromRows.begin();
1589 it != invalidPermuteFromRows.end(); ++it) {
1591 typename std::set<LO>::const_iterator itp1 = it;
1593 if (itp1 != invalidPermuteFromRows.end()) {
1599 err <<
"copyAndPermute: The graph structure of the source of the "
1600 "Import or Export must be a subset of the graph structure of the "
1603 #endif // HAVE_TPETRA_DEBUG
1607 std::ostringstream os;
1608 const int myRank = this->graph_.getRowMap()->getComm()->getRank();
1609 const bool lclSuccess = !(*(this->localError_));
1610 os <<
"*** Proc " << myRank <<
": copyAndPermute "
1611 << (lclSuccess ?
"succeeded" :
"FAILED");
1615 os <<
": error messages: " << this->errorMessages();
1617 std::cerr << os.str();
1641 template <
class LO,
class GO>
1643 packRowCount(
const size_t numEnt,
1644 const size_t numBytesPerValue,
1645 const size_t blkSize) {
1646 using ::Tpetra::Details::PackTraits;
1656 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount(gid);
1657 const size_t valsLen = numEnt * numBytesPerValue * blkSize * blkSize;
1658 return numEntLen + gidsLen + valsLen;
1672 template <
class ST,
class LO,
class GO>
1674 unpackRowCount(
const typename ::Tpetra::Details::PackTraits<LO>::input_buffer_type& imports,
1675 const size_t offset,
1676 const size_t numBytes,
1678 using Kokkos::subview;
1679 using ::Tpetra::Details::PackTraits;
1681 if (numBytes == 0) {
1683 return static_cast<size_t>(0);
1687 TEUCHOS_TEST_FOR_EXCEPTION(theNumBytes > numBytes, std::logic_error,
1690 << theNumBytes <<
" < numBytes = " << numBytes
1692 const char*
const inBuf = imports.data() + offset;
1694 TEUCHOS_TEST_FOR_EXCEPTION(actualNumBytes > numBytes, std::logic_error,
1697 << actualNumBytes <<
" < numBytes = " << numBytes
1699 return static_cast<size_t>(numEntLO);
1706 template <
class ST,
class LO,
class GO>
1708 packRowForBlockCrs(
const typename ::Tpetra::Details::PackTraits<LO>::output_buffer_type exports,
1709 const size_t offset,
1710 const size_t numEnt,
1711 const typename ::Tpetra::Details::PackTraits<GO>::input_array_type& gidsIn,
1712 const typename ::Tpetra::Details::PackTraits<ST>::input_array_type& valsIn,
1713 const size_t numBytesPerValue,
1714 const size_t blockSize) {
1715 using Kokkos::subview;
1716 using ::Tpetra::Details::PackTraits;
1722 const size_t numScalarEnt = numEnt * blockSize * blockSize;
1725 const LO numEntLO =
static_cast<size_t>(numEnt);
1727 const size_t numEntBeg = offset;
1729 const size_t gidsBeg = numEntBeg + numEntLen;
1730 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount(gid);
1731 const size_t valsBeg = gidsBeg + gidsLen;
1732 const size_t valsLen = numScalarEnt * numBytesPerValue;
1734 char*
const numEntOut = exports.data() + numEntBeg;
1735 char*
const gidsOut = exports.data() + gidsBeg;
1736 char*
const valsOut = exports.data() + valsBeg;
1738 size_t numBytesOut = 0;
1743 Kokkos::pair<int, size_t> p;
1744 p = PackTraits<GO>::packArray(gidsOut, gidsIn.data(), numEnt);
1745 errorCode += p.first;
1746 numBytesOut += p.second;
1748 p = PackTraits<ST>::packArray(valsOut, valsIn.data(), numScalarEnt);
1749 errorCode += p.first;
1750 numBytesOut += p.second;
1753 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
1754 TEUCHOS_TEST_FOR_EXCEPTION(numBytesOut != expectedNumBytes, std::logic_error,
1755 "packRowForBlockCrs: numBytesOut = " << numBytesOut
1756 <<
" != expectedNumBytes = " << expectedNumBytes <<
".");
1758 TEUCHOS_TEST_FOR_EXCEPTION(errorCode != 0, std::runtime_error,
1759 "packRowForBlockCrs: "
1760 "PackTraits::packArray returned a nonzero error code");
1766 template <
class ST,
class LO,
class GO>
1768 unpackRowForBlockCrs(
const typename ::Tpetra::Details::PackTraits<GO>::output_array_type& gidsOut,
1769 const typename ::Tpetra::Details::PackTraits<ST>::output_array_type& valsOut,
1770 const typename ::Tpetra::Details::PackTraits<int>::input_buffer_type& imports,
1771 const size_t offset,
1772 const size_t numBytes,
1773 const size_t numEnt,
1774 const size_t numBytesPerValue,
1775 const size_t blockSize) {
1776 using ::Tpetra::Details::PackTraits;
1778 if (numBytes == 0) {
1782 const size_t numScalarEnt = numEnt * blockSize * blockSize;
1783 TEUCHOS_TEST_FOR_EXCEPTION(static_cast<size_t>(imports.extent(0)) <= offset,
1784 std::logic_error,
"unpackRowForBlockCrs: imports.extent(0) = " << imports.extent(0) <<
" <= offset = " << offset <<
".");
1785 TEUCHOS_TEST_FOR_EXCEPTION(static_cast<size_t>(imports.extent(0)) < offset + numBytes,
1786 std::logic_error,
"unpackRowForBlockCrs: imports.extent(0) = " << imports.extent(0) <<
" < offset + numBytes = " << (offset + numBytes) <<
".");
1791 const size_t numEntBeg = offset;
1793 const size_t gidsBeg = numEntBeg + numEntLen;
1794 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount(gid);
1795 const size_t valsBeg = gidsBeg + gidsLen;
1796 const size_t valsLen = numScalarEnt * numBytesPerValue;
1798 const char*
const numEntIn = imports.data() + numEntBeg;
1799 const char*
const gidsIn = imports.data() + gidsBeg;
1800 const char*
const valsIn = imports.data() + valsBeg;
1802 size_t numBytesOut = 0;
1806 TEUCHOS_TEST_FOR_EXCEPTION(static_cast<size_t>(numEntOut) != numEnt, std::logic_error,
1807 "unpackRowForBlockCrs: Expected number of entries " << numEnt
1808 <<
" != actual number of entries " << numEntOut <<
".");
1811 Kokkos::pair<int, size_t> p;
1812 p = PackTraits<GO>::unpackArray(gidsOut.data(), gidsIn, numEnt);
1813 errorCode += p.first;
1814 numBytesOut += p.second;
1816 p = PackTraits<ST>::unpackArray(valsOut.data(), valsIn, numScalarEnt);
1817 errorCode += p.first;
1818 numBytesOut += p.second;
1821 TEUCHOS_TEST_FOR_EXCEPTION(numBytesOut != numBytes, std::logic_error,
1822 "unpackRowForBlockCrs: numBytesOut = " << numBytesOut
1823 <<
" != numBytes = " << numBytes <<
".");
1825 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
1826 TEUCHOS_TEST_FOR_EXCEPTION(numBytesOut != expectedNumBytes, std::logic_error,
1827 "unpackRowForBlockCrs: numBytesOut = " << numBytesOut
1828 <<
" != expectedNumBytes = " << expectedNumBytes <<
".");
1830 TEUCHOS_TEST_FOR_EXCEPTION(errorCode != 0, std::runtime_error,
1831 "unpackRowForBlockCrs: "
1832 "PackTraits::unpackArray returned a nonzero error code");
1838 template <
class Scalar,
class LO,
class GO,
class Node>
1845 Kokkos::DualView<
size_t*,
1848 size_t& constantNumPackets) {
1849 using ::Tpetra::Details::Behavior;
1851 using ::Tpetra::Details::PackTraits;
1852 using ::Tpetra::Details::ProfilingRegion;
1854 typedef typename Kokkos::View<int*, device_type>::host_mirror_type::execution_space host_exec;
1858 ProfilingRegion profile_region(
"Tpetra::BlockCrsMatrix::packAndPrepare");
1860 const bool debug = Behavior::debug();
1861 const bool verbose = Behavior::verbose();
1866 std::ostringstream os;
1867 const int myRank = this->graph_.getRowMap()->getComm()->getRank();
1868 os <<
"Proc " << myRank <<
": BlockCrsMatrix::packAndPrepare: " << std::endl;
1873 if (*(this->localError_)) {
1874 std::ostream& err = this->markLocalErrorAndGetStream();
1876 <<
"The target object of the Import or Export is already in an error state."
1885 std::ostringstream os;
1886 os << prefix << std::endl
1890 std::cerr << os.str();
1896 if (exportLIDs.extent(0) != numPacketsPerLID.extent(0)) {
1897 std::ostream& err = this->markLocalErrorAndGetStream();
1899 <<
"exportLIDs.extent(0) = " << exportLIDs.extent(0)
1900 <<
" != numPacketsPerLID.extent(0) = " << numPacketsPerLID.extent(0)
1901 <<
"." << std::endl;
1904 if (exportLIDs.need_sync_host()) {
1905 std::ostream& err = this->markLocalErrorAndGetStream();
1906 err << prefix <<
"exportLIDs be sync'd to host." << std::endl;
1910 const this_BCRS_type* src =
dynamic_cast<const this_BCRS_type*
>(&source);
1912 std::ostream& err = this->markLocalErrorAndGetStream();
1914 <<
"The source (input) object of the Import or "
1915 "Export is either not a BlockCrsMatrix, or does not have the right "
1916 "template parameters. checkSizes() should have caught this. "
1917 "Please report this bug to the Tpetra developers."
1930 constantNumPackets = 0;
1934 const size_t blockSize =
static_cast<size_t>(src->getBlockSize());
1935 const size_t numExportLIDs = exportLIDs.extent(0);
1936 size_t numBytesPerValue(0);
1938 auto val_host = val_.getHostView(Access::ReadOnly);
1940 PackTraits<impl_scalar_type>::packValueCount(val_host.extent(0) ? val_host(0) :
impl_scalar_type());
1947 Impl::BlockCrsRowStruct<size_t> rowReducerStruct;
1950 auto exportLIDsHost = exportLIDs.view_host();
1951 auto numPacketsPerLIDHost = numPacketsPerLID.view_host();
1952 numPacketsPerLID.modify_host();
1954 rowReducerStruct = Impl::BlockCrsRowStruct<size_t>();
1955 for (
size_t i = 0; i < numExportLIDs; ++i) {
1956 const LO lclRow = exportLIDsHost(i);
1958 numEnt = (numEnt == Teuchos::OrdinalTraits<size_t>::invalid() ? 0 : numEnt);
1960 const size_t numBytes =
1961 packRowCount<LO, GO>(numEnt, numBytesPerValue, blockSize);
1962 numPacketsPerLIDHost(i) = numBytes;
1963 rowReducerStruct += Impl::BlockCrsRowStruct<size_t>(numEnt, numBytes, numEnt);
1970 const size_t totalNumBytes = rowReducerStruct.totalNumBytes;
1971 const size_t totalNumEntries = rowReducerStruct.totalNumEntries;
1972 const size_t maxRowLength = rowReducerStruct.maxRowLength;
1975 std::ostringstream os;
1977 <<
"totalNumBytes = " << totalNumBytes <<
", totalNumEntries = " << totalNumEntries
1979 std::cerr << os.str();
1985 if (exports.extent(0) != totalNumBytes) {
1986 const std::string oldLabel = exports.view_device().label();
1987 const std::string newLabel = (oldLabel ==
"") ?
"exports" : oldLabel;
1988 exports = Kokkos::DualView<packet_type*, buffer_device_type>(newLabel, totalNumBytes);
1990 if (totalNumEntries > 0) {
1992 Kokkos::View<size_t*, host_exec> offset(
"offset", numExportLIDs + 1);
1994 const auto policy = Kokkos::RangePolicy<host_exec>(size_t(0), numExportLIDs + 1);
1995 Kokkos::parallel_scan(policy,
1996 [=](
const size_t& i,
size_t& update,
const bool&
final) {
1997 if (
final) offset(i) = update;
1998 update += (i == numExportLIDs ? 0 : numPacketsPerLIDHost(i));
2001 if (offset(numExportLIDs) != totalNumBytes) {
2002 std::ostream& err = this->markLocalErrorAndGetStream();
2004 <<
"At end of method, the final offset (in bytes) "
2005 << offset(numExportLIDs) <<
" does not equal the total number of bytes packed "
2006 << totalNumBytes <<
". "
2007 <<
"Please report this bug to the Tpetra developers." << std::endl;
2021 exports.modify_host();
2023 typedef Kokkos::TeamPolicy<host_exec> policy_type;
2025 policy_type(numExportLIDs, 1, 1)
2026 .set_scratch_size(0, Kokkos::PerTeam(
sizeof(GO) * maxRowLength));
2031 Kokkos::parallel_for(policy,
2032 [=](
const typename policy_type::member_type& member) {
2033 const size_t i = member.league_rank();
2034 Kokkos::View<GO*, typename host_exec::scratch_memory_space>
2035 gblColInds(member.team_scratch(0), maxRowLength);
2037 const LO lclRowInd = exportLIDsHost(i);
2038 local_inds_host_view_type lclColInds;
2039 values_host_view_type vals;
2044 src->getLocalRowView(lclRowInd, lclColInds, vals);
2045 const size_t numEnt = lclColInds.extent(0);
2048 for (
size_t j = 0; j < numEnt; ++j)
2055 const size_t numBytes =
2056 packRowForBlockCrs<impl_scalar_type, LO, GO>(exports.view_host(),
2059 Kokkos::View<const GO*, host_exec>(gblColInds.data(), maxRowLength),
2066 const size_t offsetDiff = offset(i + 1) - offset(i);
2067 if (numBytes != offsetDiff) {
2068 std::ostringstream os;
2070 <<
"numBytes computed from packRowForBlockCrs is different from "
2071 <<
"precomputed offset values, LID = " << i << std::endl;
2072 std::cerr << os.str();
2081 std::ostringstream os;
2082 const bool lclSuccess = !(*(this->localError_));
2084 << (lclSuccess ?
"succeeded" :
"FAILED")
2086 std::cerr << os.str();
2090 template <
class Scalar,
class LO,
class GO,
class Node>
2097 Kokkos::DualView<
size_t*,
2103 using ::Tpetra::Details::Behavior;
2105 using ::Tpetra::Details::PackTraits;
2106 using ::Tpetra::Details::ProfilingRegion;
2108 typename Kokkos::View<int*, device_type>::host_mirror_type::execution_space;
2110 ProfilingRegion profile_region(
"Tpetra::BlockCrsMatrix::unpackAndCombine");
2111 const bool verbose = Behavior::verbose();
2116 std::ostringstream os;
2117 auto map = this->graph_.getRowMap();
2118 auto comm = map.is_null() ? Teuchos::null : map->getComm();
2119 const int myRank = comm.is_null() ? -1 : comm->getRank();
2120 os <<
"Proc " << myRank <<
": Tpetra::BlockCrsMatrix::unpackAndCombine: ";
2123 os <<
"Start" << endl;
2124 std::cerr << os.str();
2129 if (*(this->localError_)) {
2130 std::ostream& err = this->markLocalErrorAndGetStream();
2131 std::ostringstream os;
2132 os << prefix <<
"{Im/Ex}port target is already in error." << endl;
2134 std::cerr << os.str();
2143 if (importLIDs.extent(0) != numPacketsPerLID.extent(0)) {
2144 std::ostream& err = this->markLocalErrorAndGetStream();
2145 std::ostringstream os;
2146 os << prefix <<
"importLIDs.extent(0) = " << importLIDs.extent(0)
2147 <<
" != numPacketsPerLID.extent(0) = " << numPacketsPerLID.extent(0)
2150 std::cerr << os.str();
2156 if (combineMode !=
ADD && combineMode !=
INSERT &&
2158 combineMode !=
ZERO) {
2159 std::ostream& err = this->markLocalErrorAndGetStream();
2160 std::ostringstream os;
2162 <<
"Invalid CombineMode value " << combineMode <<
". Valid "
2163 <<
"values include ADD, INSERT, REPLACE, ABSMAX, and ZERO."
2166 std::cerr << os.str();
2172 if (this->graph_.getColMap().is_null()) {
2173 std::ostream& err = this->markLocalErrorAndGetStream();
2174 std::ostringstream os;
2175 os << prefix <<
"Target matrix's column Map is null." << endl;
2177 std::cerr << os.str();
2186 const map_type& tgtColMap = *(this->graph_.getColMap());
2189 const size_t blockSize = this->getBlockSize();
2190 const size_t numImportLIDs = importLIDs.extent(0);
2197 size_t numBytesPerValue(0);
2199 auto val_host = val_.getHostView(Access::ReadOnly);
2201 PackTraits<impl_scalar_type>::packValueCount(val_host.extent(0) ? val_host(0) :
impl_scalar_type());
2203 const size_t maxRowNumEnt = graph_.getLocalMaxNumRowEntries();
2204 const size_t maxRowNumScalarEnt = maxRowNumEnt * blockSize * blockSize;
2207 std::ostringstream os;
2208 os << prefix <<
"combineMode: "
2209 << ::Tpetra::combineModeToString(combineMode)
2210 <<
", blockSize: " << blockSize
2211 <<
", numImportLIDs: " << numImportLIDs
2212 <<
", numBytesPerValue: " << numBytesPerValue
2213 <<
", maxRowNumEnt: " << maxRowNumEnt
2214 <<
", maxRowNumScalarEnt: " << maxRowNumScalarEnt << endl;
2215 std::cerr << os.str();
2218 if (combineMode ==
ZERO || numImportLIDs == 0) {
2220 std::ostringstream os;
2221 os << prefix <<
"Nothing to unpack. Done!" << std::endl;
2222 std::cerr << os.str();
2229 if (imports.need_sync_host()) {
2230 imports.sync_host();
2240 if (imports.need_sync_host() || numPacketsPerLID.need_sync_host() ||
2241 importLIDs.need_sync_host()) {
2242 std::ostream& err = this->markLocalErrorAndGetStream();
2243 std::ostringstream os;
2244 os << prefix <<
"All input DualViews must be sync'd to host by now. "
2245 <<
"imports_nc.need_sync_host()="
2246 << (imports.need_sync_host() ?
"true" :
"false")
2247 <<
", numPacketsPerLID.need_sync_host()="
2248 << (numPacketsPerLID.need_sync_host() ?
"true" :
"false")
2249 <<
", importLIDs.need_sync_host()="
2250 << (importLIDs.need_sync_host() ?
"true" :
"false")
2253 std::cerr << os.str();
2259 const auto importLIDsHost = importLIDs.view_host();
2260 const auto numPacketsPerLIDHost = numPacketsPerLID.view_host();
2266 Kokkos::View<size_t*, host_exec> offset(
"offset", numImportLIDs + 1);
2268 const auto policy = Kokkos::RangePolicy<host_exec>(size_t(0), numImportLIDs + 1);
2269 Kokkos::parallel_scan(
"Tpetra::BlockCrsMatrix::unpackAndCombine: offsets", policy,
2270 [=](
const size_t& i,
size_t& update,
const bool&
final) {
2271 if (
final) offset(i) = update;
2272 update += (i == numImportLIDs ? 0 : numPacketsPerLIDHost(i));
2282 Kokkos::View<int, host_exec, Kokkos::MemoryTraits<Kokkos::Atomic> >
2283 errorDuringUnpack(
"errorDuringUnpack");
2284 errorDuringUnpack() = 0;
2286 using policy_type = Kokkos::TeamPolicy<host_exec>;
2287 size_t scratch_per_row =
sizeof(GO) * maxRowNumEnt +
sizeof(LO) * maxRowNumEnt + numBytesPerValue * maxRowNumScalarEnt + 2 *
sizeof(GO);
2289 const auto policy = policy_type(numImportLIDs, 1, 1)
2290 .set_scratch_size(0, Kokkos::PerTeam(scratch_per_row));
2291 using host_scratch_space =
typename host_exec::scratch_memory_space;
2293 using pair_type = Kokkos::pair<size_t, size_t>;
2296 getValuesHostNonConst();
2298 Kokkos::parallel_for(
"Tpetra::BlockCrsMatrix::unpackAndCombine: unpack", policy,
2299 [=, *
this](
const typename policy_type::member_type& member) {
2300 const size_t i = member.league_rank();
2301 Kokkos::View<GO*, host_scratch_space> gblColInds(member.team_scratch(0), maxRowNumEnt);
2302 Kokkos::View<LO*, host_scratch_space> lclColInds(member.team_scratch(0), maxRowNumEnt);
2303 Kokkos::View<impl_scalar_type*, host_scratch_space> vals(member.team_scratch(0), maxRowNumScalarEnt);
2305 const size_t offval = offset(i);
2306 const LO lclRow = importLIDsHost(i);
2307 const size_t numBytes = numPacketsPerLIDHost(i);
2308 const size_t numEnt =
2309 unpackRowCount<impl_scalar_type, LO, GO>(imports.view_host(), offval, numBytes, numBytesPerValue);
2312 if (numEnt > maxRowNumEnt) {
2313 errorDuringUnpack() = 1;
2315 std::ostringstream os;
2317 <<
"At i = " << i <<
", numEnt = " << numEnt
2318 <<
" > maxRowNumEnt = " << maxRowNumEnt
2320 std::cerr << os.str();
2325 const size_t numScalarEnt = numEnt * blockSize * blockSize;
2326 auto gidsOut = Kokkos::subview(gblColInds, pair_type(0, numEnt));
2327 auto lidsOut = Kokkos::subview(lclColInds, pair_type(0, numEnt));
2328 auto valsOut = Kokkos::subview(vals, pair_type(0, numScalarEnt));
2333 size_t numBytesOut = 0;
2336 unpackRowForBlockCrs<impl_scalar_type, LO, GO>(Kokkos::View<GO*, host_exec>(gidsOut.data(), numEnt),
2337 Kokkos::View<impl_scalar_type*, host_exec>(valsOut.data(), numScalarEnt),
2338 imports.view_host(),
2339 offval, numBytes, numEnt,
2340 numBytesPerValue, blockSize);
2341 }
catch (std::exception& e) {
2342 errorDuringUnpack() = 1;
2344 std::ostringstream os;
2345 os << prefix <<
"At i = " << i <<
", unpackRowForBlockCrs threw: "
2346 << e.what() << endl;
2347 std::cerr << os.str();
2352 if (numBytes != numBytesOut) {
2353 errorDuringUnpack() = 1;
2355 std::ostringstream os;
2357 <<
"At i = " << i <<
", numBytes = " << numBytes
2358 <<
" != numBytesOut = " << numBytesOut <<
"."
2360 std::cerr << os.str();
2366 for (
size_t k = 0; k < numEnt; ++k) {
2368 if (lidsOut(k) == Teuchos::OrdinalTraits<LO>::invalid()) {
2370 std::ostringstream os;
2372 <<
"At i = " << i <<
", GID " << gidsOut(k)
2373 <<
" is not owned by the calling process."
2375 std::cerr << os.str();
2383 const LO*
const lidsRaw =
const_cast<const LO*
>(lidsOut.data());
2384 const Scalar*
const valsRaw =
reinterpret_cast<const Scalar*
>(
const_cast<const impl_scalar_type*
>(valsOut.data()));
2385 if (combineMode ==
ADD) {
2386 numCombd = this->sumIntoLocalValues(lclRow, lidsRaw, valsRaw, numEnt);
2387 }
else if (combineMode ==
INSERT || combineMode ==
REPLACE) {
2388 numCombd = this->replaceLocalValues(lclRow, lidsRaw, valsRaw, numEnt);
2389 }
else if (combineMode ==
ABSMAX) {
2390 numCombd = this->absMaxLocalValues(lclRow, lidsRaw, valsRaw, numEnt);
2393 if (static_cast<LO>(numEnt) != numCombd) {
2394 errorDuringUnpack() = 1;
2396 std::ostringstream os;
2397 os << prefix <<
"At i = " << i <<
", numEnt = " << numEnt
2398 <<
" != numCombd = " << numCombd <<
"."
2400 std::cerr << os.str();
2408 if (errorDuringUnpack() != 0) {
2409 std::ostream& err = this->markLocalErrorAndGetStream();
2410 err << prefix <<
"Unpacking failed.";
2412 err <<
" Please run again with the environment variable setting "
2413 "TPETRA_VERBOSE=1 to get more verbose diagnostic output.";
2419 std::ostringstream os;
2420 const bool lclSuccess = !(*(this->localError_));
2422 << (lclSuccess ?
"succeeded" :
"FAILED")
2424 std::cerr << os.str();
2428 template <
class Scalar,
class LO,
class GO,
class Node>
2431 using Teuchos::TypeNameTraits;
2432 std::ostringstream os;
2433 os <<
"\"Tpetra::BlockCrsMatrix\": { "
2434 <<
"Template parameters: { "
2435 <<
"Scalar: " << TypeNameTraits<Scalar>::name()
2436 <<
"LO: " << TypeNameTraits<LO>::name()
2437 <<
"GO: " << TypeNameTraits<GO>::name()
2438 <<
"Node: " << TypeNameTraits<Node>::name()
2440 <<
", Label: \"" << this->getObjectLabel() <<
"\""
2441 <<
", Global dimensions: ["
2442 << graph_.getDomainMap()->getGlobalNumElements() <<
", "
2443 << graph_.getRangeMap()->getGlobalNumElements() <<
"]"
2444 <<
", Block size: " << getBlockSize()
2449 template <
class Scalar,
class LO,
class GO,
class Node>
2452 const Teuchos::EVerbosityLevel verbLevel)
const {
2453 using Teuchos::ArrayRCP;
2454 using Teuchos::CommRequest;
2455 using Teuchos::FancyOStream;
2456 using Teuchos::getFancyOStream;
2457 using Teuchos::ireceive;
2458 using Teuchos::isend;
2459 using Teuchos::outArg;
2460 using Teuchos::TypeNameTraits;
2461 using Teuchos::VERB_DEFAULT;
2462 using Teuchos::VERB_LOW;
2463 using Teuchos::VERB_MEDIUM;
2464 using Teuchos::VERB_NONE;
2468 using Teuchos::VERB_EXTREME;
2469 using Teuchos::wait;
2472 const Teuchos::EVerbosityLevel vl =
2473 (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
2475 if (vl == VERB_NONE) {
2480 Teuchos::OSTab tab0(out);
2482 out <<
"\"Tpetra::BlockCrsMatrix\":" << endl;
2483 Teuchos::OSTab tab1(out);
2485 out <<
"Template parameters:" << endl;
2487 Teuchos::OSTab tab2(out);
2488 out <<
"Scalar: " << TypeNameTraits<Scalar>::name() << endl
2489 <<
"LO: " << TypeNameTraits<LO>::name() << endl
2490 <<
"GO: " << TypeNameTraits<GO>::name() << endl
2491 <<
"Node: " << TypeNameTraits<Node>::name() << endl;
2493 out <<
"Label: \"" << this->getObjectLabel() <<
"\"" << endl
2494 <<
"Global dimensions: ["
2495 << graph_.getDomainMap()->getGlobalNumElements() <<
", "
2496 << graph_.getRangeMap()->getGlobalNumElements() <<
"]" << endl;
2498 const LO blockSize = getBlockSize();
2499 out <<
"Block size: " << blockSize << endl;
2502 if (vl >= VERB_MEDIUM) {
2503 const Teuchos::Comm<int>& comm = *(graph_.getMap()->getComm());
2504 const int myRank = comm.getRank();
2506 out <<
"Row Map:" << endl;
2508 getRowMap()->describe(out, vl);
2510 if (!getColMap().is_null()) {
2511 if (getColMap() == getRowMap()) {
2513 out <<
"Column Map: same as row Map" << endl;
2517 out <<
"Column Map:" << endl;
2519 getColMap()->describe(out, vl);
2522 if (!getDomainMap().is_null()) {
2523 if (getDomainMap() == getRowMap()) {
2525 out <<
"Domain Map: same as row Map" << endl;
2527 }
else if (getDomainMap() == getColMap()) {
2529 out <<
"Domain Map: same as column Map" << endl;
2533 out <<
"Domain Map:" << endl;
2535 getDomainMap()->describe(out, vl);
2538 if (!getRangeMap().is_null()) {
2539 if (getRangeMap() == getDomainMap()) {
2541 out <<
"Range Map: same as domain Map" << endl;
2543 }
else if (getRangeMap() == getRowMap()) {
2545 out <<
"Range Map: same as row Map" << endl;
2549 out <<
"Range Map: " << endl;
2551 getRangeMap()->describe(out, vl);
2556 if (vl >= VERB_EXTREME) {
2557 const Teuchos::Comm<int>& comm = *(graph_.getMap()->getComm());
2558 const int myRank = comm.getRank();
2559 const int numProcs = comm.getSize();
2562 RCP<std::ostringstream> lclOutStrPtr(
new std::ostringstream());
2563 RCP<FancyOStream> osPtr = getFancyOStream(lclOutStrPtr);
2564 FancyOStream& os = *osPtr;
2565 os <<
"Process " << myRank <<
":" << endl;
2566 Teuchos::OSTab tab2(os);
2568 const map_type& meshRowMap = *(graph_.getRowMap());
2569 const map_type& meshColMap = *(graph_.getColMap());
2574 os <<
"Row " << meshGblRow <<
": {";
2576 local_inds_host_view_type lclColInds;
2577 values_host_view_type vals;
2579 this->getLocalRowView(meshLclRow, lclColInds, vals);
2580 numInds = lclColInds.extent(0);
2582 for (LO k = 0; k < numInds; ++k) {
2585 os <<
"Col " << gblCol <<
": [";
2586 for (LO i = 0; i < blockSize; ++i) {
2587 for (LO j = 0; j < blockSize; ++j) {
2588 os << vals[blockSize * blockSize * k + i * blockSize + j];
2589 if (j + 1 < blockSize) {
2593 if (i + 1 < blockSize) {
2598 if (k + 1 < numInds) {
2608 out << lclOutStrPtr->str();
2609 lclOutStrPtr = Teuchos::null;
2612 const int sizeTag = 1337;
2613 const int dataTag = 1338;
2615 ArrayRCP<char> recvDataBuf;
2619 for (
int p = 1; p < numProcs; ++p) {
2622 ArrayRCP<size_t> recvSize(1);
2624 RCP<CommRequest<int> > recvSizeReq =
2625 ireceive<int, size_t>(recvSize, p, sizeTag, comm);
2626 wait<int>(comm, outArg(recvSizeReq));
2627 const size_t numCharsToRecv = recvSize[0];
2634 if (static_cast<size_t>(recvDataBuf.size()) < numCharsToRecv + 1) {
2635 recvDataBuf.resize(numCharsToRecv + 1);
2637 ArrayRCP<char> recvData = recvDataBuf.persistingView(0, numCharsToRecv);
2639 RCP<CommRequest<int> > recvDataReq =
2640 ireceive<int, char>(recvData, p, dataTag, comm);
2641 wait<int>(comm, outArg(recvDataReq));
2646 recvDataBuf[numCharsToRecv] =
'\0';
2647 out << recvDataBuf.getRawPtr();
2648 }
else if (myRank == p) {
2652 const std::string stringToSend = lclOutStrPtr->str();
2653 lclOutStrPtr = Teuchos::null;
2656 const size_t numCharsToSend = stringToSend.size();
2657 ArrayRCP<size_t> sendSize(1);
2658 sendSize[0] = numCharsToSend;
2659 RCP<CommRequest<int> > sendSizeReq =
2660 isend<int, size_t>(sendSize, 0, sizeTag, comm);
2661 wait<int>(comm, outArg(sendSizeReq));
2669 ArrayRCP<const char> sendData(&stringToSend[0], 0, numCharsToSend,
false);
2670 RCP<CommRequest<int> > sendDataReq =
2671 isend<int, char>(sendData, 0, dataTag, comm);
2672 wait<int>(comm, outArg(sendDataReq));
2678 template <
class Scalar,
class LO,
class GO,
class Node>
2679 Teuchos::RCP<const Teuchos::Comm<int> >
2682 return graph_.getComm();
2685 template <
class Scalar,
class LO,
class GO,
class Node>
2689 return graph_.getGlobalNumCols();
2692 template <
class Scalar,
class LO,
class GO,
class Node>
2696 return graph_.getLocalNumCols();
2699 template <
class Scalar,
class LO,
class GO,
class Node>
2702 return graph_.getIndexBase();
2705 template <
class Scalar,
class LO,
class GO,
class Node>
2709 return graph_.getGlobalNumEntries();
2712 template <
class Scalar,
class LO,
class GO,
class Node>
2716 return graph_.getLocalNumEntries();
2719 template <
class Scalar,
class LO,
class GO,
class Node>
2723 return graph_.getNumEntriesInGlobalRow(globalRow);
2726 template <
class Scalar,
class LO,
class GO,
class Node>
2730 return graph_.getGlobalMaxNumRowEntries();
2733 template <
class Scalar,
class LO,
class GO,
class Node>
2736 return graph_.hasColMap();
2739 template <
class Scalar,
class LO,
class GO,
class Node>
2742 return graph_.isLocallyIndexed();
2745 template <
class Scalar,
class LO,
class GO,
class Node>
2748 return graph_.isGloballyIndexed();
2751 template <
class Scalar,
class LO,
class GO,
class Node>
2754 return graph_.isFillComplete();
2757 template <
class Scalar,
class LO,
class GO,
class Node>
2763 template <
class Scalar,
class LO,
class GO,
class Node>
2766 nonconst_global_inds_host_view_type& ,
2767 nonconst_values_host_view_type& ,
2769 TEUCHOS_TEST_FOR_EXCEPTION(
2770 true, std::logic_error,
2771 "Tpetra::BlockCrsMatrix::getGlobalRowCopy: "
2772 "This class doesn't support global matrix indexing.");
2775 template <
class Scalar,
class LO,
class GO,
class Node>
2778 global_inds_host_view_type& ,
2779 values_host_view_type& )
const {
2780 TEUCHOS_TEST_FOR_EXCEPTION(
2781 true, std::logic_error,
2782 "Tpetra::BlockCrsMatrix::getGlobalRowView: "
2783 "This class doesn't support global matrix indexing.");
2786 template <
class Scalar,
class LO,
class GO,
class Node>
2789 local_inds_host_view_type& colInds,
2790 values_host_view_type& vals)
const {
2791 if (!rowMeshMap_.isNodeLocalElement(localRowInd)) {
2792 colInds = local_inds_host_view_type();
2793 vals = values_host_view_type();
2795 const size_t absBlockOffsetStart = ptrHost_[localRowInd];
2796 const size_t numInds = ptrHost_[localRowInd + 1] - absBlockOffsetStart;
2797 colInds = local_inds_host_view_type(indHost_.data() + absBlockOffsetStart, numInds);
2799 vals = getValuesHost(localRowInd);
2803 template <
class Scalar,
class LO,
class GO,
class Node>
2806 local_inds_host_view_type& colInds,
2807 nonconst_values_host_view_type& vals)
const {
2808 if (!rowMeshMap_.isNodeLocalElement(localRowInd)) {
2809 colInds = nonconst_local_inds_host_view_type();
2810 vals = nonconst_values_host_view_type();
2812 const size_t absBlockOffsetStart = ptrHost_[localRowInd];
2813 const size_t numInds = ptrHost_[localRowInd + 1] - absBlockOffsetStart;
2814 colInds = local_inds_host_view_type(indHost_.data() + absBlockOffsetStart, numInds);
2821 template <
class Scalar,
class LO,
class GO,
class Node>
2824 const size_t lclNumMeshRows = graph_.getLocalNumRows();
2826 Kokkos::View<size_t*, device_type> diagOffsets(
"diagOffsets", lclNumMeshRows);
2827 graph_.getLocalDiagOffsets(diagOffsets);
2830 auto diagOffsetsHost = Kokkos::create_mirror_view(diagOffsets);
2834 auto vals_host_out = val_.getHostView(Access::ReadOnly);
2838 size_t rowOffset = 0;
2840 LO bs = getBlockSize();
2841 for (
size_t r = 0; r < getLocalNumRows(); r++) {
2843 offset = rowOffset + diagOffsetsHost(r) * bs * bs;
2844 for (
int b = 0; b < bs; b++) {
2848 rowOffset += getNumEntriesInLocalRow(r) * bs * bs;
2854 template <
class Scalar,
class LO,
class GO,
class Node>
2857 TEUCHOS_TEST_FOR_EXCEPTION(
2858 true, std::logic_error,
2859 "Tpetra::BlockCrsMatrix::leftScale: "
2860 "not implemented.");
2863 template <
class Scalar,
class LO,
class GO,
class Node>
2866 TEUCHOS_TEST_FOR_EXCEPTION(
2867 true, std::logic_error,
2868 "Tpetra::BlockCrsMatrix::rightScale: "
2869 "not implemented.");
2872 template <
class Scalar,
class LO,
class GO,
class Node>
2873 Teuchos::RCP<const ::Tpetra::RowGraph<LO, GO, Node> >
2879 template <
class Scalar,
class LO,
class GO,
class Node>
2880 typename ::Tpetra::RowMatrix<Scalar, LO, GO, Node>::mag_type
2883 TEUCHOS_TEST_FOR_EXCEPTION(
2884 true, std::logic_error,
2885 "Tpetra::BlockCrsMatrix::getFrobeniusNorm: "
2886 "not implemented.");
2896 #define TPETRA_BLOCKCRSMATRIX_INSTANT(S, LO, GO, NODE) \
2897 template class BlockCrsMatrix<S, LO, GO, NODE>;
2899 #endif // TPETRA_BLOCKCRSMATRIX_DEF_HPP
virtual void getLocalRowCopy(LO LocalRow, nonconst_local_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const override
Not implemented.
size_t getNumEntriesInLocalRow(const LO localRowInd) const override
Return the number of entries in the given row on the calling process.
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
virtual size_t getLocalNumEntries() const override
The local number of stored (structurally nonzero) entries.
virtual global_size_t getGlobalNumCols() const override
The global number of columns of this matrix.
typename BMV::impl_scalar_type impl_scalar_type
The implementation type of entries in the matrix.
KokkosSparse::StaticCrsGraph< local_ordinal_type, Kokkos::LayoutLeft, device_type, void, size_t > local_graph_device_type
The type of the part of the sparse graph on each MPI process.
void getLocalDiagCopy(const Kokkos::View< impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &diag, const Kokkos::View< const size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Variant of getLocalDiagCopy() that uses precomputed offsets and puts diagonal blocks in a 3-D Kokkos:...
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel) const override
Print a description of this object to the given output stream.
Kokkos::View< impl_scalar_type **, Impl::BlockCrsMatrixLittleBlockArrayLayout, device_type, Kokkos::MemoryTraits< Kokkos::Unmanaged > > little_block_type
The type used to access nonconst matrix blocks.
virtual typename::Tpetra::RowMatrix< Scalar, LO, GO, Node >::mag_type getFrobeniusNorm() const override
The Frobenius norm of the matrix.
LO replaceLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Replace values at the given (mesh, i.e., block) column indices, in the given (mesh, i.e., block) row.
virtual void leftScale(const ::Tpetra::Vector< Scalar, LO, GO, Node > &x) override
Scale the RowMatrix on the left with the given Vector x.
Teuchos::RCP< const map_type > getColMap() const override
Returns the Map that describes the column distribution in this graph.
LO replaceLocalValuesByOffsets(const LO localRowInd, const ptrdiff_t offsets[], const Scalar vals[], const LO numOffsets) const
Like replaceLocalValues, but avoids computing row offsets.
std::string description() const override
One-line description of this object.
Sparse matrix whose entries are small dense square blocks, all of the same dimensions.
typename DistObject< Scalar, LO, GO, Node >::buffer_device_type buffer_device_type
Kokkos::Device specialization for communication buffers.
void exportAndFillComplete(Teuchos::RCP< BlockCrsMatrix< Scalar, LO, GO, Node > > &destMatrix, const Export< LO, GO, Node > &exporter) const
Import from this to the given destination matrix, and make the result fill complete.
virtual bool hasColMap() const override
Whether this matrix has a well-defined column Map.
virtual bool isLocallyIndexed() const override
Whether matrix indices are locally indexed.
LO local_ordinal_type
The type of local indices.
static KOKKOS_INLINE_FUNCTION size_t unpackValue(LO &outVal, const char inBuf[])
Unpack the given value from the given output buffer.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
void getLocalRowViewNonConst(LO LocalRow, local_inds_host_view_type &indices, nonconst_values_host_view_type &values) const
size_t getNumEntriesInLocalRow(local_ordinal_type localRow) const override
Get the number of entries in the given row (local index).
bool isConstantStride() const
Whether this multivector has constant stride between columns.
void setAllToScalar(const Scalar &alpha)
Set all matrix entries equal to alpha.
One or more distributed dense vectors.
Teuchos::RCP< const map_type > getDomainMap() const override
Returns the Map associated with the domain of this graph.
Linear algebra kernels for small dense matrices and vectors.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
Scalar scalar_type
The type of entries in the matrix (that is, of each entry in each block).
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
size_t getLocalNumRows() const override
get the local number of block rows
virtual Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
The communicator over which this matrix is distributed.
Kokkos::LayoutRight BlockCrsMatrixLittleBlockArrayLayout
give an option to use layoutleft
virtual void rightScale(const ::Tpetra::Vector< Scalar, LO, GO, Node > &x) override
Scale the RowMatrix on the right with the given Vector x.
void getLocalRowView(LO LocalRow, local_inds_host_view_type &indices, values_host_view_type &values) const override
Get a view of the (mesh, i.e., block) row, using local (mesh, i.e., block) indices.
static map_type makePointMap(const map_type &meshMap, const LO blockSize)
Create and return the point Map corresponding to the given mesh Map and block size.
local_ordinal_type getMinLocalIndex() const
The minimum local index.
MultiVector for multiple degrees of freedom per mesh point.
virtual void copyAndPermute(const SrcDistObject &sourceObj, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM) override
KOKKOS_INLINE_FUNCTION void absMax(const ViewType2 &Y, const ViewType1 &X)
Implementation of Tpetra's ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix, and the small dense vectors in BlockMultiVector and BlockVector.
Teuchos::RCP< const map_type > getColMap() const override
get the (mesh) map for the columns of this block matrix.
virtual size_t getGlobalMaxNumRowEntries() const override
The maximum number of entries in any row over all processes in the matrix's communicator.
size_t global_size_t
Global size_t object.
virtual void getGlobalRowView(GO GlobalRow, global_inds_host_view_type &indices, values_host_view_type &values) const override
Get a constant, nonpersisting, globally indexed view of the given row of the matrix.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
BlockCrsMatrix()
Default constructor: Makes an empty block matrix.
LO absMaxLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Variant of getLocalDiagCopy() that uses precomputed offsets and puts diagonal blocks in a 3-D Kokkos:...
LO getBlockSize() const
Get the number of degrees of freedom per mesh point.
Teuchos::RCP< const map_type > getDomainMap() const override
Get the (point) domain Map of this matrix.
Teuchos::RCP< const map_type > getRowMap() const override
get the (mesh) map for the rows of this block matrix.
static KOKKOS_INLINE_FUNCTION size_t packValue(char outBuf[], const LO &inVal)
Pack the given value of type value_type into the given output buffer of bytes (char).
Insert new values that don't currently exist.
bool isSorted() const
Whether graph indices in all rows are known to be sorted.
LO sumIntoLocalValuesByOffsets(const LO localRowInd, const ptrdiff_t offsets[], const Scalar vals[], const LO numOffsets) const
Like sumIntoLocalValues, but avoids computing row offsets.
global_size_t getGlobalNumRows() const override
get the global number of block rows
global_ordinal_type getGlobalElement(local_ordinal_type localIndex) const
The global index corresponding to the given local index.
void replaceLocalValue(const LocalOrdinal myRow, const Scalar &value)
Replace current value at the specified location with specified values.
local_ordinal_type getMaxLocalIndex() const
The maximum local index on the calling process.
bool isNodeLocalElement(local_ordinal_type localIndex) const
Whether the given local index is valid for this Map on the calling process.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
virtual Teuchos::RCP< const ::Tpetra::RowGraph< LO, GO, Node > > getGraph() const override
Get the (mesh) graph.
virtual GO getIndexBase() const override
The index base for global indices in this matrix.
void applyBlock(const BlockMultiVector< Scalar, LO, GO, Node > &X, BlockMultiVector< Scalar, LO, GO, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, const Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), const Scalar beta=Teuchos::ScalarTraits< Scalar >::zero())
Version of apply() that takes BlockMultiVector input and output.
CombineMode
Rule for combining data in an Import or Export.
virtual bool isGloballyIndexed() const override
Whether matrix indices are globally indexed.
LO getLocalRowOffsets(const LO localRowInd, ptrdiff_t offsets[], const LO colInds[], const LO numColInds) const
Get relative offsets corresponding to the given rows, given by local row index.
size_t getLocalMaxNumRowEntries() const override
Maximum number of entries in any row of the matrix, on this process.
virtual global_size_t getGlobalNumEntries() const override
The global number of stored (structurally nonzero) entries.
Replace old value with maximum of magnitudes of old and new values.
void apply(const mv_type &X, mv_type &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const override
For this matrix A, compute Y := beta * Y + alpha * Op(A) * X.
Replace existing values with new values.
device_type::execution_space execution_space
The Kokkos execution space that this class uses.
Teuchos::RCP< const map_type > getRangeMap() const override
Returns the Map associated with the domain of this graph.
void enableWDVTracking()
Enable WrappedDualView reference-count tracking and syncing. Call this after exiting a host-parallel ...
Replace old values with zero.
Teuchos::RCP< const map_type > getRangeMap() const override
Get the (point) range Map of this matrix.
KOKKOS_INLINE_FUNCTION void COPY(const ViewType1 &x, const ViewType2 &y)
Deep copy x into y, where x and y are either rank 1 (vectors) or rank 2 (matrices) with the same dime...
std::string dualViewStatusToString(const DualViewType &dv, const char name[])
Return the status of the given Kokkos::DualView, as a human-readable string.
void getLocalDiagOffsets(const Kokkos::View< size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Get offsets of the diagonal entries in the matrix.
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const LO &)
Number of bytes required to pack or unpack the given value of type value_type.
char packet_type
Implementation detail; tells.
local_matrix_device_type getLocalMatrixDevice() const
void disableWDVTracking()
Disable WrappedDualView reference-count tracking and syncing. Call this before entering a host-parall...
local_ordinal_type getLocalElement(global_ordinal_type globalIndex) const
The local index corresponding to the given global index.
A distributed dense vector.
virtual bool supportsRowViews() const override
Whether this object implements getLocalRowView() and getGlobalRowView().
virtual void packAndPrepare(const SrcDistObject &sourceObj, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &exportLIDs, Kokkos::DualView< packet_type *, buffer_device_type > &exports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, size_t &constantNumPackets) override
virtual void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< packet_type *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode) override
virtual void getGlobalRowCopy(GO GlobalRow, nonconst_global_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const override
Get a copy of the given global row's entries.
bool locallySameAs(const Map< local_ordinal_type, global_ordinal_type, node_type > &map) const
Is this Map locally the same as the input Map?
virtual bool isFillComplete() const override
Whether fillComplete() has been called.
impl_scalar_type_dualview::t_host getValuesHostNonConst() const
Get the host or device View of the matrix's values (val_).
Base class for distributed Tpetra objects that support data redistribution.
LO sumIntoLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Sum into values at the given (mesh, i.e., block) column indices, in the given (mesh, i.e., block) row.
virtual size_t getLocalNumCols() const override
The number of columns needed to apply the forward operator on this node.
void importAndFillComplete(Teuchos::RCP< BlockCrsMatrix< Scalar, LO, GO, Node > > &destMatrix, const Import< LO, GO, Node > &importer) const
Import from this to the given destination matrix, and make the result fill complete.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
virtual size_t getNumEntriesInGlobalRow(GO globalRow) const override
The current number of entries on the calling process in the specified global row. ...