42 #ifndef TPETRA_BLOCKCRSMATRIX_DEF_HPP
43 #define TPETRA_BLOCKCRSMATRIX_DEF_HPP
51 #include "Tpetra_BlockMultiVector.hpp"
54 #include "Teuchos_TimeMonitor.hpp"
55 #ifdef HAVE_TPETRA_DEBUG
57 #endif // HAVE_TPETRA_DEBUG
71 #if defined(__CUDACC__)
73 # if defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA)
74 # undef TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA
75 # endif // defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA)
77 #elif defined(__GNUC__)
79 # if defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA)
80 # undef TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA
81 # endif // defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA)
83 #else // some other compiler
86 # if ! defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA)
87 # define TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA 1
88 # endif // ! defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA)
89 #endif // defined(__CUDACC__), defined(__GNUC__)
97 struct BlockCrsRowStruct {
98 T totalNumEntries, totalNumBytes, maxRowLength;
99 KOKKOS_INLINE_FUNCTION BlockCrsRowStruct() =
default;
100 KOKKOS_INLINE_FUNCTION BlockCrsRowStruct(
const BlockCrsRowStruct &b) =
default;
101 KOKKOS_INLINE_FUNCTION BlockCrsRowStruct(
const T& numEnt,
const T& numBytes,
const T& rowLength)
102 : totalNumEntries(numEnt), totalNumBytes(numBytes), maxRowLength(rowLength) {}
107 KOKKOS_INLINE_FUNCTION
108 void operator+=(
volatile BlockCrsRowStruct<T> &a,
109 volatile const BlockCrsRowStruct<T> &b) {
110 a.totalNumEntries += b.totalNumEntries;
111 a.totalNumBytes += b.totalNumBytes;
112 a.maxRowLength = a.maxRowLength > b.maxRowLength ? a.maxRowLength : b.maxRowLength;
117 KOKKOS_INLINE_FUNCTION
118 void operator+=(BlockCrsRowStruct<T> &a,
const BlockCrsRowStruct<T> &b) {
119 a.totalNumEntries += b.totalNumEntries;
120 a.totalNumBytes += b.totalNumBytes;
121 a.maxRowLength = a.maxRowLength > b.maxRowLength ? a.maxRowLength : b.maxRowLength;
124 template<
typename T,
typename ExecSpace>
125 struct BlockCrsReducer {
126 typedef BlockCrsReducer reducer;
127 typedef T value_type;
128 typedef Kokkos::View<value_type,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type;
131 KOKKOS_INLINE_FUNCTION
132 BlockCrsReducer(value_type &val) : value(&val) {}
134 KOKKOS_INLINE_FUNCTION
void join(value_type &dst, value_type &src)
const { dst += src; }
135 KOKKOS_INLINE_FUNCTION
void join(
volatile value_type &dst,
const volatile value_type &src)
const { dst += src; }
136 KOKKOS_INLINE_FUNCTION
void init(value_type &val)
const { val = value_type(); }
137 KOKKOS_INLINE_FUNCTION value_type& reference() {
return *value; }
138 KOKKOS_INLINE_FUNCTION result_view_type view()
const {
return result_view_type(value); }
141 template<
class AlphaCoeffType,
143 class MatrixValuesType,
148 class BcrsApplyNoTransFunctor {
150 static_assert (Kokkos::Impl::is_view<MatrixValuesType>::value,
151 "MatrixValuesType must be a Kokkos::View.");
152 static_assert (Kokkos::Impl::is_view<OutVecType>::value,
153 "OutVecType must be a Kokkos::View.");
154 static_assert (Kokkos::Impl::is_view<InVecType>::value,
155 "InVecType must be a Kokkos::View.");
156 static_assert (std::is_same<MatrixValuesType,
157 typename MatrixValuesType::const_type>::value,
158 "MatrixValuesType must be a const Kokkos::View.");
159 static_assert (std::is_same<OutVecType,
160 typename OutVecType::non_const_type>::value,
161 "OutVecType must be a nonconst Kokkos::View.");
162 static_assert (std::is_same<InVecType, typename InVecType::const_type>::value,
163 "InVecType must be a const Kokkos::View.");
164 static_assert (static_cast<int> (MatrixValuesType::rank) == 1,
165 "MatrixValuesType must be a rank-1 Kokkos::View.");
166 static_assert (static_cast<int> (InVecType::rank) == 1,
167 "InVecType must be a rank-1 Kokkos::View.");
168 static_assert (static_cast<int> (OutVecType::rank) == 1,
169 "OutVecType must be a rank-1 Kokkos::View.");
170 typedef typename MatrixValuesType::non_const_value_type scalar_type;
173 typedef typename GraphType::device_type device_type;
176 typedef typename std::remove_const<typename GraphType::data_type>::type
185 void setX (
const InVecType& X) { X_ = X; }
193 void setY (
const OutVecType& Y) { Y_ = Y; }
195 typedef typename Kokkos::ArithTraits<typename std::decay<AlphaCoeffType>::type>::val_type alpha_coeff_type;
196 typedef typename Kokkos::ArithTraits<typename std::decay<BetaCoeffType>::type>::val_type beta_coeff_type;
199 BcrsApplyNoTransFunctor (
const alpha_coeff_type& alpha,
200 const GraphType& graph,
201 const MatrixValuesType& val,
202 const local_ordinal_type blockSize,
204 const beta_coeff_type& beta,
205 const OutVecType& Y) :
207 ptr_ (graph.row_map),
208 ind_ (graph.entries),
210 blockSize_ (blockSize),
217 KOKKOS_INLINE_FUNCTION
void
218 operator () (
const typename Kokkos::TeamPolicy<typename device_type::execution_space>::member_type & member)
const
220 Kokkos::abort(
"Tpetra::BcrsApplyNoTransFunctor:: this should not be called");
224 KOKKOS_INLINE_FUNCTION
void
225 operator () (
const local_ordinal_type& lclRow)
const
231 using Kokkos::Details::ArithTraits;
237 using Kokkos::parallel_for;
238 using Kokkos::subview;
239 typedef typename decltype (ptr_)::non_const_value_type offset_type;
240 typedef Kokkos::View<
typename MatrixValuesType::const_value_type**,
243 Kokkos::MemoryTraits<Kokkos::Unmanaged> >
246 const offset_type Y_ptBeg = lclRow * blockSize_;
247 const offset_type Y_ptEnd = Y_ptBeg + blockSize_;
248 auto Y_cur = subview (Y_, ::Kokkos::make_pair (Y_ptBeg, Y_ptEnd));
252 if (beta_ == ArithTraits<beta_coeff_type>::zero ()) {
253 FILL (Y_cur, ArithTraits<beta_coeff_type>::zero ());
255 else if (beta_ != ArithTraits<beta_coeff_type>::one ()) {
259 if (alpha_ != ArithTraits<alpha_coeff_type>::zero ()) {
260 const offset_type blkBeg = ptr_[lclRow];
261 const offset_type blkEnd = ptr_[lclRow+1];
263 const offset_type bs2 = blockSize_ * blockSize_;
264 for (offset_type absBlkOff = blkBeg; absBlkOff < blkEnd;
266 little_block_type A_cur (val_.data () + absBlkOff * bs2,
267 blockSize_, blockSize_);
268 const offset_type X_blkCol = ind_[absBlkOff];
269 const offset_type X_ptBeg = X_blkCol * blockSize_;
270 const offset_type X_ptEnd = X_ptBeg + blockSize_;
271 auto X_cur = subview (X_, ::Kokkos::make_pair (X_ptBeg, X_ptEnd));
273 GEMV (alpha_, A_cur, X_cur, Y_cur);
279 alpha_coeff_type alpha_;
280 typename GraphType::row_map_type::const_type ptr_;
281 typename GraphType::entries_type::const_type ind_;
282 MatrixValuesType val_;
285 beta_coeff_type beta_;
289 template<
class AlphaCoeffType,
291 class MatrixValuesType,
295 class BcrsApplyNoTransFunctor<AlphaCoeffType,
303 static_assert (Kokkos::Impl::is_view<MatrixValuesType>::value,
304 "MatrixValuesType must be a Kokkos::View.");
305 static_assert (Kokkos::Impl::is_view<OutVecType>::value,
306 "OutVecType must be a Kokkos::View.");
307 static_assert (Kokkos::Impl::is_view<InVecType>::value,
308 "InVecType must be a Kokkos::View.");
309 static_assert (std::is_same<MatrixValuesType,
310 typename MatrixValuesType::const_type>::value,
311 "MatrixValuesType must be a const Kokkos::View.");
312 static_assert (std::is_same<OutVecType,
313 typename OutVecType::non_const_type>::value,
314 "OutVecType must be a nonconst Kokkos::View.");
315 static_assert (std::is_same<InVecType, typename InVecType::const_type>::value,
316 "InVecType must be a const Kokkos::View.");
317 static_assert (static_cast<int> (MatrixValuesType::rank) == 1,
318 "MatrixValuesType must be a rank-1 Kokkos::View.");
319 static_assert (static_cast<int> (InVecType::rank) == 1,
320 "InVecType must be a rank-1 Kokkos::View.");
321 static_assert (static_cast<int> (OutVecType::rank) == 1,
322 "OutVecType must be a rank-1 Kokkos::View.");
323 typedef typename MatrixValuesType::non_const_value_type scalar_type;
326 typedef typename GraphType::device_type device_type;
329 typedef typename std::remove_const<typename GraphType::data_type>::type
338 void setX (
const InVecType& X) { X_ = X; }
346 void setY (
const OutVecType& Y) { Y_ = Y; }
348 typedef typename Kokkos::ArithTraits<typename std::decay<AlphaCoeffType>::type>::val_type alpha_coeff_type;
349 typedef typename Kokkos::ArithTraits<typename std::decay<BetaCoeffType>::type>::val_type beta_coeff_type;
352 BcrsApplyNoTransFunctor (
const alpha_coeff_type& alpha,
353 const GraphType& graph,
354 const MatrixValuesType& val,
355 const local_ordinal_type blockSize,
357 const beta_coeff_type& beta,
358 const OutVecType& Y) :
360 ptr_ (graph.row_map),
361 ind_ (graph.entries),
363 blockSize_ (blockSize),
370 KOKKOS_INLINE_FUNCTION
void
371 operator () (
const local_ordinal_type& lclRow)
const
373 Kokkos::abort(
"Tpetra::BcrsApplyNoTransFunctor:: this should not be called");
377 KOKKOS_INLINE_FUNCTION
void
378 operator () (
const typename Kokkos::TeamPolicy<typename device_type::execution_space>::member_type & member)
const
382 using Kokkos::Details::ArithTraits;
388 using Kokkos::parallel_for;
389 using Kokkos::subview;
390 typedef typename decltype (ptr_)::non_const_value_type offset_type;
391 typedef Kokkos::View<
typename MatrixValuesType::const_value_type**,
394 Kokkos::MemoryTraits<Kokkos::Unmanaged> >
397 const offset_type Y_ptBeg = lclRow * blockSize_;
398 const offset_type Y_ptEnd = Y_ptBeg + blockSize_;
399 auto Y_cur = subview (Y_, ::Kokkos::make_pair (Y_ptBeg, Y_ptEnd));
403 if (beta_ == ArithTraits<beta_coeff_type>::zero ()) {
404 Kokkos::parallel_for(Kokkos::TeamVectorRange(member, blockSize_),
405 [&](
const local_ordinal_type &i) {
406 Y_cur(i) = ArithTraits<beta_coeff_type>::zero ();
409 else if (beta_ != ArithTraits<beta_coeff_type>::one ()) {
410 Kokkos::parallel_for(Kokkos::TeamVectorRange(member, blockSize_),
411 [&](
const local_ordinal_type &i) {
415 member.team_barrier();
417 if (alpha_ != ArithTraits<alpha_coeff_type>::zero ()) {
418 const offset_type blkBeg = ptr_[lclRow];
419 const offset_type blkEnd = ptr_[lclRow+1];
421 const offset_type bs2 = blockSize_ * blockSize_;
422 little_block_type A_cur (val_.data (), blockSize_, blockSize_);
423 auto X_cur = subview (X_, ::Kokkos::make_pair (0, blockSize_));
425 (Kokkos::TeamThreadRange(member, blkBeg, blkEnd),
426 [&](
const local_ordinal_type &absBlkOff) {
427 A_cur.assign_data(val_.data () + absBlkOff * bs2);
428 const offset_type X_blkCol = ind_[absBlkOff];
429 const offset_type X_ptBeg = X_blkCol * blockSize_;
430 X_cur.assign_data(&X_(X_ptBeg));
433 (Kokkos::ThreadVectorRange(member, blockSize_),
434 [&](
const local_ordinal_type &k0) {
436 for (local_ordinal_type k1=0;k1<blockSize_;++k1)
437 val += A_cur(k0,k1)*X_cur(k1);
438 #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
440 Y_cur(k0) += alpha_*val;
446 Kokkos::atomic_add(&Y_cur(k0), alpha_*val);
454 alpha_coeff_type alpha_;
455 typename GraphType::row_map_type::const_type ptr_;
456 typename GraphType::entries_type::const_type ind_;
457 MatrixValuesType val_;
460 beta_coeff_type beta_;
465 template<
class AlphaCoeffType,
467 class MatrixValuesType,
468 class InMultiVecType,
470 class OutMultiVecType>
472 bcrsLocalApplyNoTrans (
const AlphaCoeffType& alpha,
473 const GraphType& graph,
474 const MatrixValuesType& val,
475 const typename std::remove_const<typename GraphType::data_type>::type blockSize,
476 const InMultiVecType& X,
477 const BetaCoeffType& beta,
478 const OutMultiVecType& Y
481 static_assert (Kokkos::Impl::is_view<MatrixValuesType>::value,
482 "MatrixValuesType must be a Kokkos::View.");
483 static_assert (Kokkos::Impl::is_view<OutMultiVecType>::value,
484 "OutMultiVecType must be a Kokkos::View.");
485 static_assert (Kokkos::Impl::is_view<InMultiVecType>::value,
486 "InMultiVecType must be a Kokkos::View.");
487 static_assert (static_cast<int> (MatrixValuesType::rank) == 1,
488 "MatrixValuesType must be a rank-1 Kokkos::View.");
489 static_assert (static_cast<int> (OutMultiVecType::rank) == 2,
490 "OutMultiVecType must be a rank-2 Kokkos::View.");
491 static_assert (static_cast<int> (InMultiVecType::rank) == 2,
492 "InMultiVecType must be a rank-2 Kokkos::View.");
494 typedef typename GraphType::device_type::execution_space execution_space;
495 typedef typename MatrixValuesType::const_type matrix_values_type;
496 typedef typename OutMultiVecType::non_const_type out_multivec_type;
497 typedef typename InMultiVecType::const_type in_multivec_type;
498 typedef typename Kokkos::ArithTraits<typename std::decay<AlphaCoeffType>::type>::val_type alpha_type;
499 typedef typename Kokkos::ArithTraits<typename std::decay<BetaCoeffType>::type>::val_type beta_type;
500 typedef typename std::remove_const<typename GraphType::data_type>::type LO;
502 constexpr
bool is_builtin_type_enabled = std::is_arithmetic<typename InMultiVecType::non_const_value_type>::value;
504 const LO numLocalMeshRows = graph.row_map.extent (0) == 0 ?
505 static_cast<LO
> (0) :
506 static_cast<LO> (graph.row_map.extent (0) - 1);
507 const LO numVecs = Y.extent (1);
508 if (numLocalMeshRows == 0 || numVecs == 0) {
515 in_multivec_type X_in = X;
516 out_multivec_type Y_out = Y;
521 typedef decltype (Kokkos::subview (X_in, Kokkos::ALL (), 0)) in_vec_type;
522 typedef decltype (Kokkos::subview (Y_out, Kokkos::ALL (), 0)) out_vec_type;
523 typedef BcrsApplyNoTransFunctor<alpha_type, GraphType,
524 matrix_values_type, in_vec_type, beta_type, out_vec_type,
525 is_builtin_type_enabled> functor_type;
527 auto X_0 = Kokkos::subview (X_in, Kokkos::ALL (), 0);
528 auto Y_0 = Kokkos::subview (Y_out, Kokkos::ALL (), 0);
531 if (is_builtin_type_enabled) {
532 functor_type functor (alpha, graph, val, blockSize, X_0, beta, Y_0);
534 typedef Kokkos::TeamPolicy<execution_space> policy_type;
535 policy_type policy(1,1);
536 #if defined(KOKKOS_ENABLE_CUDA)
537 constexpr
bool is_cuda = std::is_same<execution_space, Kokkos::Cuda>::value;
539 constexpr
bool is_cuda =
false;
540 #endif // defined(KOKKOS_ENABLE_CUDA)
542 LO team_size = 8, vector_size = 1;
543 if (blockSize <= 5) vector_size = 4;
544 else if (blockSize <= 9) vector_size = 8;
545 else if (blockSize <= 12) vector_size = 12;
546 else if (blockSize <= 20) vector_size = 20;
547 else vector_size = 20;
548 policy = policy_type(numLocalMeshRows, team_size, vector_size);
550 policy = policy_type(numLocalMeshRows, 1, 1);
552 Kokkos::parallel_for (policy, functor);
555 for (LO j = 1; j < numVecs; ++j) {
556 auto X_j = Kokkos::subview (X_in, Kokkos::ALL (), j);
557 auto Y_j = Kokkos::subview (Y_out, Kokkos::ALL (), j);
560 Kokkos::parallel_for (policy, functor);
563 functor_type functor (alpha, graph, val, blockSize, X_0, beta, Y_0);
564 Kokkos::RangePolicy<execution_space,LO> policy(0, numLocalMeshRows);
565 Kokkos::parallel_for (policy, functor);
566 for (LO j = 1; j < numVecs; ++j) {
567 auto X_j = Kokkos::subview (X_in, Kokkos::ALL (), j);
568 auto Y_j = Kokkos::subview (Y_out, Kokkos::ALL (), j);
571 Kokkos::parallel_for (policy, functor);
581 template<
class Scalar,
class LO,
class GO,
class Node>
582 class GetLocalDiagCopy {
584 typedef typename Node::device_type device_type;
585 typedef size_t diag_offset_type;
586 typedef Kokkos::View<
const size_t*, device_type,
587 Kokkos::MemoryUnmanaged> diag_offsets_type;
588 typedef typename ::Tpetra::CrsGraph<LO, GO, Node> global_graph_type;
590 typedef typename local_graph_type::row_map_type row_offsets_type;
591 typedef typename ::Tpetra::BlockMultiVector<Scalar, LO, GO, Node>::impl_scalar_type IST;
592 typedef Kokkos::View<IST***, device_type, Kokkos::MemoryUnmanaged> diag_type;
593 typedef Kokkos::View<const IST*, device_type, Kokkos::MemoryUnmanaged> values_type;
596 GetLocalDiagCopy (
const diag_type& diag,
597 const values_type& val,
598 const diag_offsets_type& diagOffsets,
599 const row_offsets_type& ptr,
600 const LO blockSize) :
602 diagOffsets_ (diagOffsets),
604 blockSize_ (blockSize),
605 offsetPerBlock_ (blockSize_*blockSize_),
609 KOKKOS_INLINE_FUNCTION
void
610 operator() (
const LO& lclRowInd)
const
615 const size_t absOffset = ptr_[lclRowInd];
618 const size_t relOffset = diagOffsets_[lclRowInd];
621 const size_t pointOffset = (absOffset+relOffset)*offsetPerBlock_;
625 typedef Kokkos::View<
const IST**, Kokkos::LayoutRight,
626 device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
627 const_little_block_type;
628 const_little_block_type D_in (val_.data () + pointOffset,
629 blockSize_, blockSize_);
630 auto D_out = Kokkos::subview (diag_, lclRowInd, ALL (), ALL ());
636 diag_offsets_type diagOffsets_;
637 row_offsets_type ptr_;
644 template<
class Scalar,
class LO,
class GO,
class Node>
646 BlockCrsMatrix<Scalar, LO, GO, Node>::
647 markLocalErrorAndGetStream ()
649 * (this->localError_) =
true;
650 if ((*errs_).is_null ()) {
651 *errs_ = Teuchos::rcp (
new std::ostringstream ());
656 template<
class Scalar,
class LO,
class GO,
class Node>
660 graph_ (Teuchos::rcp (new
map_type ()), 0),
661 blockSize_ (static_cast<LO> (0)),
662 X_colMap_ (new Teuchos::RCP<
BMV> ()),
663 Y_rowMap_ (new Teuchos::RCP<
BMV> ()),
664 pointImporter_ (new Teuchos::RCP<typename
crs_graph_type::import_type> ()),
666 localError_ (new bool (false)),
667 errs_ (new Teuchos::RCP<std::ostringstream> ())
671 template<
class Scalar,
class LO,
class GO,
class Node>
674 const LO blockSize) :
677 rowMeshMap_ (* (graph.getRowMap ())),
678 blockSize_ (blockSize),
679 X_colMap_ (new Teuchos::RCP<
BMV> ()),
680 Y_rowMap_ (new Teuchos::RCP<
BMV> ()),
681 pointImporter_ (new Teuchos::RCP<typename
crs_graph_type::import_type> ()),
682 offsetPerBlock_ (blockSize * blockSize),
683 localError_ (new bool (false)),
684 errs_ (new Teuchos::RCP<std::ostringstream> ())
686 TEUCHOS_TEST_FOR_EXCEPTION(
687 ! graph_.
isSorted (), std::invalid_argument,
"Tpetra::"
688 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted "
689 "rows (isSorted() is false). This class assumes sorted rows.");
691 graphRCP_ = Teuchos::rcpFromRef(graph_);
697 const bool blockSizeIsNonpositive = (blockSize + 1 <= 1);
698 TEUCHOS_TEST_FOR_EXCEPTION(
699 blockSizeIsNonpositive, std::invalid_argument,
"Tpetra::"
700 "BlockCrsMatrix constructor: The input blockSize = " << blockSize <<
701 " <= 0. The block size must be positive.");
709 const auto colPointMap = Teuchos::rcp
711 *pointImporter_ = Teuchos::rcp
715 typedef typename crs_graph_type::local_graph_type::row_map_type row_map_type;
716 typedef typename row_map_type::HostMirror::non_const_type nc_host_row_map_type;
719 nc_host_row_map_type ptr_h_nc = Kokkos::create_mirror_view (ptr_d);
724 typedef typename crs_graph_type::local_graph_type::entries_type entries_type;
725 typedef typename entries_type::HostMirror::non_const_type nc_host_entries_type;
728 nc_host_entries_type ind_h_nc = Kokkos::create_mirror_view (ind_d);
734 val_ = decltype (val_) (
"val", numValEnt);
737 template<
class Scalar,
class LO,
class GO,
class Node>
742 const LO blockSize) :
745 rowMeshMap_ (* (graph.getRowMap ())),
746 domainPointMap_ (domainPointMap),
747 rangePointMap_ (rangePointMap),
748 blockSize_ (blockSize),
749 X_colMap_ (new Teuchos::RCP<
BMV> ()),
750 Y_rowMap_ (new Teuchos::RCP<
BMV> ()),
751 pointImporter_ (new Teuchos::RCP<typename
crs_graph_type::import_type> ()),
752 offsetPerBlock_ (blockSize * blockSize),
753 localError_ (new bool (false)),
754 errs_ (new Teuchos::RCP<std::ostringstream> ())
756 TEUCHOS_TEST_FOR_EXCEPTION(
757 ! graph_.
isSorted (), std::invalid_argument,
"Tpetra::"
758 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted "
759 "rows (isSorted() is false). This class assumes sorted rows.");
761 graphRCP_ = Teuchos::rcpFromRef(graph_);
767 const bool blockSizeIsNonpositive = (blockSize + 1 <= 1);
768 TEUCHOS_TEST_FOR_EXCEPTION(
769 blockSizeIsNonpositive, std::invalid_argument,
"Tpetra::"
770 "BlockCrsMatrix constructor: The input blockSize = " << blockSize <<
771 " <= 0. The block size must be positive.");
775 const auto colPointMap = Teuchos::rcp
777 *pointImporter_ = Teuchos::rcp
781 typedef typename crs_graph_type::local_graph_type::row_map_type row_map_type;
782 typedef typename row_map_type::HostMirror::non_const_type nc_host_row_map_type;
785 nc_host_row_map_type ptr_h_nc = Kokkos::create_mirror_view (ptr_d);
790 typedef typename crs_graph_type::local_graph_type::entries_type entries_type;
791 typedef typename entries_type::HostMirror::non_const_type nc_host_entries_type;
794 nc_host_entries_type ind_h_nc = Kokkos::create_mirror_view (ind_d);
800 val_ = decltype (val_) (
"val", numValEnt);
803 template<
class Scalar,
class LO,
class GO,
class Node>
804 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
809 return Teuchos::rcp (
new map_type (domainPointMap_));
812 template<
class Scalar,
class LO,
class GO,
class Node>
813 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
818 return Teuchos::rcp (
new map_type (rangePointMap_));
821 template<
class Scalar,
class LO,
class GO,
class Node>
822 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
826 return graph_.getRowMap();
829 template<
class Scalar,
class LO,
class GO,
class Node>
830 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
834 return graph_.getColMap();
837 template<
class Scalar,
class LO,
class GO,
class Node>
842 return graph_.getGlobalNumRows();
845 template<
class Scalar,
class LO,
class GO,
class Node>
850 return graph_.getNodeNumRows();
853 template<
class Scalar,
class LO,
class GO,
class Node>
858 return graph_.getNodeMaxNumRowEntries();
861 template<
class Scalar,
class LO,
class GO,
class Node>
866 Teuchos::ETransp mode,
871 TEUCHOS_TEST_FOR_EXCEPTION(
872 mode != Teuchos::NO_TRANS && mode != Teuchos::TRANS && mode != Teuchos::CONJ_TRANS,
873 std::invalid_argument,
"Tpetra::BlockCrsMatrix::apply: "
874 "Invalid 'mode' argument. Valid values are Teuchos::NO_TRANS, "
875 "Teuchos::TRANS, and Teuchos::CONJ_TRANS.");
879 const LO blockSize = getBlockSize ();
881 X_view =
BMV (X, * (graph_.getDomainMap ()), blockSize);
882 Y_view =
BMV (Y, * (graph_.getRangeMap ()), blockSize);
884 catch (std::invalid_argument& e) {
885 TEUCHOS_TEST_FOR_EXCEPTION(
886 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
887 "apply: Either the input MultiVector X or the output MultiVector Y "
888 "cannot be viewed as a BlockMultiVector, given this BlockCrsMatrix's "
889 "graph. BlockMultiVector's constructor threw the following exception: "
898 const_cast<this_type*
> (
this)->applyBlock (X_view, Y_view, mode, alpha, beta);
899 }
catch (std::invalid_argument& e) {
900 TEUCHOS_TEST_FOR_EXCEPTION(
901 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
902 "apply: The implementation method applyBlock complained about having "
903 "an invalid argument. It reported the following: " << e.what ());
904 }
catch (std::logic_error& e) {
905 TEUCHOS_TEST_FOR_EXCEPTION(
906 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
907 "apply: The implementation method applyBlock complained about a "
908 "possible bug in its implementation. It reported the following: "
910 }
catch (std::exception& e) {
911 TEUCHOS_TEST_FOR_EXCEPTION(
912 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
913 "apply: The implementation method applyBlock threw an exception which "
914 "is neither std::invalid_argument nor std::logic_error, but is a "
915 "subclass of std::exception. It reported the following: "
918 TEUCHOS_TEST_FOR_EXCEPTION(
919 true, std::logic_error,
"Tpetra::BlockCrsMatrix::"
920 "apply: The implementation method applyBlock threw an exception which "
921 "is not an instance of a subclass of std::exception. This probably "
922 "indicates a bug. Please report this to the Tpetra developers.");
926 template<
class Scalar,
class LO,
class GO,
class Node>
931 Teuchos::ETransp mode,
935 TEUCHOS_TEST_FOR_EXCEPTION(
937 "Tpetra::BlockCrsMatrix::applyBlock: "
938 "X and Y have different block sizes. X.getBlockSize() = "
942 if (mode == Teuchos::NO_TRANS) {
943 applyBlockNoTrans (X, Y, alpha, beta);
944 }
else if (mode == Teuchos::TRANS || mode == Teuchos::CONJ_TRANS) {
945 applyBlockTrans (X, Y, mode, alpha, beta);
947 TEUCHOS_TEST_FOR_EXCEPTION(
948 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
949 "applyBlock: Invalid 'mode' argument. Valid values are "
950 "Teuchos::NO_TRANS, Teuchos::TRANS, and Teuchos::CONJ_TRANS.");
954 template<
class Scalar,
class LO,
class GO,
class Node>
959 #ifdef HAVE_TPETRA_DEBUG
960 const char prefix[] =
"Tpetra::BlockCrsMatrix::setAllToScalar: ";
961 #endif // HAVE_TPETRA_DEBUG
963 if (this->need_sync_device ()) {
967 #ifdef HAVE_TPETRA_DEBUG
968 TEUCHOS_TEST_FOR_EXCEPTION
969 (this->need_sync_host (), std::runtime_error,
970 prefix <<
"The matrix's values need sync on both device and host.");
971 #endif // HAVE_TPETRA_DEBUG
972 this->modify_host ();
980 this->modify_device ();
985 template<
class Scalar,
class LO,
class GO,
class Node>
991 const LO numColInds)
const
993 #ifdef HAVE_TPETRA_DEBUG
994 const char prefix[] =
995 "Tpetra::BlockCrsMatrix::replaceLocalValues: ";
996 #endif // HAVE_TPETRA_DEBUG
998 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1003 return static_cast<LO
> (0);
1007 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1008 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
1009 const LO perBlockSize = this->offsetPerBlock ();
1014 #ifdef HAVE_TPETRA_DEBUG
1015 TEUCHOS_TEST_FOR_EXCEPTION
1016 (this->need_sync_host (), std::runtime_error,
1017 prefix <<
"The matrix's data were last modified on device, but have "
1018 "not been sync'd to host. Please sync to host (by calling "
1019 "sync<Kokkos::HostSpace>() on this matrix) before calling this "
1021 #endif // HAVE_TPETRA_DEBUG
1023 auto vals_host_out = getValuesHost ();
1026 for (LO k = 0; k < numColInds; ++k, pointOffset += perBlockSize) {
1027 const LO relBlockOffset =
1028 this->findRelOffsetOfColumnIndex (localRowInd, colInds[k], hint);
1029 if (relBlockOffset != LINV) {
1038 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1042 vals_host_out_raw + absBlockOffset * perBlockSize;
1047 for (LO i = 0; i < perBlockSize; ++i) {
1048 A_old[i] = A_new[i];
1050 hint = relBlockOffset + 1;
1057 template <
class Scalar,
class LO,
class GO,
class Node>
1061 Kokkos::MemoryUnmanaged>& offsets)
const
1063 graph_.getLocalDiagOffsets (offsets);
1066 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
1067 template <
class Scalar,
class LO,
class GO,
class Node>
1068 void TPETRA_DEPRECATED
1077 const size_t lclNumRows = graph_.getNodeNumRows ();
1078 if (static_cast<size_t> (offsets.size ()) < lclNumRows) {
1079 offsets.resize (lclNumRows);
1085 typedef typename device_type::memory_space memory_space;
1086 if (std::is_same<memory_space, Kokkos::HostSpace>::value) {
1090 typedef Kokkos::View<
size_t*, device_type,
1091 Kokkos::MemoryUnmanaged> output_type;
1092 output_type offsetsOut (offsets.getRawPtr (), lclNumRows);
1093 graph_.getLocalDiagOffsets (offsetsOut);
1096 Kokkos::View<size_t*, device_type> offsetsTmp (
"diagOffsets", lclNumRows);
1097 graph_.getLocalDiagOffsets (offsetsTmp);
1098 typedef Kokkos::View<
size_t*, Kokkos::HostSpace,
1099 Kokkos::MemoryUnmanaged> output_type;
1100 output_type offsetsOut (offsets.getRawPtr (), lclNumRows);
1104 #endif // TPETRA_ENABLE_DEPRECATED_CODE
1106 template <
class Scalar,
class LO,
class GO,
class Node>
1112 Kokkos::MemoryUnmanaged>& D_inv,
1113 const Scalar& omega,
1118 Kokkos::Details::ArithTraits<impl_scalar_type>::zero ();
1120 Kokkos::Details::ArithTraits<impl_scalar_type>::one ();
1121 const LO numLocalMeshRows =
1122 static_cast<LO
> (rowMeshMap_.getNodeNumElements ());
1129 const LO blockSize = getBlockSize ();
1130 Teuchos::Array<impl_scalar_type> localMem (blockSize);
1131 Teuchos::Array<impl_scalar_type> localMat (blockSize*blockSize);
1135 LO rowBegin = 0, rowEnd = 0, rowStride = 0;
1136 if (direction == Forward) {
1138 rowEnd = numLocalMeshRows+1;
1141 else if (direction == Backward) {
1142 rowBegin = numLocalMeshRows;
1146 else if (direction == Symmetric) {
1147 this->localGaussSeidel (B, X, D_inv, omega, Forward);
1148 this->localGaussSeidel (B, X, D_inv, omega, Backward);
1152 const Scalar one_minus_omega = Teuchos::ScalarTraits<Scalar>::one()-omega;
1153 const Scalar minus_omega = -omega;
1156 for (LO lclRow = rowBegin; lclRow != rowEnd; lclRow += rowStride) {
1157 const LO actlRow = lclRow - 1;
1160 COPY (B_cur, X_lcl);
1161 SCAL (static_cast<impl_scalar_type> (omega), X_lcl);
1163 const size_t meshBeg = ptrHost_[actlRow];
1164 const size_t meshEnd = ptrHost_[actlRow+1];
1165 for (
size_t absBlkOff = meshBeg; absBlkOff < meshEnd; ++absBlkOff) {
1166 const LO meshCol = indHost_[absBlkOff];
1167 const_little_block_type A_cur =
1168 getConstLocalBlockFromAbsOffset (absBlkOff);
1172 const Scalar alpha = meshCol == actlRow ? one_minus_omega : minus_omega;
1174 GEMV (static_cast<impl_scalar_type> (alpha), A_cur, X_cur, X_lcl);
1180 auto D_lcl = Kokkos::subview (D_inv, actlRow, ALL (), ALL ());
1182 FILL (X_update, zero);
1183 GEMV (one, D_lcl, X_lcl, X_update);
1187 for (LO lclRow = rowBegin; lclRow != rowEnd; lclRow += rowStride) {
1188 for (LO j = 0; j < numVecs; ++j) {
1189 LO actlRow = lclRow-1;
1192 COPY (B_cur, X_lcl);
1193 SCAL (static_cast<impl_scalar_type> (omega), X_lcl);
1195 const size_t meshBeg = ptrHost_[actlRow];
1196 const size_t meshEnd = ptrHost_[actlRow+1];
1197 for (
size_t absBlkOff = meshBeg; absBlkOff < meshEnd; ++absBlkOff) {
1198 const LO meshCol = indHost_[absBlkOff];
1199 const_little_block_type A_cur =
1200 getConstLocalBlockFromAbsOffset (absBlkOff);
1204 const Scalar alpha = meshCol == actlRow ? one_minus_omega : minus_omega;
1205 GEMV (static_cast<impl_scalar_type> (alpha), A_cur, X_cur, X_lcl);
1208 auto D_lcl = Kokkos::subview (D_inv, actlRow, ALL (), ALL ());
1210 FILL (X_update, zero);
1211 GEMV (one, D_lcl, X_lcl, X_update);
1217 template <
class Scalar,
class LO,
class GO,
class Node>
1230 TEUCHOS_TEST_FOR_EXCEPTION(
1231 true, std::logic_error,
"Tpetra::BlockCrsMatrix::"
1232 "gaussSeidelCopy: Not implemented.");
1235 template <
class Scalar,
class LO,
class GO,
class Node>
1241 const Teuchos::ArrayView<LO>& ,
1249 TEUCHOS_TEST_FOR_EXCEPTION(
1250 true, std::logic_error,
"Tpetra::BlockCrsMatrix::"
1251 "reorderedGaussSeidelCopy: Not implemented.");
1254 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
1255 template <
class Scalar,
class LO,
class GO,
class Node>
1256 void TPETRA_DEPRECATED
1259 const Teuchos::ArrayView<const size_t>& offsets)
const
1261 using Teuchos::ArrayRCP;
1262 using Teuchos::ArrayView;
1263 const Scalar
ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
1265 const size_t myNumRows = rowMeshMap_.getNodeNumElements();
1266 const LO* columnIndices;
1269 Teuchos::Array<LO> cols(1);
1272 Teuchos::Array<Scalar> zeroMat (blockSize_*blockSize_, ZERO);
1273 for (
size_t i = 0; i < myNumRows; ++i) {
1275 if (offsets[i] == Teuchos::OrdinalTraits<size_t>::invalid ()) {
1279 getLocalRowView (i, columnIndices, vals, numColumns);
1280 diag.
replaceLocalValues (i, cols.getRawPtr(), &vals[offsets[i]*blockSize_*blockSize_], 1);
1284 #endif // TPETRA_ENABLE_DEPRECATED_CODE
1286 template <
class Scalar,
class LO,
class GO,
class Node>
1290 Kokkos::MemoryUnmanaged>& diag,
1291 const Kokkos::View<
const size_t*, device_type,
1292 Kokkos::MemoryUnmanaged>& offsets)
const
1294 using Kokkos::parallel_for;
1295 typedef typename device_type::execution_space execution_space;
1296 const char prefix[] =
"Tpetra::BlockCrsMatrix::getLocalDiagCopy (2-arg): ";
1298 const LO lclNumMeshRows =
static_cast<LO
> (rowMeshMap_.getNodeNumElements ());
1299 const LO blockSize = this->getBlockSize ();
1300 TEUCHOS_TEST_FOR_EXCEPTION
1301 (static_cast<LO> (diag.extent (0)) < lclNumMeshRows ||
1302 static_cast<LO> (diag.extent (1)) < blockSize ||
1303 static_cast<LO> (diag.extent (2)) < blockSize,
1304 std::invalid_argument, prefix <<
1305 "The input Kokkos::View is not big enough to hold all the data.");
1306 TEUCHOS_TEST_FOR_EXCEPTION
1307 (static_cast<LO> (offsets.size ()) < lclNumMeshRows, std::invalid_argument,
1308 prefix <<
"offsets.size() = " << offsets.size () <<
" < local number of "
1309 "diagonal blocks " << lclNumMeshRows <<
".");
1311 #ifdef HAVE_TPETRA_DEBUG
1312 TEUCHOS_TEST_FOR_EXCEPTION
1313 (this->
template need_sync<device_type> (), std::runtime_error,
1314 prefix <<
"The matrix's data were last modified on host, but have "
1315 "not been sync'd to device. Please sync to device (by calling "
1316 "sync<device_type>() on this matrix) before calling this method.");
1317 #endif // HAVE_TPETRA_DEBUG
1319 typedef Kokkos::RangePolicy<execution_space, LO> policy_type;
1320 typedef GetLocalDiagCopy<Scalar, LO, GO, Node> functor_type;
1328 const_cast<this_type*
> (
this)->
template getValues<device_type> ();
1330 parallel_for (policy_type (0, lclNumMeshRows),
1331 functor_type (diag, vals_dev, offsets,
1332 graph_.getLocalGraph ().row_map, blockSize_));
1336 template <
class Scalar,
class LO,
class GO,
class Node>
1340 Kokkos::MemoryUnmanaged>& diag,
1341 const Teuchos::ArrayView<const size_t>& offsets)
const
1344 using Kokkos::parallel_for;
1346 Kokkos::MemoryUnmanaged>::HostMirror::execution_space host_exec_space;
1348 const LO lclNumMeshRows =
static_cast<LO
> (rowMeshMap_.getNodeNumElements ());
1349 const LO blockSize = this->getBlockSize ();
1350 TEUCHOS_TEST_FOR_EXCEPTION
1351 (static_cast<LO> (diag.extent (0)) < lclNumMeshRows ||
1352 static_cast<LO> (diag.extent (1)) < blockSize ||
1353 static_cast<LO> (diag.extent (2)) < blockSize,
1354 std::invalid_argument,
"Tpetra::BlockCrsMatrix::getLocalDiagCopy: "
1355 "The input Kokkos::View is not big enough to hold all the data.");
1356 TEUCHOS_TEST_FOR_EXCEPTION
1357 (static_cast<LO> (offsets.size ()) < lclNumMeshRows,
1358 std::invalid_argument,
"Tpetra::BlockCrsMatrix::getLocalDiagCopy: "
1359 "offsets.size() = " << offsets.size () <<
" < local number of diagonal "
1360 "blocks " << lclNumMeshRows <<
".");
1364 typedef Kokkos::RangePolicy<host_exec_space, LO> policy_type;
1365 parallel_for (policy_type (0, lclNumMeshRows), [=] (
const LO& lclMeshRow) {
1366 auto D_in = this->getConstLocalBlockFromRelOffset (lclMeshRow, offsets[lclMeshRow]);
1367 auto D_out = Kokkos::subview (diag, lclMeshRow, ALL (), ALL ());
1373 template<
class Scalar,
class LO,
class GO,
class Node>
1378 const Scalar vals[],
1379 const LO numColInds)
const
1381 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1386 return static_cast<LO
> (0);
1390 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1391 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
1392 const LO perBlockSize = this->offsetPerBlock ();
1397 for (LO k = 0; k < numColInds; ++k, pointOffset += perBlockSize) {
1398 const LO relBlockOffset =
1399 this->findRelOffsetOfColumnIndex (localRowInd, colInds[k], hint);
1400 if (relBlockOffset != LINV) {
1401 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1402 little_block_type A_old =
1403 getNonConstLocalBlockFromAbsOffset (absBlockOffset);
1404 const_little_block_type A_new =
1405 getConstLocalBlockFromInput (vIn, pointOffset);
1408 hint = relBlockOffset + 1;
1416 template<
class Scalar,
class LO,
class GO,
class Node>
1421 const Scalar vals[],
1422 const LO numColInds)
const
1424 #ifdef HAVE_TPETRA_DEBUG
1425 const char prefix[] =
1426 "Tpetra::BlockCrsMatrix::sumIntoLocalValues: ";
1427 #endif // HAVE_TPETRA_DEBUG
1429 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1434 return static_cast<LO
> (0);
1439 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1440 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
1441 const LO perBlockSize = this->offsetPerBlock ();
1446 #ifdef HAVE_TPETRA_DEBUG
1447 TEUCHOS_TEST_FOR_EXCEPTION
1448 (this->need_sync_host (), std::runtime_error,
1449 prefix <<
"The matrix's data were last modified on device, but have not "
1450 "been sync'd to host. Please sync to host (by calling "
1451 "sync<Kokkos::HostSpace>() on this matrix) before calling this method.");
1452 #endif // HAVE_TPETRA_DEBUG
1454 auto vals_host_out =
1458 for (LO k = 0; k < numColInds; ++k, pointOffset += perBlockSize) {
1459 const LO relBlockOffset =
1460 this->findRelOffsetOfColumnIndex (localRowInd, colInds[k], hint);
1461 if (relBlockOffset != LINV) {
1470 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1474 vals_host_out_raw + absBlockOffset * perBlockSize;
1479 for (LO i = 0; i < perBlockSize; ++i) {
1480 A_old[i] += A_new[i];
1482 hint = relBlockOffset + 1;
1489 template<
class Scalar,
class LO,
class GO,
class Node>
1497 #ifdef HAVE_TPETRA_DEBUG
1498 const char prefix[] =
1499 "Tpetra::BlockCrsMatrix::getLocalRowView: ";
1500 #endif // HAVE_TPETRA_DEBUG
1502 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1506 return Teuchos::OrdinalTraits<LO>::invalid ();
1509 const size_t absBlockOffsetStart = ptrHost_[localRowInd];
1510 colInds = indHost_.data () + absBlockOffsetStart;
1512 #ifdef HAVE_TPETRA_DEBUG
1513 TEUCHOS_TEST_FOR_EXCEPTION
1514 (this->need_sync_host (), std::runtime_error,
1515 prefix <<
"The matrix's data were last modified on device, but have "
1516 "not been sync'd to host. Please sync to host (by calling "
1517 "sync<Kokkos::HostSpace>() on this matrix) before calling this "
1519 #endif // HAVE_TPETRA_DEBUG
1521 auto vals_host_out = getValuesHost ();
1524 absBlockOffsetStart * offsetPerBlock ();
1525 vals =
reinterpret_cast<Scalar*
> (vOut);
1527 numInds = ptrHost_[localRowInd + 1] - absBlockOffsetStart;
1532 template<
class Scalar,
class LO,
class GO,
class Node>
1536 const Teuchos::ArrayView<LO>& Indices,
1537 const Teuchos::ArrayView<Scalar>& Values,
1538 size_t &NumEntries)
const
1543 getLocalRowView(LocalRow,colInds,vals,numInds);
1544 if (numInds > Indices.size() || numInds*blockSize_*blockSize_ > Values.size()) {
1545 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error,
1546 "Tpetra::BlockCrsMatrix::getLocalRowCopy : Column and/or values array is not large enough to hold "
1547 << numInds <<
" row entries");
1549 for (LO i=0; i<numInds; ++i) {
1550 Indices[i] = colInds[i];
1552 for (LO i=0; i<numInds*blockSize_*blockSize_; ++i) {
1553 Values[i] = vals[i];
1555 NumEntries = numInds;
1558 template<
class Scalar,
class LO,
class GO,
class Node>
1562 ptrdiff_t offsets[],
1564 const LO numColInds)
const
1566 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1571 return static_cast<LO
> (0);
1574 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
1578 for (LO k = 0; k < numColInds; ++k) {
1579 const LO relBlockOffset =
1580 this->findRelOffsetOfColumnIndex (localRowInd, colInds[k], hint);
1581 if (relBlockOffset != LINV) {
1582 offsets[k] =
static_cast<ptrdiff_t
> (relBlockOffset);
1583 hint = relBlockOffset + 1;
1591 template<
class Scalar,
class LO,
class GO,
class Node>
1595 const ptrdiff_t offsets[],
1596 const Scalar vals[],
1597 const LO numOffsets)
const
1599 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1604 return static_cast<LO
> (0);
1608 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1609 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
1610 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1611 size_t pointOffset = 0;
1614 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
1615 const size_t relBlockOffset = offsets[k];
1616 if (relBlockOffset != STINV) {
1617 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1618 little_block_type A_old =
1619 getNonConstLocalBlockFromAbsOffset (absBlockOffset);
1620 const_little_block_type A_new =
1621 getConstLocalBlockFromInput (vIn, pointOffset);
1622 COPY (A_new, A_old);
1630 template<
class Scalar,
class LO,
class GO,
class Node>
1634 const ptrdiff_t offsets[],
1635 const Scalar vals[],
1636 const LO numOffsets)
const
1638 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1643 return static_cast<LO
> (0);
1647 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1648 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
1649 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1650 size_t pointOffset = 0;
1653 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
1654 const size_t relBlockOffset = offsets[k];
1655 if (relBlockOffset != STINV) {
1656 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1657 little_block_type A_old =
1658 getNonConstLocalBlockFromAbsOffset (absBlockOffset);
1659 const_little_block_type A_new =
1660 getConstLocalBlockFromInput (vIn, pointOffset);
1669 template<
class Scalar,
class LO,
class GO,
class Node>
1673 const ptrdiff_t offsets[],
1674 const Scalar vals[],
1675 const LO numOffsets)
const
1677 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1682 return static_cast<LO
> (0);
1687 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1688 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
1689 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1690 size_t pointOffset = 0;
1693 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
1694 const size_t relBlockOffset = offsets[k];
1695 if (relBlockOffset != STINV) {
1696 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1697 little_block_type A_old =
1698 getNonConstLocalBlockFromAbsOffset (absBlockOffset);
1699 const_little_block_type A_new =
1700 getConstLocalBlockFromInput (vIn, pointOffset);
1702 AXPY (ONE, A_new, A_old);
1710 template<
class Scalar,
class LO,
class GO,
class Node>
1715 const size_t numEntInGraph = graph_.getNumEntriesInLocalRow (localRowInd);
1716 if (numEntInGraph == Teuchos::OrdinalTraits<size_t>::invalid ()) {
1717 return static_cast<LO
> (0);
1719 return static_cast<LO
> (numEntInGraph);
1723 template<
class Scalar,
class LO,
class GO,
class Node>
1728 const Teuchos::ETransp mode,
1738 TEUCHOS_TEST_FOR_EXCEPTION(
1739 true, std::logic_error,
"Tpetra::BlockCrsMatrix::apply: "
1740 "transpose and conjugate transpose modes are not yet implemented.");
1743 template<
class Scalar,
class LO,
class GO,
class Node>
1745 BlockCrsMatrix<Scalar, LO, GO, Node>::
1746 applyBlockNoTrans (
const BlockMultiVector<Scalar, LO, GO, Node>& X,
1747 BlockMultiVector<Scalar, LO, GO, Node>& Y,
1753 typedef ::Tpetra::Import<LO, GO, Node> import_type;
1754 typedef ::Tpetra::Export<LO, GO, Node> export_type;
1755 const Scalar zero = STS::zero ();
1756 const Scalar one = STS::one ();
1757 RCP<const import_type>
import = graph_.getImporter ();
1759 RCP<const export_type> theExport = graph_.getExporter ();
1760 const char prefix[] =
"Tpetra::BlockCrsMatrix::applyBlockNoTrans: ";
1762 if (alpha == zero) {
1766 else if (beta != one) {
1771 const BMV* X_colMap = NULL;
1772 if (
import.is_null ()) {
1776 catch (std::exception& e) {
1777 TEUCHOS_TEST_FOR_EXCEPTION
1778 (
true, std::logic_error, prefix <<
"Tpetra::MultiVector::"
1779 "operator= threw an exception: " << std::endl << e.what ());
1789 if ((*X_colMap_).is_null () ||
1790 (**X_colMap_).getNumVectors () != X.getNumVectors () ||
1791 (**X_colMap_).getBlockSize () != X.getBlockSize ()) {
1792 *X_colMap_ = rcp (
new BMV (* (graph_.getColMap ()), getBlockSize (),
1793 static_cast<LO
> (X.getNumVectors ())));
1795 (*X_colMap_)->getMultiVectorView().doImport (X.getMultiVectorView (),
1799 X_colMap = &(**X_colMap_);
1801 catch (std::exception& e) {
1802 TEUCHOS_TEST_FOR_EXCEPTION
1803 (
true, std::logic_error, prefix <<
"Tpetra::MultiVector::"
1804 "operator= threw an exception: " << std::endl << e.what ());
1808 BMV* Y_rowMap = NULL;
1809 if (theExport.is_null ()) {
1812 else if ((*Y_rowMap_).is_null () ||
1813 (**Y_rowMap_).getNumVectors () != Y.getNumVectors () ||
1814 (**Y_rowMap_).getBlockSize () != Y.getBlockSize ()) {
1815 *Y_rowMap_ = rcp (
new BMV (* (graph_.getRowMap ()), getBlockSize (),
1816 static_cast<LO
> (X.getNumVectors ())));
1818 Y_rowMap = &(**Y_rowMap_);
1820 catch (std::exception& e) {
1821 TEUCHOS_TEST_FOR_EXCEPTION(
1822 true, std::logic_error, prefix <<
"Tpetra::MultiVector::"
1823 "operator= threw an exception: " << std::endl << e.what ());
1828 localApplyBlockNoTrans (*X_colMap, *Y_rowMap, alpha, beta);
1830 catch (std::exception& e) {
1831 TEUCHOS_TEST_FOR_EXCEPTION
1832 (
true, std::runtime_error, prefix <<
"localApplyBlockNoTrans threw "
1833 "an exception: " << e.what ());
1836 TEUCHOS_TEST_FOR_EXCEPTION
1837 (
true, std::runtime_error, prefix <<
"localApplyBlockNoTrans threw "
1838 "an exception not a subclass of std::exception.");
1841 if (! theExport.is_null ()) {
1847 template<
class Scalar,
class LO,
class GO,
class Node>
1849 BlockCrsMatrix<Scalar, LO, GO, Node>::
1850 localApplyBlockNoTrans (
const BlockMultiVector<Scalar, LO, GO, Node>& X,
1851 BlockMultiVector<Scalar, LO, GO, Node>& Y,
1855 using ::Tpetra::Impl::bcrsLocalApplyNoTrans;
1857 const impl_scalar_type alpha_impl = alpha;
1858 const auto graph = this->graph_.getLocalGraph ();
1859 const impl_scalar_type beta_impl = beta;
1860 const LO blockSize = this->getBlockSize ();
1862 auto X_mv = X.getMultiVectorView ();
1863 auto Y_mv = Y.getMultiVectorView ();
1864 Y_mv.template modify<device_type> ();
1866 auto X_lcl = X_mv.template getLocalView<device_type> ();
1867 auto Y_lcl = Y_mv.template getLocalView<device_type> ();
1868 auto val = this->val_.template view<device_type> ();
1870 bcrsLocalApplyNoTrans (alpha_impl, graph, val, blockSize, X_lcl,
1874 template<
class Scalar,
class LO,
class GO,
class Node>
1876 BlockCrsMatrix<Scalar, LO, GO, Node>::
1877 findRelOffsetOfColumnIndex (
const LO localRowIndex,
1878 const LO colIndexToFind,
1879 const LO hint)
const
1881 const size_t absStartOffset = ptrHost_[localRowIndex];
1882 const size_t absEndOffset = ptrHost_[localRowIndex+1];
1883 const LO numEntriesInRow =
static_cast<LO
> (absEndOffset - absStartOffset);
1885 const LO*
const curInd = indHost_.data () + absStartOffset;
1888 if (hint < numEntriesInRow && curInd[hint] == colIndexToFind) {
1895 LO relOffset = Teuchos::OrdinalTraits<LO>::invalid ();
1900 const LO maxNumEntriesForLinearSearch = 32;
1901 if (numEntriesInRow > maxNumEntriesForLinearSearch) {
1906 const LO* beg = curInd;
1907 const LO* end = curInd + numEntriesInRow;
1908 std::pair<const LO*, const LO*> p =
1909 std::equal_range (beg, end, colIndexToFind);
1910 if (p.first != p.second) {
1912 relOffset =
static_cast<LO
> (p.first - beg);
1916 for (LO k = 0; k < numEntriesInRow; ++k) {
1917 if (colIndexToFind == curInd[k]) {
1927 template<
class Scalar,
class LO,
class GO,
class Node>
1929 BlockCrsMatrix<Scalar, LO, GO, Node>::
1930 offsetPerBlock ()
const
1932 return offsetPerBlock_;
1935 template<
class Scalar,
class LO,
class GO,
class Node>
1936 typename BlockCrsMatrix<Scalar, LO, GO, Node>::const_little_block_type
1937 BlockCrsMatrix<Scalar, LO, GO, Node>::
1938 getConstLocalBlockFromInput (
const impl_scalar_type* val,
1939 const size_t pointOffset)
const
1942 const LO rowStride = blockSize_;
1943 return const_little_block_type (val + pointOffset, blockSize_, rowStride);
1946 template<
class Scalar,
class LO,
class GO,
class Node>
1947 typename BlockCrsMatrix<Scalar, LO, GO, Node>::little_block_type
1948 BlockCrsMatrix<Scalar, LO, GO, Node>::
1949 getNonConstLocalBlockFromInput (impl_scalar_type* val,
1950 const size_t pointOffset)
const
1953 const LO rowStride = blockSize_;
1954 return little_block_type (val + pointOffset, blockSize_, rowStride);
1957 template<
class Scalar,
class LO,
class GO,
class Node>
1958 typename BlockCrsMatrix<Scalar, LO, GO, Node>::const_little_block_type
1959 BlockCrsMatrix<Scalar, LO, GO, Node>::
1960 getConstLocalBlockFromAbsOffset (
const size_t absBlockOffset)
const
1962 #ifdef HAVE_TPETRA_DEBUG
1963 const char prefix[] =
1964 "Tpetra::BlockCrsMatrix::getConstLocalBlockFromAbsOffset: ";
1965 #endif // HAVE_TPETRA_DEBUG
1967 if (absBlockOffset >= ptrHost_[rowMeshMap_.getNodeNumElements ()]) {
1971 return const_little_block_type ();
1974 #ifdef HAVE_TPETRA_DEBUG
1975 TEUCHOS_TEST_FOR_EXCEPTION
1976 (this->need_sync_host (), std::runtime_error,
1977 prefix <<
"The matrix's data were last modified on device, but have "
1978 "not been sync'd to host. Please sync to host (by calling "
1979 "sync<Kokkos::HostSpace>() on this matrix) before calling this "
1981 #endif // HAVE_TPETRA_DEBUG
1982 const size_t absPointOffset = absBlockOffset * offsetPerBlock ();
1984 auto vals_host = getValuesHost ();
1985 const impl_scalar_type* vals_host_raw = vals_host.data ();
1987 return getConstLocalBlockFromInput (vals_host_raw, absPointOffset);
1991 template<
class Scalar,
class LO,
class GO,
class Node>
1992 typename BlockCrsMatrix<Scalar, LO, GO, Node>::const_little_block_type
1993 BlockCrsMatrix<Scalar, LO, GO, Node>::
1994 getConstLocalBlockFromRelOffset (
const LO lclMeshRow,
1995 const size_t relMeshOffset)
const
1997 typedef impl_scalar_type IST;
1999 const LO* lclColInds = NULL;
2000 Scalar* lclVals = NULL;
2003 LO err = this->getLocalRowView (lclMeshRow, lclColInds, lclVals, numEnt);
2008 return const_little_block_type ();
2011 const size_t relPointOffset = relMeshOffset * this->offsetPerBlock ();
2012 IST* lclValsImpl =
reinterpret_cast<IST*
> (lclVals);
2013 return this->getConstLocalBlockFromInput (const_cast<const IST*> (lclValsImpl),
2018 template<
class Scalar,
class LO,
class GO,
class Node>
2019 typename BlockCrsMatrix<Scalar, LO, GO, Node>::little_block_type
2020 BlockCrsMatrix<Scalar, LO, GO, Node>::
2021 getNonConstLocalBlockFromAbsOffset (
const size_t absBlockOffset)
const
2023 #ifdef HAVE_TPETRA_DEBUG
2024 const char prefix[] =
2025 "Tpetra::BlockCrsMatrix::getNonConstLocalBlockFromAbsOffset: ";
2026 #endif // HAVE_TPETRA_DEBUG
2028 if (absBlockOffset >= ptrHost_[rowMeshMap_.getNodeNumElements ()]) {
2032 return little_block_type ();
2035 const size_t absPointOffset = absBlockOffset * offsetPerBlock ();
2036 #ifdef HAVE_TPETRA_DEBUG
2037 TEUCHOS_TEST_FOR_EXCEPTION
2038 (this->need_sync_host (), std::runtime_error,
2039 prefix <<
"The matrix's data were last modified on device, but have "
2040 "not been sync'd to host. Please sync to host (by calling "
2041 "sync<Kokkos::HostSpace>() on this matrix) before calling this "
2043 #endif // HAVE_TPETRA_DEBUG
2044 auto vals_host = getValuesHost();
2045 impl_scalar_type* vals_host_raw = vals_host.data ();
2046 return getNonConstLocalBlockFromInput (vals_host_raw, absPointOffset);
2050 template<
class Scalar,
class LO,
class GO,
class Node>
2051 typename BlockCrsMatrix<Scalar, LO, GO, Node>::little_block_type
2052 BlockCrsMatrix<Scalar, LO, GO, Node>::
2053 getLocalBlock (
const LO localRowInd,
const LO localColInd)
const
2055 const size_t absRowBlockOffset = ptrHost_[localRowInd];
2056 const LO relBlockOffset =
2057 this->findRelOffsetOfColumnIndex (localRowInd, localColInd);
2059 if (relBlockOffset != Teuchos::OrdinalTraits<LO>::invalid ()) {
2060 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
2061 return getNonConstLocalBlockFromAbsOffset (absBlockOffset);
2064 return little_block_type ();
2078 template<
class Scalar,
class LO,
class GO,
class Node>
2080 BlockCrsMatrix<Scalar, LO, GO, Node>::
2081 checkSizes (const ::Tpetra::SrcDistObject& source)
2084 typedef BlockCrsMatrix<Scalar, LO, GO, Node> this_type;
2085 const this_type* src =
dynamic_cast<const this_type*
> (&source);
2088 std::ostream& err = markLocalErrorAndGetStream ();
2089 err <<
"checkSizes: The source object of the Import or Export "
2090 "must be a BlockCrsMatrix with the same template parameters as the "
2091 "target object." << endl;
2096 if (src->getBlockSize () != this->getBlockSize ()) {
2097 std::ostream& err = markLocalErrorAndGetStream ();
2098 err <<
"checkSizes: The source and target objects of the Import or "
2099 <<
"Export must have the same block sizes. The source's block "
2100 <<
"size = " << src->getBlockSize () <<
" != the target's block "
2101 <<
"size = " << this->getBlockSize () <<
"." << endl;
2103 if (! src->graph_.isFillComplete ()) {
2104 std::ostream& err = markLocalErrorAndGetStream ();
2105 err <<
"checkSizes: The source object of the Import or Export is "
2106 "not fill complete. Both source and target objects must be fill "
2107 "complete." << endl;
2109 if (! this->graph_.isFillComplete ()) {
2110 std::ostream& err = markLocalErrorAndGetStream ();
2111 err <<
"checkSizes: The target object of the Import or Export is "
2112 "not fill complete. Both source and target objects must be fill "
2113 "complete." << endl;
2115 if (src->graph_.getColMap ().is_null ()) {
2116 std::ostream& err = markLocalErrorAndGetStream ();
2117 err <<
"checkSizes: The source object of the Import or Export does "
2118 "not have a column Map. Both source and target objects must have "
2119 "column Maps." << endl;
2121 if (this->graph_.getColMap ().is_null ()) {
2122 std::ostream& err = markLocalErrorAndGetStream ();
2123 err <<
"checkSizes: The target object of the Import or Export does "
2124 "not have a column Map. Both source and target objects must have "
2125 "column Maps." << endl;
2129 return ! (* (this->localError_));
2132 template<
class Scalar,
class LO,
class GO,
class Node>
2134 BlockCrsMatrix<Scalar, LO, GO, Node>::
2135 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
2137 #else // TPETRA_ENABLE_DEPRECATED_CODE
2139 #endif // TPETRA_ENABLE_DEPRECATED_CODE
2140 (const ::Tpetra::SrcDistObject& source,
2141 const size_t numSameIDs,
2142 const Kokkos::DualView<
const local_ordinal_type*,
2143 buffer_device_type>& permuteToLIDs,
2144 const Kokkos::DualView<
const local_ordinal_type*,
2145 buffer_device_type>& permuteFromLIDs)
2147 using ::Tpetra::Details::Behavior;
2149 using ::Tpetra::Details::ProfilingRegion;
2153 ProfilingRegion profile_region(
"Tpetra::BlockCrsMatrix::copyAndPermute");
2154 const bool debug = Behavior::debug();
2155 const bool verbose = Behavior::verbose();
2160 std::ostringstream os;
2161 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2162 os <<
"Proc " << myRank <<
": BlockCrsMatrix::copyAndPermute : " << endl;
2167 if (* (this->localError_)) {
2168 std::ostream& err = this->markLocalErrorAndGetStream ();
2170 <<
"The target object of the Import or Export is already in an error state."
2179 std::ostringstream os;
2180 os << prefix << endl
2183 std::cerr << os.str ();
2189 if (permuteToLIDs.extent (0) != permuteFromLIDs.extent (0)) {
2190 std::ostream& err = this->markLocalErrorAndGetStream ();
2192 <<
"permuteToLIDs.extent(0) = " << permuteToLIDs.extent (0)
2193 <<
" != permuteFromLIDs.extent(0) = " << permuteFromLIDs.extent(0)
2197 if (permuteToLIDs.need_sync_host () || permuteFromLIDs.need_sync_host ()) {
2198 std::ostream& err = this->markLocalErrorAndGetStream ();
2200 <<
"Both permuteToLIDs and permuteFromLIDs must be sync'd to host."
2205 const this_type* src =
dynamic_cast<const this_type*
> (&source);
2207 std::ostream& err = this->markLocalErrorAndGetStream ();
2209 <<
"The source (input) object of the Import or "
2210 "Export is either not a BlockCrsMatrix, or does not have the right "
2211 "template parameters. checkSizes() should have caught this. "
2212 "Please report this bug to the Tpetra developers." << endl;
2221 const_cast<this_type*
>(src)->sync_host();
2225 bool lclErr =
false;
2226 #ifdef HAVE_TPETRA_DEBUG
2227 std::set<LO> invalidSrcCopyRows;
2228 std::set<LO> invalidDstCopyRows;
2229 std::set<LO> invalidDstCopyCols;
2230 std::set<LO> invalidDstPermuteCols;
2231 std::set<LO> invalidPermuteFromRows;
2232 #endif // HAVE_TPETRA_DEBUG
2241 #ifdef HAVE_TPETRA_DEBUG
2242 const map_type& srcRowMap = * (src->graph_.getRowMap ());
2243 #endif // HAVE_TPETRA_DEBUG
2244 const map_type& dstRowMap = * (this->graph_.getRowMap ());
2245 const map_type& srcColMap = * (src->graph_.getColMap ());
2246 const map_type& dstColMap = * (this->graph_.getColMap ());
2247 const bool canUseLocalColumnIndices = srcColMap.locallySameAs (dstColMap);
2249 const size_t numPermute =
static_cast<size_t> (permuteFromLIDs.extent(0));
2251 std::ostringstream os;
2253 <<
"canUseLocalColumnIndices: "
2254 << (canUseLocalColumnIndices ?
"true" :
"false")
2255 <<
", numPermute: " << numPermute
2257 std::cerr << os.str ();
2260 const auto permuteToLIDsHost = permuteToLIDs.view_host();
2261 const auto permuteFromLIDsHost = permuteFromLIDs.view_host();
2263 if (canUseLocalColumnIndices) {
2265 for (LO localRow = 0; localRow < static_cast<LO> (numSameIDs); ++localRow) {
2266 #ifdef HAVE_TPETRA_DEBUG
2267 if (! srcRowMap.isNodeLocalElement (localRow)) {
2269 invalidSrcCopyRows.insert (localRow);
2272 #endif // HAVE_TPETRA_DEBUG
2274 const LO* lclSrcCols;
2279 LO err = src->getLocalRowView (localRow, lclSrcCols, vals, numEntries);
2282 #ifdef HAVE_TPETRA_DEBUG
2283 (void) invalidSrcCopyRows.insert (localRow);
2284 #endif // HAVE_TPETRA_DEBUG
2287 err = this->replaceLocalValues (localRow, lclSrcCols, vals, numEntries);
2288 if (err != numEntries) {
2290 if (! dstRowMap.isNodeLocalElement (localRow)) {
2291 #ifdef HAVE_TPETRA_DEBUG
2292 invalidDstCopyRows.insert (localRow);
2293 #endif // HAVE_TPETRA_DEBUG
2301 for (LO k = 0; k < numEntries; ++k) {
2302 if (! dstColMap.isNodeLocalElement (lclSrcCols[k])) {
2304 #ifdef HAVE_TPETRA_DEBUG
2305 (void) invalidDstCopyCols.insert (lclSrcCols[k]);
2306 #endif // HAVE_TPETRA_DEBUG
2315 for (
size_t k = 0; k < numPermute; ++k) {
2316 const LO srcLclRow =
static_cast<LO
> (permuteFromLIDsHost(k));
2317 const LO dstLclRow =
static_cast<LO
> (permuteToLIDsHost(k));
2319 const LO* lclSrcCols;
2322 LO err = src->getLocalRowView (srcLclRow, lclSrcCols, vals, numEntries);
2325 #ifdef HAVE_TPETRA_DEBUG
2326 invalidPermuteFromRows.insert (srcLclRow);
2327 #endif // HAVE_TPETRA_DEBUG
2330 err = this->replaceLocalValues (dstLclRow, lclSrcCols, vals, numEntries);
2331 if (err != numEntries) {
2333 #ifdef HAVE_TPETRA_DEBUG
2334 for (LO c = 0; c < numEntries; ++c) {
2335 if (! dstColMap.isNodeLocalElement (lclSrcCols[c])) {
2336 invalidDstPermuteCols.insert (lclSrcCols[c]);
2339 #endif // HAVE_TPETRA_DEBUG
2346 const size_t maxNumEnt = src->graph_.getNodeMaxNumRowEntries ();
2347 Teuchos::Array<LO> lclDstCols (maxNumEnt);
2350 for (LO localRow = 0; localRow < static_cast<LO> (numSameIDs); ++localRow) {
2351 const LO* lclSrcCols;
2358 err = src->getLocalRowView (localRow, lclSrcCols, vals, numEntries);
2359 }
catch (std::exception& e) {
2361 std::ostringstream os;
2362 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2363 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" localRow "
2364 << localRow <<
", src->getLocalRowView() threw an exception: "
2366 std::cerr << os.str ();
2373 #ifdef HAVE_TPETRA_DEBUG
2374 invalidSrcCopyRows.insert (localRow);
2375 #endif // HAVE_TPETRA_DEBUG
2378 if (static_cast<size_t> (numEntries) > static_cast<size_t> (lclDstCols.size ())) {
2381 std::ostringstream os;
2382 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2383 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" localRow "
2384 << localRow <<
", numEntries = " << numEntries <<
" > maxNumEnt = "
2385 << maxNumEnt << endl;
2386 std::cerr << os.str ();
2392 Teuchos::ArrayView<LO> lclDstColsView = lclDstCols.view (0, numEntries);
2393 for (LO j = 0; j < numEntries; ++j) {
2394 lclDstColsView[j] = dstColMap.getLocalElement (srcColMap.getGlobalElement (lclSrcCols[j]));
2395 if (lclDstColsView[j] == Teuchos::OrdinalTraits<LO>::invalid ()) {
2397 #ifdef HAVE_TPETRA_DEBUG
2398 invalidDstCopyCols.insert (lclDstColsView[j]);
2399 #endif // HAVE_TPETRA_DEBUG
2403 err = this->replaceLocalValues (localRow, lclDstColsView.getRawPtr (), vals, numEntries);
2404 }
catch (std::exception& e) {
2406 std::ostringstream os;
2407 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2408 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" localRow "
2409 << localRow <<
", this->replaceLocalValues() threw an exception: "
2411 std::cerr << os.str ();
2415 if (err != numEntries) {
2418 std::ostringstream os;
2419 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2420 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" "
2421 "localRow " << localRow <<
", this->replaceLocalValues "
2422 "returned " << err <<
" instead of numEntries = "
2423 << numEntries << endl;
2424 std::cerr << os.str ();
2432 for (
size_t k = 0; k < numPermute; ++k) {
2433 const LO srcLclRow =
static_cast<LO
> (permuteFromLIDsHost(k));
2434 const LO dstLclRow =
static_cast<LO
> (permuteToLIDsHost(k));
2436 const LO* lclSrcCols;
2441 err = src->getLocalRowView (srcLclRow, lclSrcCols, vals, numEntries);
2442 }
catch (std::exception& e) {
2444 std::ostringstream os;
2445 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2446 os <<
"Proc " << myRank <<
": copyAndPermute: At \"permute\" "
2447 "srcLclRow " << srcLclRow <<
" and dstLclRow " << dstLclRow
2448 <<
", src->getLocalRowView() threw an exception: " << e.what ();
2449 std::cerr << os.str ();
2456 #ifdef HAVE_TPETRA_DEBUG
2457 invalidPermuteFromRows.insert (srcLclRow);
2458 #endif // HAVE_TPETRA_DEBUG
2461 if (static_cast<size_t> (numEntries) > static_cast<size_t> (lclDstCols.size ())) {
2467 Teuchos::ArrayView<LO> lclDstColsView = lclDstCols.view (0, numEntries);
2468 for (LO j = 0; j < numEntries; ++j) {
2469 lclDstColsView[j] = dstColMap.getLocalElement (srcColMap.getGlobalElement (lclSrcCols[j]));
2470 if (lclDstColsView[j] == Teuchos::OrdinalTraits<LO>::invalid ()) {
2472 #ifdef HAVE_TPETRA_DEBUG
2473 invalidDstPermuteCols.insert (lclDstColsView[j]);
2474 #endif // HAVE_TPETRA_DEBUG
2477 err = this->replaceLocalValues (dstLclRow, lclDstColsView.getRawPtr (), vals, numEntries);
2478 if (err != numEntries) {
2487 std::ostream& err = this->markLocalErrorAndGetStream ();
2488 #ifdef HAVE_TPETRA_DEBUG
2489 err <<
"copyAndPermute: The graph structure of the source of the "
2490 "Import or Export must be a subset of the graph structure of the "
2492 err <<
"invalidSrcCopyRows = [";
2493 for (
typename std::set<LO>::const_iterator it = invalidSrcCopyRows.begin ();
2494 it != invalidSrcCopyRows.end (); ++it) {
2496 typename std::set<LO>::const_iterator itp1 = it;
2498 if (itp1 != invalidSrcCopyRows.end ()) {
2502 err <<
"], invalidDstCopyRows = [";
2503 for (
typename std::set<LO>::const_iterator it = invalidDstCopyRows.begin ();
2504 it != invalidDstCopyRows.end (); ++it) {
2506 typename std::set<LO>::const_iterator itp1 = it;
2508 if (itp1 != invalidDstCopyRows.end ()) {
2512 err <<
"], invalidDstCopyCols = [";
2513 for (
typename std::set<LO>::const_iterator it = invalidDstCopyCols.begin ();
2514 it != invalidDstCopyCols.end (); ++it) {
2516 typename std::set<LO>::const_iterator itp1 = it;
2518 if (itp1 != invalidDstCopyCols.end ()) {
2522 err <<
"], invalidDstPermuteCols = [";
2523 for (
typename std::set<LO>::const_iterator it = invalidDstPermuteCols.begin ();
2524 it != invalidDstPermuteCols.end (); ++it) {
2526 typename std::set<LO>::const_iterator itp1 = it;
2528 if (itp1 != invalidDstPermuteCols.end ()) {
2532 err <<
"], invalidPermuteFromRows = [";
2533 for (
typename std::set<LO>::const_iterator it = invalidPermuteFromRows.begin ();
2534 it != invalidPermuteFromRows.end (); ++it) {
2536 typename std::set<LO>::const_iterator itp1 = it;
2538 if (itp1 != invalidPermuteFromRows.end ()) {
2544 err <<
"copyAndPermute: The graph structure of the source of the "
2545 "Import or Export must be a subset of the graph structure of the "
2547 #endif // HAVE_TPETRA_DEBUG
2551 std::ostringstream os;
2552 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2553 const bool lclSuccess = ! (* (this->localError_));
2554 os <<
"*** Proc " << myRank <<
": copyAndPermute "
2555 << (lclSuccess ?
"succeeded" :
"FAILED");
2559 os <<
": error messages: " << this->errorMessages ();
2561 std::cerr << os.str ();
2585 template<
class LO,
class GO,
class D>
2587 packRowCount (
const size_t numEnt,
2588 const size_t numBytesPerValue,
2589 const size_t blkSize)
2591 using ::Tpetra::Details::PackTraits;
2601 const size_t numEntLen = PackTraits<LO, D>::packValueCount (numEntLO);
2602 const size_t gidsLen = numEnt * PackTraits<GO, D>::packValueCount (gid);
2603 const size_t valsLen = numEnt * numBytesPerValue * blkSize * blkSize;
2604 return numEntLen + gidsLen + valsLen;
2618 template<
class ST,
class LO,
class GO,
class D>
2620 unpackRowCount (
const typename ::Tpetra::Details::PackTraits<LO, D>::input_buffer_type& imports,
2621 const size_t offset,
2622 const size_t numBytes,
2625 using Kokkos::subview;
2626 using ::Tpetra::Details::PackTraits;
2628 if (numBytes == 0) {
2630 return static_cast<size_t> (0);
2634 const size_t theNumBytes = PackTraits<LO, D>::packValueCount (numEntLO);
2635 TEUCHOS_TEST_FOR_EXCEPTION
2636 (theNumBytes > numBytes, std::logic_error,
"unpackRowCount: "
2637 "theNumBytes = " << theNumBytes <<
" < numBytes = " << numBytes
2639 const char*
const inBuf = imports.data () + offset;
2640 const size_t actualNumBytes = PackTraits<LO, D>::unpackValue (numEntLO, inBuf);
2641 TEUCHOS_TEST_FOR_EXCEPTION
2642 (actualNumBytes > numBytes, std::logic_error,
"unpackRowCount: "
2643 "actualNumBytes = " << actualNumBytes <<
" < numBytes = " << numBytes
2645 return static_cast<size_t> (numEntLO);
2652 template<
class ST,
class LO,
class GO,
class D>
2654 packRowForBlockCrs (
const typename ::Tpetra::Details::PackTraits<LO, D>::output_buffer_type exports,
2655 const size_t offset,
2656 const size_t numEnt,
2657 const typename ::Tpetra::Details::PackTraits<GO, D>::input_array_type& gidsIn,
2658 const typename ::Tpetra::Details::PackTraits<ST, D>::input_array_type& valsIn,
2659 const size_t numBytesPerValue,
2660 const size_t blockSize)
2662 using Kokkos::subview;
2663 using ::Tpetra::Details::PackTraits;
2669 const size_t numScalarEnt = numEnt * blockSize * blockSize;
2672 const LO numEntLO =
static_cast<size_t> (numEnt);
2674 const size_t numEntBeg = offset;
2675 const size_t numEntLen = PackTraits<LO, D>::packValueCount (numEntLO);
2676 const size_t gidsBeg = numEntBeg + numEntLen;
2677 const size_t gidsLen = numEnt * PackTraits<GO, D>::packValueCount (gid);
2678 const size_t valsBeg = gidsBeg + gidsLen;
2679 const size_t valsLen = numScalarEnt * numBytesPerValue;
2681 char*
const numEntOut = exports.data () + numEntBeg;
2682 char*
const gidsOut = exports.data () + gidsBeg;
2683 char*
const valsOut = exports.data () + valsBeg;
2685 size_t numBytesOut = 0;
2687 numBytesOut += PackTraits<LO, D>::packValue (numEntOut, numEntLO);
2690 Kokkos::pair<int, size_t> p;
2691 p = PackTraits<GO, D>::packArray (gidsOut, gidsIn.data (), numEnt);
2692 errorCode += p.first;
2693 numBytesOut += p.second;
2695 p = PackTraits<ST, D>::packArray (valsOut, valsIn.data (), numScalarEnt);
2696 errorCode += p.first;
2697 numBytesOut += p.second;
2700 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
2701 TEUCHOS_TEST_FOR_EXCEPTION
2702 (numBytesOut != expectedNumBytes, std::logic_error,
2703 "packRowForBlockCrs: numBytesOut = " << numBytesOut
2704 <<
" != expectedNumBytes = " << expectedNumBytes <<
".");
2706 TEUCHOS_TEST_FOR_EXCEPTION
2707 (errorCode != 0, std::runtime_error,
"packRowForBlockCrs: "
2708 "PackTraits::packArray returned a nonzero error code");
2714 template<
class ST,
class LO,
class GO,
class D>
2716 unpackRowForBlockCrs (
const typename ::Tpetra::Details::PackTraits<GO, D>::output_array_type& gidsOut,
2717 const typename ::Tpetra::Details::PackTraits<ST, D>::output_array_type& valsOut,
2718 const typename ::Tpetra::Details::PackTraits<int, D>::input_buffer_type& imports,
2719 const size_t offset,
2720 const size_t numBytes,
2721 const size_t numEnt,
2722 const size_t numBytesPerValue,
2723 const size_t blockSize)
2725 using ::Tpetra::Details::PackTraits;
2727 if (numBytes == 0) {
2731 const size_t numScalarEnt = numEnt * blockSize * blockSize;
2732 TEUCHOS_TEST_FOR_EXCEPTION
2733 (static_cast<size_t> (imports.extent (0)) <= offset,
2734 std::logic_error,
"unpackRowForBlockCrs: imports.extent(0) = "
2735 << imports.extent (0) <<
" <= offset = " << offset <<
".");
2736 TEUCHOS_TEST_FOR_EXCEPTION
2737 (static_cast<size_t> (imports.extent (0)) < offset + numBytes,
2738 std::logic_error,
"unpackRowForBlockCrs: imports.extent(0) = "
2739 << imports.extent (0) <<
" < offset + numBytes = "
2740 << (offset + numBytes) <<
".");
2745 const size_t numEntBeg = offset;
2746 const size_t numEntLen = PackTraits<LO, D>::packValueCount (lid);
2747 const size_t gidsBeg = numEntBeg + numEntLen;
2748 const size_t gidsLen = numEnt * PackTraits<GO, D>::packValueCount (gid);
2749 const size_t valsBeg = gidsBeg + gidsLen;
2750 const size_t valsLen = numScalarEnt * numBytesPerValue;
2752 const char*
const numEntIn = imports.data () + numEntBeg;
2753 const char*
const gidsIn = imports.data () + gidsBeg;
2754 const char*
const valsIn = imports.data () + valsBeg;
2756 size_t numBytesOut = 0;
2759 numBytesOut += PackTraits<LO, D>::unpackValue (numEntOut, numEntIn);
2760 TEUCHOS_TEST_FOR_EXCEPTION
2761 (static_cast<size_t> (numEntOut) != numEnt, std::logic_error,
2762 "unpackRowForBlockCrs: Expected number of entries " << numEnt
2763 <<
" != actual number of entries " << numEntOut <<
".");
2766 Kokkos::pair<int, size_t> p;
2767 p = PackTraits<GO, D>::unpackArray (gidsOut.data (), gidsIn, numEnt);
2768 errorCode += p.first;
2769 numBytesOut += p.second;
2771 p = PackTraits<ST, D>::unpackArray (valsOut.data (), valsIn, numScalarEnt);
2772 errorCode += p.first;
2773 numBytesOut += p.second;
2776 TEUCHOS_TEST_FOR_EXCEPTION
2777 (numBytesOut != numBytes, std::logic_error,
2778 "unpackRowForBlockCrs: numBytesOut = " << numBytesOut
2779 <<
" != numBytes = " << numBytes <<
".");
2781 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
2782 TEUCHOS_TEST_FOR_EXCEPTION
2783 (numBytesOut != expectedNumBytes, std::logic_error,
2784 "unpackRowForBlockCrs: numBytesOut = " << numBytesOut
2785 <<
" != expectedNumBytes = " << expectedNumBytes <<
".");
2787 TEUCHOS_TEST_FOR_EXCEPTION
2788 (errorCode != 0, std::runtime_error,
"unpackRowForBlockCrs: "
2789 "PackTraits::unpackArray returned a nonzero error code");
2795 template<
class Scalar,
class LO,
class GO,
class Node>
2797 BlockCrsMatrix<Scalar, LO, GO, Node>::
2798 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
2800 #else // TPETRA_ENABLE_DEPRECATED_CODE
2802 #endif // TPETRA_ENABLE_DEPRECATED_CODE
2803 (const ::Tpetra::SrcDistObject& source,
2804 const Kokkos::DualView<
const local_ordinal_type*,
2805 buffer_device_type>& exportLIDs,
2806 Kokkos::DualView<packet_type*,
2807 buffer_device_type>& exports,
2808 Kokkos::DualView<
size_t*,
2809 buffer_device_type> numPacketsPerLID,
2810 size_t& constantNumPackets,
2813 using ::Tpetra::Details::Behavior;
2815 using ::Tpetra::Details::ProfilingRegion;
2816 using ::Tpetra::Details::PackTraits;
2818 typedef typename Kokkos::View<int*, device_type>::HostMirror::execution_space host_exec;
2822 ProfilingRegion profile_region(
"Tpetra::BlockCrsMatrix::packAndPrepare");
2824 const bool debug = Behavior::debug();
2825 const bool verbose = Behavior::verbose();
2830 std::ostringstream os;
2831 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2832 os <<
"Proc " << myRank <<
": BlockCrsMatrix::packAndPrepare: " << std::endl;
2837 if (* (this->localError_)) {
2838 std::ostream& err = this->markLocalErrorAndGetStream ();
2840 <<
"The target object of the Import or Export is already in an error state."
2849 std::ostringstream os;
2850 os << prefix << std::endl
2854 std::cerr << os.str ();
2860 if (exportLIDs.extent (0) != numPacketsPerLID.extent (0)) {
2861 std::ostream& err = this->markLocalErrorAndGetStream ();
2863 <<
"exportLIDs.extent(0) = " << exportLIDs.extent (0)
2864 <<
" != numPacketsPerLID.extent(0) = " << numPacketsPerLID.extent(0)
2865 <<
"." << std::endl;
2868 if (exportLIDs.need_sync_host ()) {
2869 std::ostream& err = this->markLocalErrorAndGetStream ();
2870 err << prefix <<
"exportLIDs be sync'd to host." << std::endl;
2874 const this_type* src =
dynamic_cast<const this_type*
> (&source);
2876 std::ostream& err = this->markLocalErrorAndGetStream ();
2878 <<
"The source (input) object of the Import or "
2879 "Export is either not a BlockCrsMatrix, or does not have the right "
2880 "template parameters. checkSizes() should have caught this. "
2881 "Please report this bug to the Tpetra developers." << std::endl;
2893 constantNumPackets = 0;
2896 const crs_graph_type& srcGraph = src->graph_;
2897 const size_t blockSize =
static_cast<size_t> (src->getBlockSize ());
2898 const size_t numExportLIDs = exportLIDs.extent (0);
2899 const size_t numBytesPerValue =
2900 PackTraits<impl_scalar_type, host_exec>
2901 ::packValueCount(this->val_.extent(0) ? this->val_.view_host()(0) : impl_scalar_type());
2907 Impl::BlockCrsRowStruct<size_t> rowReducerStruct;
2910 auto exportLIDsHost = exportLIDs.view_host();
2911 auto numPacketsPerLIDHost = numPacketsPerLID.view_host();
2912 numPacketsPerLID.modify_host ();
2914 using reducer_type = Impl::BlockCrsReducer<Impl::BlockCrsRowStruct<size_t>,host_exec>;
2915 const auto policy = Kokkos::RangePolicy<host_exec>(size_t(0), numExportLIDs);
2916 Kokkos::parallel_reduce
2918 [=](
const size_t &i,
typename reducer_type::value_type &update) {
2919 const LO lclRow = exportLIDsHost(i);
2920 size_t numEnt = srcGraph.getNumEntriesInLocalRow (lclRow);
2921 numEnt = (numEnt == Teuchos::OrdinalTraits<size_t>::invalid () ? 0 : numEnt);
2923 const size_t numBytes =
2924 packRowCount<LO, GO, host_exec> (numEnt, numBytesPerValue, blockSize);
2925 numPacketsPerLIDHost(i) = numBytes;
2926 update +=
typename reducer_type::value_type(numEnt, numBytes, numEnt);
2927 }, rowReducerStruct);
2933 const size_t totalNumBytes = rowReducerStruct.totalNumBytes;
2934 const size_t totalNumEntries = rowReducerStruct.totalNumEntries;
2935 const size_t maxRowLength = rowReducerStruct.maxRowLength;
2938 std::ostringstream os;
2940 <<
"totalNumBytes = " << totalNumBytes <<
", totalNumEntries = " << totalNumEntries
2942 std::cerr << os.str ();
2948 if(exports.extent(0) != totalNumBytes)
2950 const std::string oldLabel = exports.d_view.label ();
2951 const std::string newLabel = (oldLabel ==
"") ?
"exports" : oldLabel;
2952 exports = Kokkos::DualView<packet_type*, buffer_device_type>(newLabel, totalNumBytes);
2954 if (totalNumEntries > 0) {
2956 Kokkos::View<size_t*, host_exec> offset(
"offset", numExportLIDs+1);
2958 const auto policy = Kokkos::RangePolicy<host_exec>(size_t(0), numExportLIDs+1);
2959 Kokkos::parallel_scan
2961 [=](
const size_t &i,
size_t &update,
const bool &
final) {
2962 if (
final) offset(i) = update;
2963 update += (i == numExportLIDs ? 0 : numPacketsPerLIDHost(i));
2966 if (offset(numExportLIDs) != totalNumBytes) {
2967 std::ostream& err = this->markLocalErrorAndGetStream ();
2969 <<
"At end of method, the final offset (in bytes) "
2970 << offset(numExportLIDs) <<
" does not equal the total number of bytes packed "
2971 << totalNumBytes <<
". "
2972 <<
"Please report this bug to the Tpetra developers." << std::endl;
2982 const map_type& srcColMap = * (srcGraph.getColMap ());
2986 exports.modify_host();
2988 typedef Kokkos::TeamPolicy<host_exec> policy_type;
2990 policy_type(numExportLIDs, 1, 1)
2991 .set_scratch_size(0, Kokkos::PerTeam(
sizeof(GO)*maxRowLength));
2992 Kokkos::parallel_for
2994 [=](
const typename policy_type::member_type &member) {
2995 const size_t i = member.league_rank();
2996 Kokkos::View<GO*, typename host_exec::scratch_memory_space>
2997 gblColInds(member.team_scratch(0), maxRowLength);
2999 const LO lclRowInd = exportLIDsHost(i);
3000 const LO* lclColIndsRaw;
3006 (void) src->getLocalRowView (lclRowInd, lclColIndsRaw, valsRaw, numEntLO);
3008 const size_t numEnt =
static_cast<size_t> (numEntLO);
3009 Kokkos::View<const LO*,host_exec> lclColInds (lclColIndsRaw, numEnt);
3012 for (
size_t j = 0; j < numEnt; ++j)
3013 gblColInds(j) = srcColMap.getGlobalElement (lclColInds(j));
3019 const size_t numBytes = packRowForBlockCrs<impl_scalar_type,LO,GO,host_exec>
3020 (exports.view_host(),
3023 Kokkos::View<const GO*,host_exec>(gblColInds.data(), maxRowLength),
3024 Kokkos::View<const impl_scalar_type*,host_exec>(reinterpret_cast<const impl_scalar_type*>(valsRaw), numEnt*blockSize*blockSize),
3030 const size_t offsetDiff = offset(i+1) - offset(i);
3031 if (numBytes != offsetDiff) {
3032 std::ostringstream os;
3034 <<
"numBytes computed from packRowForBlockCrs is different from "
3035 <<
"precomputed offset values, LID = " << i << std::endl;
3036 std::cerr << os.str ();
3044 std::ostringstream os;
3045 const bool lclSuccess = ! (* (this->localError_));
3047 << (lclSuccess ?
"succeeded" :
"FAILED")
3049 std::cerr << os.str ();
3053 template<
class Scalar,
class LO,
class GO,
class Node>
3055 BlockCrsMatrix<Scalar, LO, GO, Node>::
3056 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
3058 #else // TPETRA_ENABLE_DEPRECATED_CODE
3060 #endif // TPETRA_ENABLE_DEPRECATED_CODE
3061 (
const Kokkos::DualView<
const local_ordinal_type*,
3062 buffer_device_type>& importLIDs,
3063 Kokkos::DualView<packet_type*,
3064 buffer_device_type> imports,
3065 Kokkos::DualView<
size_t*,
3066 buffer_device_type> numPacketsPerLID,
3071 using ::Tpetra::Details::Behavior;
3073 using ::Tpetra::Details::ProfilingRegion;
3074 using ::Tpetra::Details::PackTraits;
3077 typename Kokkos::View<int*, device_type>::HostMirror::execution_space;
3079 ProfilingRegion profile_region(
"Tpetra::BlockCrsMatrix::unpackAndCombine");
3080 const bool verbose = Behavior::verbose ();
3085 std::ostringstream os;
3086 auto map = this->graph_.getRowMap ();
3087 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
3088 const int myRank = comm.is_null () ? -1 : comm->getRank ();
3089 os <<
"Proc " << myRank <<
": Tpetra::BlockCrsMatrix::unpackAndCombine: ";
3092 os <<
"Start" << endl;
3093 std::cerr << os.str ();
3098 if (* (this->localError_)) {
3099 std::ostream& err = this->markLocalErrorAndGetStream ();
3100 std::ostringstream os;
3101 os << prefix <<
"{Im/Ex}port target is already in error." << endl;
3103 std::cerr << os.str ();
3112 if (importLIDs.extent (0) != numPacketsPerLID.extent (0)) {
3113 std::ostream& err = this->markLocalErrorAndGetStream ();
3114 std::ostringstream os;
3115 os << prefix <<
"importLIDs.extent(0) = " << importLIDs.extent (0)
3116 <<
" != numPacketsPerLID.extent(0) = " << numPacketsPerLID.extent(0)
3119 std::cerr << os.str ();
3125 if (combineMode !=
ADD && combineMode !=
INSERT &&
3127 combineMode != ZERO) {
3128 std::ostream& err = this->markLocalErrorAndGetStream ();
3129 std::ostringstream os;
3131 <<
"Invalid CombineMode value " << combineMode <<
". Valid "
3132 <<
"values include ADD, INSERT, REPLACE, ABSMAX, and ZERO."
3135 std::cerr << os.str ();
3141 if (this->graph_.getColMap ().is_null ()) {
3142 std::ostream& err = this->markLocalErrorAndGetStream ();
3143 std::ostringstream os;
3144 os << prefix <<
"Target matrix's column Map is null." << endl;
3146 std::cerr << os.str ();
3155 const map_type& tgtColMap = * (this->graph_.getColMap ());
3158 const size_t blockSize = this->getBlockSize ();
3159 const size_t numImportLIDs = importLIDs.extent(0);
3166 const size_t numBytesPerValue =
3167 PackTraits<impl_scalar_type, host_exec>::packValueCount
3168 (this->val_.extent (0) ? this->val_.view_host () (0) : impl_scalar_type ());
3169 const size_t maxRowNumEnt = graph_.getNodeMaxNumRowEntries ();
3170 const size_t maxRowNumScalarEnt = maxRowNumEnt * blockSize * blockSize;
3173 std::ostringstream os;
3174 os << prefix <<
"combineMode: "
3175 << ::Tpetra::combineModeToString (combineMode)
3176 <<
", blockSize: " << blockSize
3177 <<
", numImportLIDs: " << numImportLIDs
3178 <<
", numBytesPerValue: " << numBytesPerValue
3179 <<
", maxRowNumEnt: " << maxRowNumEnt
3180 <<
", maxRowNumScalarEnt: " << maxRowNumScalarEnt << endl;
3181 std::cerr << os.str ();
3184 if (combineMode == ZERO || numImportLIDs == 0) {
3186 std::ostringstream os;
3187 os << prefix <<
"Nothing to unpack. Done!" << std::endl;
3188 std::cerr << os.str ();
3195 if (imports.need_sync_host ()) {
3196 imports.sync_host ();
3206 if (imports.need_sync_host () || numPacketsPerLID.need_sync_host () ||
3207 importLIDs.need_sync_host ()) {
3208 std::ostream& err = this->markLocalErrorAndGetStream ();
3209 std::ostringstream os;
3210 os << prefix <<
"All input DualViews must be sync'd to host by now. "
3211 <<
"imports_nc.need_sync_host()="
3212 << (imports.need_sync_host () ?
"true" :
"false")
3213 <<
", numPacketsPerLID.need_sync_host()="
3214 << (numPacketsPerLID.need_sync_host () ?
"true" :
"false")
3215 <<
", importLIDs.need_sync_host()="
3216 << (importLIDs.need_sync_host () ?
"true" :
"false")
3219 std::cerr << os.str ();
3225 const auto importLIDsHost = importLIDs.view_host ();
3226 const auto numPacketsPerLIDHost = numPacketsPerLID.view_host ();
3232 Kokkos::View<size_t*, host_exec> offset (
"offset", numImportLIDs+1);
3234 const auto policy = Kokkos::RangePolicy<host_exec>(size_t(0), numImportLIDs+1);
3235 Kokkos::parallel_scan
3236 (
"Tpetra::BlockCrsMatrix::unpackAndCombine: offsets", policy,
3237 [=] (
const size_t &i,
size_t &update,
const bool &
final) {
3238 if (
final) offset(i) = update;
3239 update += (i == numImportLIDs ? 0 : numPacketsPerLIDHost(i));
3249 Kokkos::View<int, host_exec, Kokkos::MemoryTraits<Kokkos::Atomic> >
3250 errorDuringUnpack (
"errorDuringUnpack");
3251 errorDuringUnpack () = 0;
3253 using policy_type = Kokkos::TeamPolicy<host_exec>;
3254 const auto policy = policy_type (numImportLIDs, 1, 1)
3255 .set_scratch_size (0, Kokkos::PerTeam (
sizeof (GO) * maxRowNumEnt +
3256 sizeof (LO) * maxRowNumEnt +
3257 numBytesPerValue * maxRowNumScalarEnt));
3258 using host_scratch_space =
typename host_exec::scratch_memory_space;
3259 using pair_type = Kokkos::pair<size_t, size_t>;
3260 Kokkos::parallel_for
3261 (
"Tpetra::BlockCrsMatrix::unpackAndCombine: unpack", policy,
3262 [=] (
const typename policy_type::member_type& member) {
3263 const size_t i = member.league_rank();
3265 Kokkos::View<GO*, host_scratch_space> gblColInds
3266 (member.team_scratch (0), maxRowNumEnt);
3267 Kokkos::View<LO*, host_scratch_space> lclColInds
3268 (member.team_scratch (0), maxRowNumEnt);
3269 Kokkos::View<impl_scalar_type*, host_scratch_space> vals
3270 (member.team_scratch (0), maxRowNumScalarEnt);
3272 const size_t offval = offset(i);
3273 const LO lclRow = importLIDsHost(i);
3274 const size_t numBytes = numPacketsPerLIDHost(i);
3275 const size_t numEnt =
3276 unpackRowCount<impl_scalar_type, LO, GO, host_exec>
3277 (imports.view_host (), offval, numBytes, numBytesPerValue);
3280 if (numEnt > maxRowNumEnt) {
3281 errorDuringUnpack() = 1;
3283 std::ostringstream os;
3285 <<
"At i = " << i <<
", numEnt = " << numEnt
3286 <<
" > maxRowNumEnt = " << maxRowNumEnt
3288 std::cerr << os.str ();
3293 const size_t numScalarEnt = numEnt*blockSize*blockSize;
3294 auto gidsOut = Kokkos::subview(gblColInds, pair_type(0, numEnt));
3295 auto lidsOut = Kokkos::subview(lclColInds, pair_type(0, numEnt));
3296 auto valsOut = Kokkos::subview(vals, pair_type(0, numScalarEnt));
3301 size_t numBytesOut = 0;
3304 unpackRowForBlockCrs<impl_scalar_type, LO, GO, host_exec>
3305 (Kokkos::View<GO*,host_exec>(gidsOut.data(), numEnt),
3306 Kokkos::View<impl_scalar_type*,host_exec>(valsOut.data(), numScalarEnt),
3307 imports.view_host(),
3308 offval, numBytes, numEnt,
3309 numBytesPerValue, blockSize);
3311 catch (std::exception& e) {
3312 errorDuringUnpack() = 1;
3314 std::ostringstream os;
3315 os << prefix <<
"At i = " << i <<
", unpackRowForBlockCrs threw: "
3316 << e.what () << endl;
3317 std::cerr << os.str ();
3322 if (numBytes != numBytesOut) {
3323 errorDuringUnpack() = 1;
3325 std::ostringstream os;
3327 <<
"At i = " << i <<
", numBytes = " << numBytes
3328 <<
" != numBytesOut = " << numBytesOut <<
"."
3330 std::cerr << os.str();
3336 for (
size_t k = 0; k < numEnt; ++k) {
3337 lidsOut(k) = tgtColMap.getLocalElement (gidsOut(k));
3338 if (lidsOut(k) == Teuchos::OrdinalTraits<LO>::invalid ()) {
3340 std::ostringstream os;
3342 <<
"At i = " << i <<
", GID " << gidsOut(k)
3343 <<
" is not owned by the calling process."
3345 std::cerr << os.str();
3353 const LO*
const lidsRaw =
const_cast<const LO*
> (lidsOut.data ());
3354 const Scalar*
const valsRaw =
reinterpret_cast<const Scalar*
>
3355 (
const_cast<const impl_scalar_type*
> (valsOut.data ()));
3356 if (combineMode ==
ADD) {
3357 numCombd = this->sumIntoLocalValues (lclRow, lidsRaw, valsRaw, numEnt);
3358 }
else if (combineMode ==
INSERT || combineMode ==
REPLACE) {
3359 numCombd = this->replaceLocalValues (lclRow, lidsRaw, valsRaw, numEnt);
3360 }
else if (combineMode ==
ABSMAX) {
3361 numCombd = this->absMaxLocalValues (lclRow, lidsRaw, valsRaw, numEnt);
3364 if (static_cast<LO> (numEnt) != numCombd) {
3365 errorDuringUnpack() = 1;
3367 std::ostringstream os;
3368 os << prefix <<
"At i = " << i <<
", numEnt = " << numEnt
3369 <<
" != numCombd = " << numCombd <<
"."
3371 std::cerr << os.str ();
3378 if (errorDuringUnpack () != 0) {
3379 std::ostream& err = this->markLocalErrorAndGetStream ();
3380 err << prefix <<
"Unpacking failed.";
3382 err <<
" Please run again with the environment variable setting "
3383 "TPETRA_VERBOSE=1 to get more verbose diagnostic output.";
3389 std::ostringstream os;
3390 const bool lclSuccess = ! (* (this->localError_));
3392 << (lclSuccess ?
"succeeded" :
"FAILED")
3394 std::cerr << os.str ();
3398 template<
class Scalar,
class LO,
class GO,
class Node>
3402 using Teuchos::TypeNameTraits;
3403 std::ostringstream os;
3404 os <<
"\"Tpetra::BlockCrsMatrix\": { "
3405 <<
"Template parameters: { "
3406 <<
"Scalar: " << TypeNameTraits<Scalar>::name ()
3407 <<
"LO: " << TypeNameTraits<LO>::name ()
3408 <<
"GO: " << TypeNameTraits<GO>::name ()
3409 <<
"Node: " << TypeNameTraits<Node>::name ()
3411 <<
", Label: \"" << this->getObjectLabel () <<
"\""
3412 <<
", Global dimensions: ["
3413 << graph_.getDomainMap ()->getGlobalNumElements () <<
", "
3414 << graph_.getRangeMap ()->getGlobalNumElements () <<
"]"
3415 <<
", Block size: " << getBlockSize ()
3421 template<
class Scalar,
class LO,
class GO,
class Node>
3425 const Teuchos::EVerbosityLevel verbLevel)
const
3427 using Teuchos::ArrayRCP;
3428 using Teuchos::CommRequest;
3429 using Teuchos::FancyOStream;
3430 using Teuchos::getFancyOStream;
3431 using Teuchos::ireceive;
3432 using Teuchos::isend;
3433 using Teuchos::outArg;
3434 using Teuchos::TypeNameTraits;
3435 using Teuchos::VERB_DEFAULT;
3436 using Teuchos::VERB_NONE;
3437 using Teuchos::VERB_LOW;
3438 using Teuchos::VERB_MEDIUM;
3440 using Teuchos::VERB_EXTREME;
3442 using Teuchos::wait;
3444 #ifdef HAVE_TPETRA_DEBUG
3445 const char prefix[] =
"Tpetra::BlockCrsMatrix::describe: ";
3446 #endif // HAVE_TPETRA_DEBUG
3449 const Teuchos::EVerbosityLevel vl =
3450 (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
3452 if (vl == VERB_NONE) {
3457 Teuchos::OSTab tab0 (out);
3459 out <<
"\"Tpetra::BlockCrsMatrix\":" << endl;
3460 Teuchos::OSTab tab1 (out);
3462 out <<
"Template parameters:" << endl;
3464 Teuchos::OSTab tab2 (out);
3465 out <<
"Scalar: " << TypeNameTraits<Scalar>::name () << endl
3466 <<
"LO: " << TypeNameTraits<LO>::name () << endl
3467 <<
"GO: " << TypeNameTraits<GO>::name () << endl
3468 <<
"Node: " << TypeNameTraits<Node>::name () << endl;
3470 out <<
"Label: \"" << this->getObjectLabel () <<
"\"" << endl
3471 <<
"Global dimensions: ["
3472 << graph_.getDomainMap ()->getGlobalNumElements () <<
", "
3473 << graph_.getRangeMap ()->getGlobalNumElements () <<
"]" << endl;
3475 const LO blockSize = getBlockSize ();
3476 out <<
"Block size: " << blockSize << endl;
3479 if (vl >= VERB_MEDIUM) {
3480 const Teuchos::Comm<int>& comm = * (graph_.getMap ()->getComm ());
3481 const int myRank = comm.getRank ();
3483 out <<
"Row Map:" << endl;
3485 getRowMap()->describe (out, vl);
3487 if (! getColMap ().is_null ()) {
3488 if (getColMap() == getRowMap()) {
3490 out <<
"Column Map: same as row Map" << endl;
3495 out <<
"Column Map:" << endl;
3497 getColMap ()->describe (out, vl);
3500 if (! getDomainMap ().is_null ()) {
3501 if (getDomainMap () == getRowMap ()) {
3503 out <<
"Domain Map: same as row Map" << endl;
3506 else if (getDomainMap () == getColMap ()) {
3508 out <<
"Domain Map: same as column Map" << endl;
3513 out <<
"Domain Map:" << endl;
3515 getDomainMap ()->describe (out, vl);
3518 if (! getRangeMap ().is_null ()) {
3519 if (getRangeMap () == getDomainMap ()) {
3521 out <<
"Range Map: same as domain Map" << endl;
3524 else if (getRangeMap () == getRowMap ()) {
3526 out <<
"Range Map: same as row Map" << endl;
3531 out <<
"Range Map: " << endl;
3533 getRangeMap ()->describe (out, vl);
3538 if (vl >= VERB_EXTREME) {
3544 const_cast<this_type*
> (
this)->sync_host ();
3546 #ifdef HAVE_TPETRA_DEBUG
3547 TEUCHOS_TEST_FOR_EXCEPTION
3548 (this->need_sync_host (), std::logic_error,
3549 prefix <<
"Right after sync to host, the matrix claims that it needs "
3550 "sync to host. Please report this bug to the Tpetra developers.");
3551 #endif // HAVE_TPETRA_DEBUG
3553 const Teuchos::Comm<int>& comm = * (graph_.getMap ()->getComm ());
3554 const int myRank = comm.getRank ();
3555 const int numProcs = comm.getSize ();
3558 RCP<std::ostringstream> lclOutStrPtr (
new std::ostringstream ());
3559 RCP<FancyOStream> osPtr = getFancyOStream (lclOutStrPtr);
3560 FancyOStream& os = *osPtr;
3561 os <<
"Process " << myRank <<
":" << endl;
3562 Teuchos::OSTab tab2 (os);
3564 const map_type& meshRowMap = * (graph_.getRowMap ());
3565 const map_type& meshColMap = * (graph_.getColMap ());
3570 os <<
"Row " << meshGblRow <<
": {";
3572 const LO* lclColInds = NULL;
3573 Scalar* vals = NULL;
3575 this->getLocalRowView (meshLclRow, lclColInds, vals, numInds);
3577 for (LO k = 0; k < numInds; ++k) {
3580 os <<
"Col " << gblCol <<
": [";
3581 for (LO i = 0; i < blockSize; ++i) {
3582 for (LO j = 0; j < blockSize; ++j) {
3583 os << vals[blockSize*blockSize*k + i*blockSize + j];
3584 if (j + 1 < blockSize) {
3588 if (i + 1 < blockSize) {
3593 if (k + 1 < numInds) {
3603 out << lclOutStrPtr->str ();
3604 lclOutStrPtr = Teuchos::null;
3607 const int sizeTag = 1337;
3608 const int dataTag = 1338;
3610 ArrayRCP<char> recvDataBuf;
3614 for (
int p = 1; p < numProcs; ++p) {
3617 ArrayRCP<size_t> recvSize (1);
3619 RCP<CommRequest<int> > recvSizeReq =
3620 ireceive<int, size_t> (recvSize, p, sizeTag, comm);
3621 wait<int> (comm, outArg (recvSizeReq));
3622 const size_t numCharsToRecv = recvSize[0];
3629 if (static_cast<size_t>(recvDataBuf.size()) < numCharsToRecv + 1) {
3630 recvDataBuf.resize (numCharsToRecv + 1);
3632 ArrayRCP<char> recvData = recvDataBuf.persistingView (0, numCharsToRecv);
3634 RCP<CommRequest<int> > recvDataReq =
3635 ireceive<int, char> (recvData, p, dataTag, comm);
3636 wait<int> (comm, outArg (recvDataReq));
3641 recvDataBuf[numCharsToRecv] =
'\0';
3642 out << recvDataBuf.getRawPtr ();
3644 else if (myRank == p) {
3648 const std::string stringToSend = lclOutStrPtr->str ();
3649 lclOutStrPtr = Teuchos::null;
3652 const size_t numCharsToSend = stringToSend.size ();
3653 ArrayRCP<size_t> sendSize (1);
3654 sendSize[0] = numCharsToSend;
3655 RCP<CommRequest<int> > sendSizeReq =
3656 isend<int, size_t> (sendSize, 0, sizeTag, comm);
3657 wait<int> (comm, outArg (sendSizeReq));
3665 ArrayRCP<const char> sendData (&stringToSend[0], 0, numCharsToSend,
false);
3666 RCP<CommRequest<int> > sendDataReq =
3667 isend<int, char> (sendData, 0, dataTag, comm);
3668 wait<int> (comm, outArg (sendDataReq));
3674 template<
class Scalar,
class LO,
class GO,
class Node>
3675 Teuchos::RCP<const Teuchos::Comm<int> >
3679 return graph_.getComm();
3682 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
3683 template<
class Scalar,
class LO,
class GO,
class Node>
3689 return Teuchos::null;
3692 #endif // TPETRA_ENABLE_DEPRECATED_CODE
3694 template<
class Scalar,
class LO,
class GO,
class Node>
3699 return graph_.getGlobalNumCols();
3702 template<
class Scalar,
class LO,
class GO,
class Node>
3707 return graph_.getNodeNumCols();
3710 template<
class Scalar,
class LO,
class GO,
class Node>
3715 return graph_.getIndexBase();
3718 template<
class Scalar,
class LO,
class GO,
class Node>
3723 return graph_.getGlobalNumEntries();
3726 template<
class Scalar,
class LO,
class GO,
class Node>
3731 return graph_.getNodeNumEntries();
3734 template<
class Scalar,
class LO,
class GO,
class Node>
3739 return graph_.getNumEntriesInGlobalRow(globalRow);
3742 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
3743 template<
class Scalar,
class LO,
class GO,
class Node>
3748 using HDM = Details::HasDeprecatedMethods2630_WarningThisClassIsNotForUsers;
3749 return dynamic_cast<const HDM&
> (this->graph_).getGlobalNumDiagsImpl ();
3752 template<
class Scalar,
class LO,
class GO,
class Node>
3753 size_t TPETRA_DEPRECATED
3754 BlockCrsMatrix<Scalar, LO, GO, Node>::
3755 getNodeNumDiags ()
const
3757 using HDM = Details::HasDeprecatedMethods2630_WarningThisClassIsNotForUsers;
3758 return dynamic_cast<const HDM&
> (this->graph_).getNodeNumDiagsImpl ();
3760 #endif // TPETRA_ENABLE_DEPRECATED_CODE
3762 template<
class Scalar,
class LO,
class GO,
class Node>
3767 return graph_.getGlobalMaxNumRowEntries();
3770 template<
class Scalar,
class LO,
class GO,
class Node>
3775 return graph_.hasColMap();
3778 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
3779 template<
class Scalar,
class LO,
class GO,
class Node>
3780 bool TPETRA_DEPRECATED
3784 using HDM = ::Tpetra::Details::HasDeprecatedMethods2630_WarningThisClassIsNotForUsers;
3785 return dynamic_cast<const HDM&
> (this->graph_).isLowerTriangularImpl ();
3788 template<
class Scalar,
class LO,
class GO,
class Node>
3789 bool TPETRA_DEPRECATED
3790 BlockCrsMatrix<Scalar, LO, GO, Node>::
3791 isUpperTriangular ()
const
3793 using HDM = ::Tpetra::Details::HasDeprecatedMethods2630_WarningThisClassIsNotForUsers;
3794 return dynamic_cast<const HDM&
> (this->graph_).isUpperTriangularImpl ();
3796 #endif // TPETRA_ENABLE_DEPRECATED_CODE
3798 template<
class Scalar,
class LO,
class GO,
class Node>
3803 return graph_.isLocallyIndexed();
3806 template<
class Scalar,
class LO,
class GO,
class Node>
3811 return graph_.isGloballyIndexed();
3814 template<
class Scalar,
class LO,
class GO,
class Node>
3819 return graph_.isFillComplete ();
3822 template<
class Scalar,
class LO,
class GO,
class Node>
3831 template<
class Scalar,
class LO,
class GO,
class Node>
3835 const Teuchos::ArrayView<GO> &,
3836 const Teuchos::ArrayView<Scalar> &,
3839 TEUCHOS_TEST_FOR_EXCEPTION(
3840 true, std::logic_error,
"Tpetra::BlockCrsMatrix::getGlobalRowCopy: "
3841 "This class doesn't support global matrix indexing.");
3845 template<
class Scalar,
class LO,
class GO,
class Node>
3849 Teuchos::ArrayView<const GO> &,
3850 Teuchos::ArrayView<const Scalar> &)
const
3852 TEUCHOS_TEST_FOR_EXCEPTION(
3853 true, std::logic_error,
"Tpetra::BlockCrsMatrix::getGlobalRowView: "
3854 "This class doesn't support global matrix indexing.");
3858 template<
class Scalar,
class LO,
class GO,
class Node>
3862 Teuchos::ArrayView<const LO>& ,
3863 Teuchos::ArrayView<const Scalar>& )
const
3865 TEUCHOS_TEST_FOR_EXCEPTION(
3866 true, std::logic_error,
"Tpetra::BlockCrsMatrix::getLocalRowView: "
3867 "This class doesn't support local matrix indexing.");
3870 template<
class Scalar,
class LO,
class GO,
class Node>
3875 #ifdef HAVE_TPETRA_DEBUG
3876 const char prefix[] =
3877 "Tpetra::BlockCrsMatrix::getLocalDiagCopy: ";
3878 #endif // HAVE_TPETRA_DEBUG
3880 const size_t lclNumMeshRows = graph_.getNodeNumRows ();
3882 Kokkos::View<size_t*, device_type> diagOffsets (
"diagOffsets", lclNumMeshRows);
3883 graph_.getLocalDiagOffsets (diagOffsets);
3886 auto diagOffsetsHost = Kokkos::create_mirror_view (diagOffsets);
3889 diag.template modify<typename decltype (diagOffsetsHost)::memory_space> ();
3891 #ifdef HAVE_TPETRA_DEBUG
3892 TEUCHOS_TEST_FOR_EXCEPTION
3893 (this->need_sync_host (), std::runtime_error,
3894 prefix <<
"The matrix's data were last modified on device, but have "
3895 "not been sync'd to host. Please sync to host (by calling "
3896 "sync<Kokkos::HostSpace>() on this matrix) before calling this "
3898 #endif // HAVE_TPETRA_DEBUG
3900 auto vals_host_out = getValuesHost ();
3901 Scalar* vals_host_out_raw =
3902 reinterpret_cast<Scalar*
> (vals_host_out.data ());
3905 size_t rowOffset = 0;
3907 LO bs = getBlockSize();
3908 for(
size_t r=0; r<getNodeNumRows(); r++)
3911 offset = rowOffset + diagOffsetsHost(r)*bs*bs;
3912 for(
int b=0; b<bs; b++)
3917 rowOffset += getNumEntriesInLocalRow(r)*bs*bs;
3920 diag.template sync<memory_space> ();
3923 template<
class Scalar,
class LO,
class GO,
class Node>
3928 TEUCHOS_TEST_FOR_EXCEPTION(
3929 true, std::logic_error,
"Tpetra::BlockCrsMatrix::leftScale: "
3930 "not implemented.");
3934 template<
class Scalar,
class LO,
class GO,
class Node>
3939 TEUCHOS_TEST_FOR_EXCEPTION(
3940 true, std::logic_error,
"Tpetra::BlockCrsMatrix::rightScale: "
3941 "not implemented.");
3945 template<
class Scalar,
class LO,
class GO,
class Node>
3946 Teuchos::RCP<const ::Tpetra::RowGraph<LO, GO, Node> >
3953 template<
class Scalar,
class LO,
class GO,
class Node>
3954 typename ::Tpetra::RowMatrix<Scalar, LO, GO, Node>::mag_type
3958 TEUCHOS_TEST_FOR_EXCEPTION(
3959 true, std::logic_error,
"Tpetra::BlockCrsMatrix::getFrobeniusNorm: "
3960 "not implemented.");
3970 #define TPETRA_BLOCKCRSMATRIX_INSTANT(S,LO,GO,NODE) \
3971 template class BlockCrsMatrix< S, LO, GO, NODE >;
3973 #endif // TPETRA_BLOCKCRSMATRIX_DEF_HPP
virtual size_t getNumEntriesInGlobalRow(GO globalRow) const
The current number of entries on the calling process in the specified global row. ...
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
little_vec_type::HostMirror getLocalBlock(const LO localRowIndex, const LO colIndex) const
Get a host view of the degrees of freedom at the given mesh point.
typename BMV::impl_scalar_type impl_scalar_type
The implementation type of entries in the matrix.
void getLocalDiagCopy(const Kokkos::View< impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &diag, const Kokkos::View< const size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Variant of getLocalDiagCopy() that uses precomputed offsets and puts diagonal blocks in a 3-D Kokkos:...
LO replaceLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Replace values at the given (mesh, i.e., block) column indices, in the given (mesh, i.e., block) row.
Teuchos::RCP< const map_type > getColMap() const override
Returns the Map that describes the column distribution in this graph.
void gaussSeidelCopy(MultiVector< Scalar, LO, GO, Node > &X, const ::Tpetra::MultiVector< Scalar, LO, GO, Node > &B, const ::Tpetra::MultiVector< Scalar, LO, GO, Node > &D, const Scalar &dampingFactor, const ESweepDirection direction, const int numSweeps, const bool zeroInitialGuess) const
Version of gaussSeidel(), with fewer requirements on X.
LO replaceLocalValuesByOffsets(const LO localRowInd, const ptrdiff_t offsets[], const Scalar vals[], const LO numOffsets) const
Like replaceLocalValues, but avoids computing row offsets.
Sparse matrix whose entries are small dense square blocks, all of the same dimensions.
size_t getNodeNumRows() const
get the local number of block rows
virtual bool isGloballyIndexed() const
Whether matrix indices are globally indexed.
size_t getNumEntriesInLocalRow(const LO localRowInd) const
Return the number of entries in the given row on the calling process.
virtual void getGlobalRowView(GO GlobalRow, Teuchos::ArrayView< const GO > &indices, Teuchos::ArrayView< const Scalar > &values) const
Get a constant, nonpersisting, globally indexed view of the given row of the matrix.
virtual typename::Tpetra::RowMatrix< Scalar, LO, GO, Node >::mag_type getFrobeniusNorm() const
The Frobenius norm of the matrix.
virtual global_size_t getGlobalNumCols() const
The global number of columns of this matrix.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
void setAllToScalar(const Scalar &alpha)
Set all matrix entries equal to alpha.
One or more distributed dense vectors.
virtual bool isLocallyIndexed() const
Whether matrix indices are locally indexed.
Teuchos::RCP< const map_type > getDomainMap() const override
Returns the Map associated with the domain of this graph.
Linear algebra kernels for small dense matrices and vectors.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
virtual void leftScale(const ::Tpetra::Vector< Scalar, LO, GO, Node > &x)
Scale the RowMatrix on the left with the given Vector x.
virtual bool hasColMap() const
Whether this matrix has a well-defined column Map.
LocalOrdinal getMinLocalIndex() const
The minimum local index.
Kokkos::View< impl_scalar_type *, Kokkos::LayoutRight, device_type, Kokkos::MemoryTraits< Kokkos::Unmanaged > > little_vec_type
The type used to access nonconst vector blocks.
virtual size_t getNodeNumEntries() const
The local number of stored (structurally nonzero) entries.
KOKKOS_INLINE_FUNCTION void FILL(const ViewType &x, const InputType &val)
Set every entry of x to val.
static map_type makePointMap(const map_type &meshMap, const LO blockSize)
Create and return the point Map corresponding to the given mesh Map and block size.
virtual size_t getNodeNumCols() const
The number of columns needed to apply the forward operator on this node.
int local_ordinal_type
Default value of Scalar template parameter.
MultiVector for multiple degrees of freedom per mesh point.
KOKKOS_INLINE_FUNCTION void absMax(const ViewType2 &Y, const ViewType1 &X)
Implementation of Tpetra's ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix, and the small dense vectors in BlockMultiVector and BlockVector.
KOKKOS_INLINE_FUNCTION void SCAL(const CoefficientType &alpha, const ViewType &x)
x := alpha*x, where x is either rank 1 (a vector) or rank 2 (a matrix).
size_t global_size_t
Global size_t object.
size_t getNodeNumEntries() const override
The local number of entries in the graph.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
BlockCrsMatrix()
Default constructor: Makes an empty block matrix.
virtual bool isFillComplete() const
Whether fillComplete() has been called.
void apply(const mv_type &X, mv_type &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const
For this matrix A, compute Y := beta * Y + alpha * Op(A) * X.
LO absMaxLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Like sumIntoLocalValues, but for the ABSMAX combine mode.
LO getBlockSize() const
Get the number of degrees of freedom per mesh point.
virtual size_t getGlobalMaxNumRowEntries() const
The maximum number of entries in any row over all processes in the matrix's communicator.
Insert new values that don't currently exist.
size_t getNodeMaxNumRowEntries() const
Maximum number of entries in any row of the matrix, on this process.
bool isSorted() const
Whether graph indices in all rows are known to be sorted.
LO sumIntoLocalValuesByOffsets(const LO localRowInd, const ptrdiff_t offsets[], const Scalar vals[], const LO numOffsets) const
Like sumIntoLocalValues, but avoids computing row offsets.
virtual void rightScale(const ::Tpetra::Vector< Scalar, LO, GO, Node > &x)
Scale the RowMatrix on the right with the given Vector x.
ESweepDirection
Sweep direction for Gauss-Seidel or Successive Over-Relaxation (SOR).
Teuchos::RCP< const map_type > getRangeMap() const
Get the (point) range Map of this matrix.
Teuchos::RCP< const map_type > getRowMap() const
get the (mesh) map for the rows of this block matrix.
Kokkos::StaticCrsGraph< local_ordinal_type, Kokkos::LayoutLeft, execution_space > local_graph_type
The type of the part of the sparse graph on each MPI process.
virtual global_size_t getGlobalNumEntries() const
The global number of stored (structurally nonzero) entries.
Sets up and executes a communication plan for a Tpetra DistObject.
void applyBlock(const BlockMultiVector< Scalar, LO, GO, Node > &X, BlockMultiVector< Scalar, LO, GO, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, const Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), const Scalar beta=Teuchos::ScalarTraits< Scalar >::zero())
Version of apply() that takes BlockMultiVector input and output.
GlobalOrdinal getGlobalElement(LocalOrdinal localIndex) const
The global index corresponding to the given local index.
CombineMode
Rule for combining data in an Import or Export.
Sum new values into existing values.
global_size_t getGlobalNumRows() const
get the global number of block rows
CrsGraphType::global_ordinal_type getGlobalNumDiags(const CrsGraphType &G)
Number of populated diagonal entries in the given sparse graph, over all processes in the graph's (MP...
virtual void getGlobalRowCopy(GO GlobalRow, const Teuchos::ArrayView< GO > &Indices, const Teuchos::ArrayView< Scalar > &Values, size_t &NumEntries) const
Get a copy of the given global row's entries.
LO getLocalRowOffsets(const LO localRowInd, ptrdiff_t offsets[], const LO colInds[], const LO numColInds) const
Get relative offsets corresponding to the given rows, given by local row index.
Replace old value with maximum of magnitudes of old and new values.
Replace existing values with new values.
Teuchos::RCP< const map_type > getRangeMap() const override
Returns the Map associated with the domain of this graph.
KOKKOS_INLINE_FUNCTION void COPY(const ViewType1 &x, const ViewType2 &y)
Deep copy x into y, where x and y are either rank 1 (vectors) or rank 2 (matrices) with the same dime...
Replace old values with zero.
virtual bool supportsRowViews() const
Whether this object implements getLocalRowView() and getGlobalRowView().
KOKKOS_INLINE_FUNCTION void GEMV(const CoeffType &alpha, const BlkType &A, const VecType1 &x, const VecType2 &y)
y := y + alpha * A * x (dense matrix-vector multiply)
LO absMaxLocalValuesByOffsets(const LO localRowInd, const ptrdiff_t offsets[], const Scalar vals[], const LO numOffsets) const
Like sumIntoLocalValuesByOffsets, but for the ABSMAX combine mode.
std::string dualViewStatusToString(const DualViewType &dv, const char name[])
Return the status of the given Kokkos::DualView, as a human-readable string.
virtual Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
The communicator over which this matrix is distributed.
LO getLocalRowView(const LO localRowInd, const LO *&colInds, Scalar *&vals, LO &numInds) const
Get a view of the (mesh, i.e., block) row, using local (mesh, i.e., block) indices.
void getLocalDiagOffsets(const Kokkos::View< size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Get offsets of the diagonal entries in the matrix.
LO getNumVectors() const
Get the number of columns (vectors) in the BlockMultiVector.
void localGaussSeidel(const BlockMultiVector< Scalar, LO, GO, Node > &Residual, BlockMultiVector< Scalar, LO, GO, Node > &Solution, const Kokkos::View< impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &D_inv, const Scalar &omega, const ESweepDirection direction) const
Local Gauss-Seidel solve, given a factorized diagonal.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel) const
Print a description of this object to the given output stream.
A distributed dense vector.
Teuchos::RCP< const map_type > getDomainMap() const
Get the (point) domain Map of this matrix.
void getLocalRowCopy(LO LocalRow, const Teuchos::ArrayView< LO > &Indices, const Teuchos::ArrayView< Scalar > &Values, size_t &NumEntries) const
Not implemented.
local_graph_type getLocalGraph() const
Get the local graph.
Teuchos::RCP< const map_type > getColMap() const
get the (mesh) map for the columns of this block matrix.
virtual Teuchos::RCP< const ::Tpetra::RowGraph< LO, GO, Node > > getGraph() const
Get the (mesh) graph.
void reorderedGaussSeidelCopy(::Tpetra::MultiVector< Scalar, LO, GO, Node > &X, const ::Tpetra::MultiVector< Scalar, LO, GO, Node > &B, const ::Tpetra::MultiVector< Scalar, LO, GO, Node > &D, const Teuchos::ArrayView< LO > &rowIndices, const Scalar &dampingFactor, const ESweepDirection direction, const int numSweeps, const bool zeroInitialGuess) const
Version of reorderedGaussSeidel(), with fewer requirements on X.
LO sumIntoLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Sum into values at the given (mesh, i.e., block) column indices, in the given (mesh, i.e., block) row.
LocalOrdinal getMaxLocalIndex() const
The maximum local index on the calling process.
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
std::string description() const
One-line description of this object.
void replaceLocalValue(const LocalOrdinal myRow, const Scalar &value) const
Replace current value at the specified location with specified values.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
virtual GO getIndexBase() const
The index base for global indices in this matrix.