40 #ifndef TPETRA_BLOCKCRSMATRIX_DEF_HPP
41 #define TPETRA_BLOCKCRSMATRIX_DEF_HPP
49 #include "Tpetra_BlockMultiVector.hpp"
52 #include "Teuchos_TimeMonitor.hpp"
53 #ifdef HAVE_TPETRA_DEBUG
55 #endif // HAVE_TPETRA_DEBUG
69 #if defined(__CUDACC__)
71 # if defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA)
72 # undef TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA
73 # endif // defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA)
75 #elif defined(__GNUC__)
77 # if defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA)
78 # undef TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA
79 # endif // defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA)
81 #else // some other compiler
84 # if ! defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA)
85 # define TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA 1
86 # endif // ! defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA)
87 #endif // defined(__CUDACC__), defined(__GNUC__)
95 struct BlockCrsRowStruct {
96 T totalNumEntries, totalNumBytes, maxRowLength;
97 KOKKOS_DEFAULTED_FUNCTION BlockCrsRowStruct() =
default;
98 KOKKOS_DEFAULTED_FUNCTION BlockCrsRowStruct(
const BlockCrsRowStruct &b) =
default;
99 KOKKOS_INLINE_FUNCTION BlockCrsRowStruct(
const T& numEnt,
const T& numBytes,
const T& rowLength)
100 : totalNumEntries(numEnt), totalNumBytes(numBytes), maxRowLength(rowLength) {}
105 KOKKOS_INLINE_FUNCTION
106 void operator+=(
volatile BlockCrsRowStruct<T> &a,
107 volatile const BlockCrsRowStruct<T> &b) {
108 a.totalNumEntries += b.totalNumEntries;
109 a.totalNumBytes += b.totalNumBytes;
110 a.maxRowLength = a.maxRowLength > b.maxRowLength ? a.maxRowLength : b.maxRowLength;
115 KOKKOS_INLINE_FUNCTION
116 void operator+=(BlockCrsRowStruct<T> &a,
const BlockCrsRowStruct<T> &b) {
117 a.totalNumEntries += b.totalNumEntries;
118 a.totalNumBytes += b.totalNumBytes;
119 a.maxRowLength = a.maxRowLength > b.maxRowLength ? a.maxRowLength : b.maxRowLength;
122 template<
typename T,
typename ExecSpace>
123 struct BlockCrsReducer {
124 typedef BlockCrsReducer reducer;
125 typedef T value_type;
126 typedef Kokkos::View<value_type,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type;
129 KOKKOS_INLINE_FUNCTION
130 BlockCrsReducer(value_type &val) : value(&val) {}
132 KOKKOS_INLINE_FUNCTION
void join(value_type &dst, value_type &src)
const { dst += src; }
133 KOKKOS_INLINE_FUNCTION
void join(
volatile value_type &dst,
const volatile value_type &src)
const { dst += src; }
134 KOKKOS_INLINE_FUNCTION
void init(value_type &val)
const { val = value_type(); }
135 KOKKOS_INLINE_FUNCTION value_type& reference() {
return *value; }
136 KOKKOS_INLINE_FUNCTION result_view_type view()
const {
return result_view_type(value); }
139 template<
class AlphaCoeffType,
141 class MatrixValuesType,
146 class BcrsApplyNoTransFunctor {
148 static_assert (Kokkos::Impl::is_view<MatrixValuesType>::value,
149 "MatrixValuesType must be a Kokkos::View.");
150 static_assert (Kokkos::Impl::is_view<OutVecType>::value,
151 "OutVecType must be a Kokkos::View.");
152 static_assert (Kokkos::Impl::is_view<InVecType>::value,
153 "InVecType must be a Kokkos::View.");
154 static_assert (std::is_same<MatrixValuesType,
155 typename MatrixValuesType::const_type>::value,
156 "MatrixValuesType must be a const Kokkos::View.");
157 static_assert (std::is_same<OutVecType,
158 typename OutVecType::non_const_type>::value,
159 "OutVecType must be a nonconst Kokkos::View.");
160 static_assert (std::is_same<InVecType, typename InVecType::const_type>::value,
161 "InVecType must be a const Kokkos::View.");
162 static_assert (static_cast<int> (MatrixValuesType::rank) == 1,
163 "MatrixValuesType must be a rank-1 Kokkos::View.");
164 static_assert (static_cast<int> (InVecType::rank) == 1,
165 "InVecType must be a rank-1 Kokkos::View.");
166 static_assert (static_cast<int> (OutVecType::rank) == 1,
167 "OutVecType must be a rank-1 Kokkos::View.");
168 typedef typename MatrixValuesType::non_const_value_type scalar_type;
171 typedef typename GraphType::device_type device_type;
174 typedef typename std::remove_const<typename GraphType::data_type>::type
183 void setX (
const InVecType& X) { X_ = X; }
191 void setY (
const OutVecType& Y) { Y_ = Y; }
193 typedef typename Kokkos::ArithTraits<typename std::decay<AlphaCoeffType>::type>::val_type alpha_coeff_type;
194 typedef typename Kokkos::ArithTraits<typename std::decay<BetaCoeffType>::type>::val_type beta_coeff_type;
197 BcrsApplyNoTransFunctor (
const alpha_coeff_type& alpha,
198 const GraphType& graph,
199 const MatrixValuesType& val,
200 const local_ordinal_type blockSize,
202 const beta_coeff_type& beta,
203 const OutVecType& Y) :
205 ptr_ (graph.row_map),
206 ind_ (graph.entries),
208 blockSize_ (blockSize),
215 KOKKOS_INLINE_FUNCTION
void
216 operator () (
const typename Kokkos::TeamPolicy<typename device_type::execution_space>::member_type & member)
const
218 Kokkos::abort(
"Tpetra::BcrsApplyNoTransFunctor:: this should not be called");
222 KOKKOS_INLINE_FUNCTION
void
223 operator () (
const local_ordinal_type& lclRow)
const
229 using Kokkos::Details::ArithTraits;
235 using Kokkos::parallel_for;
236 using Kokkos::subview;
237 typedef typename decltype (ptr_)::non_const_value_type offset_type;
238 typedef Kokkos::View<
typename MatrixValuesType::const_value_type**,
241 Kokkos::MemoryTraits<Kokkos::Unmanaged> >
244 const offset_type Y_ptBeg = lclRow * blockSize_;
245 const offset_type Y_ptEnd = Y_ptBeg + blockSize_;
246 auto Y_cur = subview (Y_, ::Kokkos::make_pair (Y_ptBeg, Y_ptEnd));
250 if (beta_ == ArithTraits<beta_coeff_type>::zero ()) {
251 FILL (Y_cur, ArithTraits<beta_coeff_type>::zero ());
253 else if (beta_ != ArithTraits<beta_coeff_type>::one ()) {
257 if (alpha_ != ArithTraits<alpha_coeff_type>::zero ()) {
258 const offset_type blkBeg = ptr_[lclRow];
259 const offset_type blkEnd = ptr_[lclRow+1];
261 const offset_type bs2 = blockSize_ * blockSize_;
262 for (offset_type absBlkOff = blkBeg; absBlkOff < blkEnd;
264 little_block_type A_cur (val_.data () + absBlkOff * bs2,
265 blockSize_, blockSize_);
266 const offset_type X_blkCol = ind_[absBlkOff];
267 const offset_type X_ptBeg = X_blkCol * blockSize_;
268 const offset_type X_ptEnd = X_ptBeg + blockSize_;
269 auto X_cur = subview (X_, ::Kokkos::make_pair (X_ptBeg, X_ptEnd));
271 GEMV (alpha_, A_cur, X_cur, Y_cur);
277 alpha_coeff_type alpha_;
278 typename GraphType::row_map_type::const_type ptr_;
279 typename GraphType::entries_type::const_type ind_;
280 MatrixValuesType val_;
283 beta_coeff_type beta_;
287 template<
class AlphaCoeffType,
289 class MatrixValuesType,
293 class BcrsApplyNoTransFunctor<AlphaCoeffType,
301 static_assert (Kokkos::Impl::is_view<MatrixValuesType>::value,
302 "MatrixValuesType must be a Kokkos::View.");
303 static_assert (Kokkos::Impl::is_view<OutVecType>::value,
304 "OutVecType must be a Kokkos::View.");
305 static_assert (Kokkos::Impl::is_view<InVecType>::value,
306 "InVecType must be a Kokkos::View.");
307 static_assert (std::is_same<MatrixValuesType,
308 typename MatrixValuesType::const_type>::value,
309 "MatrixValuesType must be a const Kokkos::View.");
310 static_assert (std::is_same<OutVecType,
311 typename OutVecType::non_const_type>::value,
312 "OutVecType must be a nonconst Kokkos::View.");
313 static_assert (std::is_same<InVecType, typename InVecType::const_type>::value,
314 "InVecType must be a const Kokkos::View.");
315 static_assert (static_cast<int> (MatrixValuesType::rank) == 1,
316 "MatrixValuesType must be a rank-1 Kokkos::View.");
317 static_assert (static_cast<int> (InVecType::rank) == 1,
318 "InVecType must be a rank-1 Kokkos::View.");
319 static_assert (static_cast<int> (OutVecType::rank) == 1,
320 "OutVecType must be a rank-1 Kokkos::View.");
321 typedef typename MatrixValuesType::non_const_value_type scalar_type;
324 typedef typename GraphType::device_type device_type;
327 typedef typename std::remove_const<typename GraphType::data_type>::type
336 void setX (
const InVecType& X) { X_ = X; }
344 void setY (
const OutVecType& Y) { Y_ = Y; }
346 typedef typename Kokkos::ArithTraits<typename std::decay<AlphaCoeffType>::type>::val_type alpha_coeff_type;
347 typedef typename Kokkos::ArithTraits<typename std::decay<BetaCoeffType>::type>::val_type beta_coeff_type;
350 BcrsApplyNoTransFunctor (
const alpha_coeff_type& alpha,
351 const GraphType& graph,
352 const MatrixValuesType& val,
353 const local_ordinal_type blockSize,
355 const beta_coeff_type& beta,
356 const OutVecType& Y) :
358 ptr_ (graph.row_map),
359 ind_ (graph.entries),
361 blockSize_ (blockSize),
368 KOKKOS_INLINE_FUNCTION
void
369 operator () (
const local_ordinal_type& lclRow)
const
371 Kokkos::abort(
"Tpetra::BcrsApplyNoTransFunctor:: this should not be called");
375 KOKKOS_INLINE_FUNCTION
void
376 operator () (
const typename Kokkos::TeamPolicy<typename device_type::execution_space>::member_type & member)
const
380 using Kokkos::Details::ArithTraits;
386 using Kokkos::parallel_for;
387 using Kokkos::subview;
388 typedef typename decltype (ptr_)::non_const_value_type offset_type;
389 typedef Kokkos::View<
typename MatrixValuesType::const_value_type**,
392 Kokkos::MemoryTraits<Kokkos::Unmanaged> >
395 const offset_type Y_ptBeg = lclRow * blockSize_;
396 const offset_type Y_ptEnd = Y_ptBeg + blockSize_;
397 auto Y_cur = subview (Y_, ::Kokkos::make_pair (Y_ptBeg, Y_ptEnd));
401 if (beta_ == ArithTraits<beta_coeff_type>::zero ()) {
402 Kokkos::parallel_for(Kokkos::TeamVectorRange(member, blockSize_),
403 [&](
const local_ordinal_type &i) {
404 Y_cur(i) = ArithTraits<beta_coeff_type>::zero ();
407 else if (beta_ != ArithTraits<beta_coeff_type>::one ()) {
408 Kokkos::parallel_for(Kokkos::TeamVectorRange(member, blockSize_),
409 [&](
const local_ordinal_type &i) {
413 member.team_barrier();
415 if (alpha_ != ArithTraits<alpha_coeff_type>::zero ()) {
416 const offset_type blkBeg = ptr_[lclRow];
417 const offset_type blkEnd = ptr_[lclRow+1];
419 const offset_type bs2 = blockSize_ * blockSize_;
420 little_block_type A_cur (val_.data (), blockSize_, blockSize_);
421 auto X_cur = subview (X_, ::Kokkos::make_pair (0, blockSize_));
423 (Kokkos::TeamThreadRange(member, blkBeg, blkEnd),
424 [&](
const local_ordinal_type &absBlkOff) {
425 A_cur.assign_data(val_.data () + absBlkOff * bs2);
426 const offset_type X_blkCol = ind_[absBlkOff];
427 const offset_type X_ptBeg = X_blkCol * blockSize_;
428 X_cur.assign_data(&X_(X_ptBeg));
431 (Kokkos::ThreadVectorRange(member, blockSize_),
432 [&](
const local_ordinal_type &k0) {
434 for (local_ordinal_type k1=0;k1<blockSize_;++k1)
435 val += A_cur(k0,k1)*X_cur(k1);
436 #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
438 Y_cur(k0) += alpha_*val;
444 Kokkos::atomic_add(&Y_cur(k0), alpha_*val);
452 alpha_coeff_type alpha_;
453 typename GraphType::row_map_type::const_type ptr_;
454 typename GraphType::entries_type::const_type ind_;
455 MatrixValuesType val_;
458 beta_coeff_type beta_;
463 template<
class AlphaCoeffType,
465 class MatrixValuesType,
466 class InMultiVecType,
468 class OutMultiVecType>
470 bcrsLocalApplyNoTrans (
const AlphaCoeffType& alpha,
471 const GraphType& graph,
472 const MatrixValuesType& val,
473 const typename std::remove_const<typename GraphType::data_type>::type blockSize,
474 const InMultiVecType& X,
475 const BetaCoeffType& beta,
476 const OutMultiVecType& Y
479 static_assert (Kokkos::Impl::is_view<MatrixValuesType>::value,
480 "MatrixValuesType must be a Kokkos::View.");
481 static_assert (Kokkos::Impl::is_view<OutMultiVecType>::value,
482 "OutMultiVecType must be a Kokkos::View.");
483 static_assert (Kokkos::Impl::is_view<InMultiVecType>::value,
484 "InMultiVecType must be a Kokkos::View.");
485 static_assert (static_cast<int> (MatrixValuesType::rank) == 1,
486 "MatrixValuesType must be a rank-1 Kokkos::View.");
487 static_assert (static_cast<int> (OutMultiVecType::rank) == 2,
488 "OutMultiVecType must be a rank-2 Kokkos::View.");
489 static_assert (static_cast<int> (InMultiVecType::rank) == 2,
490 "InMultiVecType must be a rank-2 Kokkos::View.");
492 typedef typename GraphType::device_type::execution_space execution_space;
493 typedef typename MatrixValuesType::const_type matrix_values_type;
494 typedef typename OutMultiVecType::non_const_type out_multivec_type;
495 typedef typename InMultiVecType::const_type in_multivec_type;
496 typedef typename Kokkos::ArithTraits<typename std::decay<AlphaCoeffType>::type>::val_type alpha_type;
497 typedef typename Kokkos::ArithTraits<typename std::decay<BetaCoeffType>::type>::val_type beta_type;
498 typedef typename std::remove_const<typename GraphType::data_type>::type LO;
500 constexpr
bool is_builtin_type_enabled = std::is_arithmetic<typename InMultiVecType::non_const_value_type>::value;
502 const LO numLocalMeshRows = graph.row_map.extent (0) == 0 ?
503 static_cast<LO
> (0) :
504 static_cast<LO> (graph.row_map.extent (0) - 1);
505 const LO numVecs = Y.extent (1);
506 if (numLocalMeshRows == 0 || numVecs == 0) {
513 in_multivec_type X_in = X;
514 out_multivec_type Y_out = Y;
519 typedef decltype (Kokkos::subview (X_in, Kokkos::ALL (), 0)) in_vec_type;
520 typedef decltype (Kokkos::subview (Y_out, Kokkos::ALL (), 0)) out_vec_type;
521 typedef BcrsApplyNoTransFunctor<alpha_type, GraphType,
522 matrix_values_type, in_vec_type, beta_type, out_vec_type,
523 is_builtin_type_enabled> functor_type;
525 auto X_0 = Kokkos::subview (X_in, Kokkos::ALL (), 0);
526 auto Y_0 = Kokkos::subview (Y_out, Kokkos::ALL (), 0);
529 if (is_builtin_type_enabled) {
530 functor_type functor (alpha, graph, val, blockSize, X_0, beta, Y_0);
532 typedef Kokkos::TeamPolicy<execution_space> policy_type;
533 policy_type policy(1,1);
534 #if defined(KOKKOS_ENABLE_CUDA)
535 constexpr
bool is_cuda = std::is_same<execution_space, Kokkos::Cuda>::value;
537 constexpr
bool is_cuda =
false;
538 #endif // defined(KOKKOS_ENABLE_CUDA)
540 LO team_size = 8, vector_size = 1;
541 if (blockSize <= 5) vector_size = 4;
542 else if (blockSize <= 9) vector_size = 8;
543 else if (blockSize <= 12) vector_size = 12;
544 else if (blockSize <= 20) vector_size = 20;
545 else vector_size = 20;
546 policy = policy_type(numLocalMeshRows, team_size, vector_size);
548 policy = policy_type(numLocalMeshRows, 1, 1);
550 Kokkos::parallel_for (policy, functor);
553 for (LO j = 1; j < numVecs; ++j) {
554 auto X_j = Kokkos::subview (X_in, Kokkos::ALL (), j);
555 auto Y_j = Kokkos::subview (Y_out, Kokkos::ALL (), j);
558 Kokkos::parallel_for (policy, functor);
561 functor_type functor (alpha, graph, val, blockSize, X_0, beta, Y_0);
562 Kokkos::RangePolicy<execution_space,LO> policy(0, numLocalMeshRows);
563 Kokkos::parallel_for (policy, functor);
564 for (LO j = 1; j < numVecs; ++j) {
565 auto X_j = Kokkos::subview (X_in, Kokkos::ALL (), j);
566 auto Y_j = Kokkos::subview (Y_out, Kokkos::ALL (), j);
569 Kokkos::parallel_for (policy, functor);
579 template<
class Scalar,
class LO,
class GO,
class Node>
580 class GetLocalDiagCopy {
582 typedef typename Node::device_type device_type;
583 typedef size_t diag_offset_type;
584 typedef Kokkos::View<
const size_t*, device_type,
585 Kokkos::MemoryUnmanaged> diag_offsets_type;
586 typedef typename ::Tpetra::CrsGraph<LO, GO, Node> global_graph_type;
588 typedef typename local_graph_type::row_map_type row_offsets_type;
589 typedef typename ::Tpetra::BlockMultiVector<Scalar, LO, GO, Node>::impl_scalar_type IST;
590 typedef Kokkos::View<IST***, device_type, Kokkos::MemoryUnmanaged> diag_type;
591 typedef Kokkos::View<const IST*, device_type, Kokkos::MemoryUnmanaged> values_type;
594 GetLocalDiagCopy (
const diag_type& diag,
595 const values_type& val,
596 const diag_offsets_type& diagOffsets,
597 const row_offsets_type& ptr,
598 const LO blockSize) :
600 diagOffsets_ (diagOffsets),
602 blockSize_ (blockSize),
603 offsetPerBlock_ (blockSize_*blockSize_),
607 KOKKOS_INLINE_FUNCTION
void
608 operator() (
const LO& lclRowInd)
const
613 const size_t absOffset = ptr_[lclRowInd];
616 const size_t relOffset = diagOffsets_[lclRowInd];
619 const size_t pointOffset = (absOffset+relOffset)*offsetPerBlock_;
623 typedef Kokkos::View<
const IST**, Kokkos::LayoutRight,
624 device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
625 const_little_block_type;
626 const_little_block_type D_in (val_.data () + pointOffset,
627 blockSize_, blockSize_);
628 auto D_out = Kokkos::subview (diag_, lclRowInd, ALL (), ALL ());
634 diag_offsets_type diagOffsets_;
635 row_offsets_type ptr_;
642 template<
class Scalar,
class LO,
class GO,
class Node>
644 BlockCrsMatrix<Scalar, LO, GO, Node>::
645 markLocalErrorAndGetStream ()
647 * (this->localError_) =
true;
648 if ((*errs_).is_null ()) {
649 *errs_ = Teuchos::rcp (
new std::ostringstream ());
654 template<
class Scalar,
class LO,
class GO,
class Node>
658 graph_ (Teuchos::rcp (new
map_type ()), 0),
659 blockSize_ (static_cast<LO> (0)),
660 X_colMap_ (new Teuchos::RCP<
BMV> ()),
661 Y_rowMap_ (new Teuchos::RCP<
BMV> ()),
662 pointImporter_ (new Teuchos::RCP<typename
crs_graph_type::import_type> ()),
664 localError_ (new bool (false)),
665 errs_ (new Teuchos::RCP<std::ostringstream> ())
669 template<
class Scalar,
class LO,
class GO,
class Node>
672 const LO blockSize) :
675 rowMeshMap_ (* (graph.getRowMap ())),
676 blockSize_ (blockSize),
677 X_colMap_ (new Teuchos::RCP<
BMV> ()),
678 Y_rowMap_ (new Teuchos::RCP<
BMV> ()),
679 pointImporter_ (new Teuchos::RCP<typename
crs_graph_type::import_type> ()),
680 offsetPerBlock_ (blockSize * blockSize),
681 localError_ (new bool (false)),
682 errs_ (new Teuchos::RCP<std::ostringstream> ())
684 TEUCHOS_TEST_FOR_EXCEPTION(
685 ! graph_.
isSorted (), std::invalid_argument,
"Tpetra::"
686 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted "
687 "rows (isSorted() is false). This class assumes sorted rows.");
689 graphRCP_ = Teuchos::rcpFromRef(graph_);
695 const bool blockSizeIsNonpositive = (blockSize + 1 <= 1);
696 TEUCHOS_TEST_FOR_EXCEPTION(
697 blockSizeIsNonpositive, std::invalid_argument,
"Tpetra::"
698 "BlockCrsMatrix constructor: The input blockSize = " << blockSize <<
699 " <= 0. The block size must be positive.");
707 const auto colPointMap = Teuchos::rcp
709 *pointImporter_ = Teuchos::rcp
713 typedef typename crs_graph_type::local_graph_type::row_map_type row_map_type;
714 typedef typename row_map_type::HostMirror::non_const_type nc_host_row_map_type;
717 nc_host_row_map_type ptr_h_nc = Kokkos::create_mirror_view (ptr_d);
722 typedef typename crs_graph_type::local_graph_type::entries_type entries_type;
723 typedef typename entries_type::HostMirror::non_const_type nc_host_entries_type;
726 nc_host_entries_type ind_h_nc = Kokkos::create_mirror_view (ind_d);
732 val_ = decltype (val_) (
"val", numValEnt);
735 template<
class Scalar,
class LO,
class GO,
class Node>
740 const LO blockSize) :
743 rowMeshMap_ (* (graph.getRowMap ())),
744 domainPointMap_ (domainPointMap),
745 rangePointMap_ (rangePointMap),
746 blockSize_ (blockSize),
747 X_colMap_ (new Teuchos::RCP<
BMV> ()),
748 Y_rowMap_ (new Teuchos::RCP<
BMV> ()),
749 pointImporter_ (new Teuchos::RCP<typename
crs_graph_type::import_type> ()),
750 offsetPerBlock_ (blockSize * blockSize),
751 localError_ (new bool (false)),
752 errs_ (new Teuchos::RCP<std::ostringstream> ())
754 TEUCHOS_TEST_FOR_EXCEPTION(
755 ! graph_.
isSorted (), std::invalid_argument,
"Tpetra::"
756 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted "
757 "rows (isSorted() is false). This class assumes sorted rows.");
759 graphRCP_ = Teuchos::rcpFromRef(graph_);
765 const bool blockSizeIsNonpositive = (blockSize + 1 <= 1);
766 TEUCHOS_TEST_FOR_EXCEPTION(
767 blockSizeIsNonpositive, std::invalid_argument,
"Tpetra::"
768 "BlockCrsMatrix constructor: The input blockSize = " << blockSize <<
769 " <= 0. The block size must be positive.");
773 const auto colPointMap = Teuchos::rcp
775 *pointImporter_ = Teuchos::rcp
779 typedef typename crs_graph_type::local_graph_type::row_map_type row_map_type;
780 typedef typename row_map_type::HostMirror::non_const_type nc_host_row_map_type;
783 nc_host_row_map_type ptr_h_nc = Kokkos::create_mirror_view (ptr_d);
788 typedef typename crs_graph_type::local_graph_type::entries_type entries_type;
789 typedef typename entries_type::HostMirror::non_const_type nc_host_entries_type;
792 nc_host_entries_type ind_h_nc = Kokkos::create_mirror_view (ind_d);
798 val_ = decltype (val_) (
"val", numValEnt);
801 template<
class Scalar,
class LO,
class GO,
class Node>
802 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
807 return Teuchos::rcp (
new map_type (domainPointMap_));
810 template<
class Scalar,
class LO,
class GO,
class Node>
811 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
816 return Teuchos::rcp (
new map_type (rangePointMap_));
819 template<
class Scalar,
class LO,
class GO,
class Node>
820 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
824 return graph_.getRowMap();
827 template<
class Scalar,
class LO,
class GO,
class Node>
828 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
832 return graph_.getColMap();
835 template<
class Scalar,
class LO,
class GO,
class Node>
840 return graph_.getGlobalNumRows();
843 template<
class Scalar,
class LO,
class GO,
class Node>
848 return graph_.getNodeNumRows();
851 template<
class Scalar,
class LO,
class GO,
class Node>
856 return graph_.getNodeMaxNumRowEntries();
859 template<
class Scalar,
class LO,
class GO,
class Node>
864 Teuchos::ETransp mode,
869 TEUCHOS_TEST_FOR_EXCEPTION(
870 mode != Teuchos::NO_TRANS && mode != Teuchos::TRANS && mode != Teuchos::CONJ_TRANS,
871 std::invalid_argument,
"Tpetra::BlockCrsMatrix::apply: "
872 "Invalid 'mode' argument. Valid values are Teuchos::NO_TRANS, "
873 "Teuchos::TRANS, and Teuchos::CONJ_TRANS.");
877 const LO blockSize = getBlockSize ();
879 X_view =
BMV (X, * (graph_.getDomainMap ()), blockSize);
880 Y_view =
BMV (Y, * (graph_.getRangeMap ()), blockSize);
882 catch (std::invalid_argument& e) {
883 TEUCHOS_TEST_FOR_EXCEPTION(
884 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
885 "apply: Either the input MultiVector X or the output MultiVector Y "
886 "cannot be viewed as a BlockMultiVector, given this BlockCrsMatrix's "
887 "graph. BlockMultiVector's constructor threw the following exception: "
896 const_cast<this_type&
> (*this).applyBlock (X_view, Y_view, mode, alpha, beta);
897 }
catch (std::invalid_argument& e) {
898 TEUCHOS_TEST_FOR_EXCEPTION(
899 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
900 "apply: The implementation method applyBlock complained about having "
901 "an invalid argument. It reported the following: " << e.what ());
902 }
catch (std::logic_error& e) {
903 TEUCHOS_TEST_FOR_EXCEPTION(
904 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
905 "apply: The implementation method applyBlock complained about a "
906 "possible bug in its implementation. It reported the following: "
908 }
catch (std::exception& e) {
909 TEUCHOS_TEST_FOR_EXCEPTION(
910 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
911 "apply: The implementation method applyBlock threw an exception which "
912 "is neither std::invalid_argument nor std::logic_error, but is a "
913 "subclass of std::exception. It reported the following: "
916 TEUCHOS_TEST_FOR_EXCEPTION(
917 true, std::logic_error,
"Tpetra::BlockCrsMatrix::"
918 "apply: The implementation method applyBlock threw an exception which "
919 "is not an instance of a subclass of std::exception. This probably "
920 "indicates a bug. Please report this to the Tpetra developers.");
924 template<
class Scalar,
class LO,
class GO,
class Node>
929 Teuchos::ETransp mode,
933 TEUCHOS_TEST_FOR_EXCEPTION(
935 "Tpetra::BlockCrsMatrix::applyBlock: "
936 "X and Y have different block sizes. X.getBlockSize() = "
940 if (mode == Teuchos::NO_TRANS) {
941 applyBlockNoTrans (X, Y, alpha, beta);
942 }
else if (mode == Teuchos::TRANS || mode == Teuchos::CONJ_TRANS) {
943 applyBlockTrans (X, Y, mode, alpha, beta);
945 TEUCHOS_TEST_FOR_EXCEPTION(
946 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
947 "applyBlock: Invalid 'mode' argument. Valid values are "
948 "Teuchos::NO_TRANS, Teuchos::TRANS, and Teuchos::CONJ_TRANS.");
952 template<
class Scalar,
class LO,
class GO,
class Node>
957 #ifdef HAVE_TPETRA_DEBUG
958 const char prefix[] =
"Tpetra::BlockCrsMatrix::setAllToScalar: ";
959 #endif // HAVE_TPETRA_DEBUG
961 if (this->need_sync_device ()) {
965 #ifdef HAVE_TPETRA_DEBUG
966 TEUCHOS_TEST_FOR_EXCEPTION
967 (this->need_sync_host (), std::runtime_error,
968 prefix <<
"The matrix's values need sync on both device and host.");
969 #endif // HAVE_TPETRA_DEBUG
970 this->modify_host ();
978 this->modify_device ();
983 template<
class Scalar,
class LO,
class GO,
class Node>
989 const LO numColInds)
const
991 #ifdef HAVE_TPETRA_DEBUG
992 const char prefix[] =
993 "Tpetra::BlockCrsMatrix::replaceLocalValues: ";
994 #endif // HAVE_TPETRA_DEBUG
996 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1001 return static_cast<LO
> (0);
1005 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1006 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
1007 const LO perBlockSize = this->offsetPerBlock ();
1012 #ifdef HAVE_TPETRA_DEBUG
1013 TEUCHOS_TEST_FOR_EXCEPTION
1014 (this->need_sync_host (), std::runtime_error,
1015 prefix <<
"The matrix's data were last modified on device, but have "
1016 "not been sync'd to host. Please sync to host (by calling "
1017 "sync<Kokkos::HostSpace>() on this matrix) before calling this "
1019 #endif // HAVE_TPETRA_DEBUG
1021 auto vals_host_out = getValuesHost ();
1024 for (LO k = 0; k < numColInds; ++k, pointOffset += perBlockSize) {
1025 const LO relBlockOffset =
1026 this->findRelOffsetOfColumnIndex (localRowInd, colInds[k], hint);
1027 if (relBlockOffset != LINV) {
1036 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1040 vals_host_out_raw + absBlockOffset * perBlockSize;
1045 for (LO i = 0; i < perBlockSize; ++i) {
1046 A_old[i] = A_new[i];
1048 hint = relBlockOffset + 1;
1055 template <
class Scalar,
class LO,
class GO,
class Node>
1059 Kokkos::MemoryUnmanaged>& offsets)
const
1061 graph_.getLocalDiagOffsets (offsets);
1065 template <
class Scalar,
class LO,
class GO,
class Node>
1071 Kokkos::MemoryUnmanaged>& D_inv,
1072 const Scalar& omega,
1077 Kokkos::Details::ArithTraits<impl_scalar_type>::zero ();
1079 Kokkos::Details::ArithTraits<impl_scalar_type>::one ();
1080 const LO numLocalMeshRows =
1081 static_cast<LO
> (rowMeshMap_.getNodeNumElements ());
1088 const LO blockSize = getBlockSize ();
1089 Teuchos::Array<impl_scalar_type> localMem (blockSize);
1090 Teuchos::Array<impl_scalar_type> localMat (blockSize*blockSize);
1094 LO rowBegin = 0, rowEnd = 0, rowStride = 0;
1095 if (direction == Forward) {
1097 rowEnd = numLocalMeshRows+1;
1100 else if (direction == Backward) {
1101 rowBegin = numLocalMeshRows;
1105 else if (direction == Symmetric) {
1106 this->localGaussSeidel (B, X, D_inv, omega, Forward);
1107 this->localGaussSeidel (B, X, D_inv, omega, Backward);
1111 const Scalar one_minus_omega = Teuchos::ScalarTraits<Scalar>::one()-omega;
1112 const Scalar minus_omega = -omega;
1115 for (LO lclRow = rowBegin; lclRow != rowEnd; lclRow += rowStride) {
1116 const LO actlRow = lclRow - 1;
1119 COPY (B_cur, X_lcl);
1120 SCAL (static_cast<impl_scalar_type> (omega), X_lcl);
1122 const size_t meshBeg = ptrHost_[actlRow];
1123 const size_t meshEnd = ptrHost_[actlRow+1];
1124 for (
size_t absBlkOff = meshBeg; absBlkOff < meshEnd; ++absBlkOff) {
1125 const LO meshCol = indHost_[absBlkOff];
1126 const_little_block_type A_cur =
1127 getConstLocalBlockFromAbsOffset (absBlkOff);
1131 const Scalar alpha = meshCol == actlRow ? one_minus_omega : minus_omega;
1133 GEMV (static_cast<impl_scalar_type> (alpha), A_cur, X_cur, X_lcl);
1139 auto D_lcl = Kokkos::subview (D_inv, actlRow, ALL (), ALL ());
1141 FILL (X_update, zero);
1142 GEMV (one, D_lcl, X_lcl, X_update);
1146 for (LO lclRow = rowBegin; lclRow != rowEnd; lclRow += rowStride) {
1147 for (LO j = 0; j < numVecs; ++j) {
1148 LO actlRow = lclRow-1;
1151 COPY (B_cur, X_lcl);
1152 SCAL (static_cast<impl_scalar_type> (omega), X_lcl);
1154 const size_t meshBeg = ptrHost_[actlRow];
1155 const size_t meshEnd = ptrHost_[actlRow+1];
1156 for (
size_t absBlkOff = meshBeg; absBlkOff < meshEnd; ++absBlkOff) {
1157 const LO meshCol = indHost_[absBlkOff];
1158 const_little_block_type A_cur =
1159 getConstLocalBlockFromAbsOffset (absBlkOff);
1163 const Scalar alpha = meshCol == actlRow ? one_minus_omega : minus_omega;
1164 GEMV (static_cast<impl_scalar_type> (alpha), A_cur, X_cur, X_lcl);
1167 auto D_lcl = Kokkos::subview (D_inv, actlRow, ALL (), ALL ());
1169 FILL (X_update, zero);
1170 GEMV (one, D_lcl, X_lcl, X_update);
1176 template <
class Scalar,
class LO,
class GO,
class Node>
1189 TEUCHOS_TEST_FOR_EXCEPTION(
1190 true, std::logic_error,
"Tpetra::BlockCrsMatrix::"
1191 "gaussSeidelCopy: Not implemented.");
1194 template <
class Scalar,
class LO,
class GO,
class Node>
1200 const Teuchos::ArrayView<LO>& ,
1208 TEUCHOS_TEST_FOR_EXCEPTION(
1209 true, std::logic_error,
"Tpetra::BlockCrsMatrix::"
1210 "reorderedGaussSeidelCopy: Not implemented.");
1214 template <
class Scalar,
class LO,
class GO,
class Node>
1218 Kokkos::MemoryUnmanaged>& diag,
1219 const Kokkos::View<
const size_t*, device_type,
1220 Kokkos::MemoryUnmanaged>& offsets)
const
1222 using Kokkos::parallel_for;
1223 typedef typename device_type::execution_space execution_space;
1224 const char prefix[] =
"Tpetra::BlockCrsMatrix::getLocalDiagCopy (2-arg): ";
1226 const LO lclNumMeshRows =
static_cast<LO
> (rowMeshMap_.getNodeNumElements ());
1227 const LO blockSize = this->getBlockSize ();
1228 TEUCHOS_TEST_FOR_EXCEPTION
1229 (static_cast<LO> (diag.extent (0)) < lclNumMeshRows ||
1230 static_cast<LO> (diag.extent (1)) < blockSize ||
1231 static_cast<LO> (diag.extent (2)) < blockSize,
1232 std::invalid_argument, prefix <<
1233 "The input Kokkos::View is not big enough to hold all the data.");
1234 TEUCHOS_TEST_FOR_EXCEPTION
1235 (static_cast<LO> (offsets.size ()) < lclNumMeshRows, std::invalid_argument,
1236 prefix <<
"offsets.size() = " << offsets.size () <<
" < local number of "
1237 "diagonal blocks " << lclNumMeshRows <<
".");
1239 #ifdef HAVE_TPETRA_DEBUG
1240 TEUCHOS_TEST_FOR_EXCEPTION
1241 (this->
template need_sync<device_type> (), std::runtime_error,
1242 prefix <<
"The matrix's data were last modified on host, but have "
1243 "not been sync'd to device. Please sync to device (by calling "
1244 "sync<device_type>() on this matrix) before calling this method.");
1245 #endif // HAVE_TPETRA_DEBUG
1247 typedef Kokkos::RangePolicy<execution_space, LO> policy_type;
1248 typedef GetLocalDiagCopy<Scalar, LO, GO, Node> functor_type;
1256 const_cast<this_type*
> (
this)->
template getValues<device_type> ();
1258 parallel_for (policy_type (0, lclNumMeshRows),
1259 functor_type (diag, vals_dev, offsets,
1260 graph_.getLocalGraph ().row_map, blockSize_));
1264 template <
class Scalar,
class LO,
class GO,
class Node>
1268 Kokkos::MemoryUnmanaged>& diag,
1269 const Teuchos::ArrayView<const size_t>& offsets)
const
1272 using Kokkos::parallel_for;
1274 Kokkos::MemoryUnmanaged>::HostMirror::execution_space host_exec_space;
1276 const LO lclNumMeshRows =
static_cast<LO
> (rowMeshMap_.getNodeNumElements ());
1277 const LO blockSize = this->getBlockSize ();
1278 TEUCHOS_TEST_FOR_EXCEPTION
1279 (static_cast<LO> (diag.extent (0)) < lclNumMeshRows ||
1280 static_cast<LO> (diag.extent (1)) < blockSize ||
1281 static_cast<LO> (diag.extent (2)) < blockSize,
1282 std::invalid_argument,
"Tpetra::BlockCrsMatrix::getLocalDiagCopy: "
1283 "The input Kokkos::View is not big enough to hold all the data.");
1284 TEUCHOS_TEST_FOR_EXCEPTION
1285 (static_cast<LO> (offsets.size ()) < lclNumMeshRows,
1286 std::invalid_argument,
"Tpetra::BlockCrsMatrix::getLocalDiagCopy: "
1287 "offsets.size() = " << offsets.size () <<
" < local number of diagonal "
1288 "blocks " << lclNumMeshRows <<
".");
1292 typedef Kokkos::RangePolicy<host_exec_space, LO> policy_type;
1293 parallel_for (policy_type (0, lclNumMeshRows), [=] (
const LO& lclMeshRow) {
1294 auto D_in = this->getConstLocalBlockFromRelOffset (lclMeshRow, offsets[lclMeshRow]);
1295 auto D_out = Kokkos::subview (diag, lclMeshRow, ALL (), ALL ());
1301 template<
class Scalar,
class LO,
class GO,
class Node>
1306 const Scalar vals[],
1307 const LO numColInds)
const
1309 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1314 return static_cast<LO
> (0);
1318 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1319 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
1320 const LO perBlockSize = this->offsetPerBlock ();
1325 for (LO k = 0; k < numColInds; ++k, pointOffset += perBlockSize) {
1326 const LO relBlockOffset =
1327 this->findRelOffsetOfColumnIndex (localRowInd, colInds[k], hint);
1328 if (relBlockOffset != LINV) {
1329 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1330 little_block_type A_old =
1331 getNonConstLocalBlockFromAbsOffset (absBlockOffset);
1332 const_little_block_type A_new =
1333 getConstLocalBlockFromInput (vIn, pointOffset);
1336 hint = relBlockOffset + 1;
1344 template<
class Scalar,
class LO,
class GO,
class Node>
1349 const Scalar vals[],
1350 const LO numColInds)
const
1352 #ifdef HAVE_TPETRA_DEBUG
1353 const char prefix[] =
1354 "Tpetra::BlockCrsMatrix::sumIntoLocalValues: ";
1355 #endif // HAVE_TPETRA_DEBUG
1357 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1362 return static_cast<LO
> (0);
1367 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1368 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
1369 const LO perBlockSize = this->offsetPerBlock ();
1374 #ifdef HAVE_TPETRA_DEBUG
1375 TEUCHOS_TEST_FOR_EXCEPTION
1376 (this->need_sync_host (), std::runtime_error,
1377 prefix <<
"The matrix's data were last modified on device, but have not "
1378 "been sync'd to host. Please sync to host (by calling "
1379 "sync<Kokkos::HostSpace>() on this matrix) before calling this method.");
1380 #endif // HAVE_TPETRA_DEBUG
1382 auto vals_host_out =
1386 for (LO k = 0; k < numColInds; ++k, pointOffset += perBlockSize) {
1387 const LO relBlockOffset =
1388 this->findRelOffsetOfColumnIndex (localRowInd, colInds[k], hint);
1389 if (relBlockOffset != LINV) {
1398 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1402 vals_host_out_raw + absBlockOffset * perBlockSize;
1407 for (LO i = 0; i < perBlockSize; ++i) {
1408 A_old[i] += A_new[i];
1410 hint = relBlockOffset + 1;
1417 template<
class Scalar,
class LO,
class GO,
class Node>
1425 #ifdef HAVE_TPETRA_DEBUG
1426 const char prefix[] =
1427 "Tpetra::BlockCrsMatrix::getLocalRowView: ";
1428 #endif // HAVE_TPETRA_DEBUG
1430 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1434 return Teuchos::OrdinalTraits<LO>::invalid ();
1437 const size_t absBlockOffsetStart = ptrHost_[localRowInd];
1438 colInds = indHost_.data () + absBlockOffsetStart;
1440 #ifdef HAVE_TPETRA_DEBUG
1441 TEUCHOS_TEST_FOR_EXCEPTION
1442 (this->need_sync_host (), std::runtime_error,
1443 prefix <<
"The matrix's data were last modified on device, but have "
1444 "not been sync'd to host. Please sync to host (by calling "
1445 "sync<Kokkos::HostSpace>() on this matrix) before calling this "
1447 #endif // HAVE_TPETRA_DEBUG
1449 auto vals_host_out = getValuesHost ();
1452 absBlockOffsetStart * offsetPerBlock ();
1453 vals =
reinterpret_cast<Scalar*
> (vOut);
1455 numInds = ptrHost_[localRowInd + 1] - absBlockOffsetStart;
1460 template<
class Scalar,
class LO,
class GO,
class Node>
1464 const Teuchos::ArrayView<LO>& Indices,
1465 const Teuchos::ArrayView<Scalar>& Values,
1466 size_t &NumEntries)
const
1471 getLocalRowView(LocalRow,colInds,vals,numInds);
1472 if (numInds > Indices.size() || numInds*blockSize_*blockSize_ > Values.size()) {
1473 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error,
1474 "Tpetra::BlockCrsMatrix::getLocalRowCopy : Column and/or values array is not large enough to hold "
1475 << numInds <<
" row entries");
1477 for (LO i=0; i<numInds; ++i) {
1478 Indices[i] = colInds[i];
1480 for (LO i=0; i<numInds*blockSize_*blockSize_; ++i) {
1481 Values[i] = vals[i];
1483 NumEntries = numInds;
1486 template<
class Scalar,
class LO,
class GO,
class Node>
1490 ptrdiff_t offsets[],
1492 const LO numColInds)
const
1494 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1499 return static_cast<LO
> (0);
1502 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
1506 for (LO k = 0; k < numColInds; ++k) {
1507 const LO relBlockOffset =
1508 this->findRelOffsetOfColumnIndex (localRowInd, colInds[k], hint);
1509 if (relBlockOffset != LINV) {
1510 offsets[k] =
static_cast<ptrdiff_t
> (relBlockOffset);
1511 hint = relBlockOffset + 1;
1519 template<
class Scalar,
class LO,
class GO,
class Node>
1523 const ptrdiff_t offsets[],
1524 const Scalar vals[],
1525 const LO numOffsets)
const
1527 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1532 return static_cast<LO
> (0);
1536 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1537 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
1538 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1539 size_t pointOffset = 0;
1542 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
1543 const size_t relBlockOffset = offsets[k];
1544 if (relBlockOffset != STINV) {
1545 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1546 little_block_type A_old =
1547 getNonConstLocalBlockFromAbsOffset (absBlockOffset);
1548 const_little_block_type A_new =
1549 getConstLocalBlockFromInput (vIn, pointOffset);
1550 COPY (A_new, A_old);
1558 template<
class Scalar,
class LO,
class GO,
class Node>
1562 const ptrdiff_t offsets[],
1563 const Scalar vals[],
1564 const LO numOffsets)
const
1566 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1571 return static_cast<LO
> (0);
1575 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1576 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
1577 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1578 size_t pointOffset = 0;
1581 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
1582 const size_t relBlockOffset = offsets[k];
1583 if (relBlockOffset != STINV) {
1584 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1585 little_block_type A_old =
1586 getNonConstLocalBlockFromAbsOffset (absBlockOffset);
1587 const_little_block_type A_new =
1588 getConstLocalBlockFromInput (vIn, pointOffset);
1597 template<
class Scalar,
class LO,
class GO,
class Node>
1601 const ptrdiff_t offsets[],
1602 const Scalar vals[],
1603 const LO numOffsets)
const
1605 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1610 return static_cast<LO
> (0);
1615 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1616 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
1617 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1618 size_t pointOffset = 0;
1621 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
1622 const size_t relBlockOffset = offsets[k];
1623 if (relBlockOffset != STINV) {
1624 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1625 little_block_type A_old =
1626 getNonConstLocalBlockFromAbsOffset (absBlockOffset);
1627 const_little_block_type A_new =
1628 getConstLocalBlockFromInput (vIn, pointOffset);
1630 AXPY (ONE, A_new, A_old);
1638 template<
class Scalar,
class LO,
class GO,
class Node>
1643 const size_t numEntInGraph = graph_.getNumEntriesInLocalRow (localRowInd);
1644 if (numEntInGraph == Teuchos::OrdinalTraits<size_t>::invalid ()) {
1645 return static_cast<LO
> (0);
1647 return static_cast<LO
> (numEntInGraph);
1651 template<
class Scalar,
class LO,
class GO,
class Node>
1656 const Teuchos::ETransp mode,
1666 TEUCHOS_TEST_FOR_EXCEPTION(
1667 true, std::logic_error,
"Tpetra::BlockCrsMatrix::apply: "
1668 "transpose and conjugate transpose modes are not yet implemented.");
1671 template<
class Scalar,
class LO,
class GO,
class Node>
1673 BlockCrsMatrix<Scalar, LO, GO, Node>::
1674 applyBlockNoTrans (
const BlockMultiVector<Scalar, LO, GO, Node>& X,
1675 BlockMultiVector<Scalar, LO, GO, Node>& Y,
1681 typedef ::Tpetra::Import<LO, GO, Node> import_type;
1682 typedef ::Tpetra::Export<LO, GO, Node> export_type;
1683 const Scalar zero = STS::zero ();
1684 const Scalar one = STS::one ();
1685 RCP<const import_type>
import = graph_.getImporter ();
1687 RCP<const export_type> theExport = graph_.getExporter ();
1688 const char prefix[] =
"Tpetra::BlockCrsMatrix::applyBlockNoTrans: ";
1690 if (alpha == zero) {
1694 else if (beta != one) {
1699 const BMV* X_colMap = NULL;
1700 if (
import.is_null ()) {
1704 catch (std::exception& e) {
1705 TEUCHOS_TEST_FOR_EXCEPTION
1706 (
true, std::logic_error, prefix <<
"Tpetra::MultiVector::"
1707 "operator= threw an exception: " << std::endl << e.what ());
1717 if ((*X_colMap_).is_null () ||
1718 (**X_colMap_).getNumVectors () != X.getNumVectors () ||
1719 (**X_colMap_).getBlockSize () != X.getBlockSize ()) {
1720 *X_colMap_ = rcp (
new BMV (* (graph_.getColMap ()), getBlockSize (),
1721 static_cast<LO
> (X.getNumVectors ())));
1723 (*X_colMap_)->getMultiVectorView().doImport (X.getMultiVectorView (),
1727 X_colMap = &(**X_colMap_);
1729 catch (std::exception& e) {
1730 TEUCHOS_TEST_FOR_EXCEPTION
1731 (
true, std::logic_error, prefix <<
"Tpetra::MultiVector::"
1732 "operator= threw an exception: " << std::endl << e.what ());
1736 BMV* Y_rowMap = NULL;
1737 if (theExport.is_null ()) {
1740 else if ((*Y_rowMap_).is_null () ||
1741 (**Y_rowMap_).getNumVectors () != Y.getNumVectors () ||
1742 (**Y_rowMap_).getBlockSize () != Y.getBlockSize ()) {
1743 *Y_rowMap_ = rcp (
new BMV (* (graph_.getRowMap ()), getBlockSize (),
1744 static_cast<LO
> (X.getNumVectors ())));
1746 Y_rowMap = &(**Y_rowMap_);
1748 catch (std::exception& e) {
1749 TEUCHOS_TEST_FOR_EXCEPTION(
1750 true, std::logic_error, prefix <<
"Tpetra::MultiVector::"
1751 "operator= threw an exception: " << std::endl << e.what ());
1756 localApplyBlockNoTrans (*X_colMap, *Y_rowMap, alpha, beta);
1758 catch (std::exception& e) {
1759 TEUCHOS_TEST_FOR_EXCEPTION
1760 (
true, std::runtime_error, prefix <<
"localApplyBlockNoTrans threw "
1761 "an exception: " << e.what ());
1764 TEUCHOS_TEST_FOR_EXCEPTION
1765 (
true, std::runtime_error, prefix <<
"localApplyBlockNoTrans threw "
1766 "an exception not a subclass of std::exception.");
1769 if (! theExport.is_null ()) {
1775 template<
class Scalar,
class LO,
class GO,
class Node>
1777 BlockCrsMatrix<Scalar, LO, GO, Node>::
1778 localApplyBlockNoTrans (
const BlockMultiVector<Scalar, LO, GO, Node>& X,
1779 BlockMultiVector<Scalar, LO, GO, Node>& Y,
1783 using ::Tpetra::Impl::bcrsLocalApplyNoTrans;
1785 const impl_scalar_type alpha_impl = alpha;
1786 const auto graph = this->graph_.getLocalGraph ();
1787 const impl_scalar_type beta_impl = beta;
1788 const LO blockSize = this->getBlockSize ();
1790 auto X_mv = X.getMultiVectorView ();
1791 auto Y_mv = Y.getMultiVectorView ();
1792 Y_mv.template modify<device_type> ();
1794 auto X_lcl = X_mv.template getLocalView<device_type> ();
1795 auto Y_lcl = Y_mv.template getLocalView<device_type> ();
1796 auto val = this->val_.template view<device_type> ();
1798 bcrsLocalApplyNoTrans (alpha_impl, graph, val, blockSize, X_lcl,
1802 template<
class Scalar,
class LO,
class GO,
class Node>
1804 BlockCrsMatrix<Scalar, LO, GO, Node>::
1805 findRelOffsetOfColumnIndex (
const LO localRowIndex,
1806 const LO colIndexToFind,
1807 const LO hint)
const
1809 const size_t absStartOffset = ptrHost_[localRowIndex];
1810 const size_t absEndOffset = ptrHost_[localRowIndex+1];
1811 const LO numEntriesInRow =
static_cast<LO
> (absEndOffset - absStartOffset);
1813 const LO*
const curInd = indHost_.data () + absStartOffset;
1816 if (hint < numEntriesInRow && curInd[hint] == colIndexToFind) {
1823 LO relOffset = Teuchos::OrdinalTraits<LO>::invalid ();
1828 const LO maxNumEntriesForLinearSearch = 32;
1829 if (numEntriesInRow > maxNumEntriesForLinearSearch) {
1834 const LO* beg = curInd;
1835 const LO* end = curInd + numEntriesInRow;
1836 std::pair<const LO*, const LO*> p =
1837 std::equal_range (beg, end, colIndexToFind);
1838 if (p.first != p.second) {
1840 relOffset =
static_cast<LO
> (p.first - beg);
1844 for (LO k = 0; k < numEntriesInRow; ++k) {
1845 if (colIndexToFind == curInd[k]) {
1855 template<
class Scalar,
class LO,
class GO,
class Node>
1857 BlockCrsMatrix<Scalar, LO, GO, Node>::
1858 offsetPerBlock ()
const
1860 return offsetPerBlock_;
1863 template<
class Scalar,
class LO,
class GO,
class Node>
1864 typename BlockCrsMatrix<Scalar, LO, GO, Node>::const_little_block_type
1865 BlockCrsMatrix<Scalar, LO, GO, Node>::
1866 getConstLocalBlockFromInput (
const impl_scalar_type* val,
1867 const size_t pointOffset)
const
1870 const LO rowStride = blockSize_;
1871 return const_little_block_type (val + pointOffset, blockSize_, rowStride);
1874 template<
class Scalar,
class LO,
class GO,
class Node>
1875 typename BlockCrsMatrix<Scalar, LO, GO, Node>::little_block_type
1876 BlockCrsMatrix<Scalar, LO, GO, Node>::
1877 getNonConstLocalBlockFromInput (impl_scalar_type* val,
1878 const size_t pointOffset)
const
1881 const LO rowStride = blockSize_;
1882 return little_block_type (val + pointOffset, blockSize_, rowStride);
1885 template<
class Scalar,
class LO,
class GO,
class Node>
1886 typename BlockCrsMatrix<Scalar, LO, GO, Node>::const_little_block_type
1887 BlockCrsMatrix<Scalar, LO, GO, Node>::
1888 getConstLocalBlockFromAbsOffset (
const size_t absBlockOffset)
const
1890 #ifdef HAVE_TPETRA_DEBUG
1891 const char prefix[] =
1892 "Tpetra::BlockCrsMatrix::getConstLocalBlockFromAbsOffset: ";
1893 #endif // HAVE_TPETRA_DEBUG
1895 if (absBlockOffset >= ptrHost_[rowMeshMap_.getNodeNumElements ()]) {
1899 return const_little_block_type ();
1902 #ifdef HAVE_TPETRA_DEBUG
1903 TEUCHOS_TEST_FOR_EXCEPTION
1904 (this->need_sync_host (), std::runtime_error,
1905 prefix <<
"The matrix's data were last modified on device, but have "
1906 "not been sync'd to host. Please sync to host (by calling "
1907 "sync<Kokkos::HostSpace>() on this matrix) before calling this "
1909 #endif // HAVE_TPETRA_DEBUG
1910 const size_t absPointOffset = absBlockOffset * offsetPerBlock ();
1912 auto vals_host = getValuesHost ();
1913 const impl_scalar_type* vals_host_raw = vals_host.data ();
1915 return getConstLocalBlockFromInput (vals_host_raw, absPointOffset);
1919 template<
class Scalar,
class LO,
class GO,
class Node>
1920 typename BlockCrsMatrix<Scalar, LO, GO, Node>::const_little_block_type
1921 BlockCrsMatrix<Scalar, LO, GO, Node>::
1922 getConstLocalBlockFromRelOffset (
const LO lclMeshRow,
1923 const size_t relMeshOffset)
const
1925 typedef impl_scalar_type IST;
1927 const LO* lclColInds = NULL;
1928 Scalar* lclVals = NULL;
1931 LO err = this->getLocalRowView (lclMeshRow, lclColInds, lclVals, numEnt);
1936 return const_little_block_type ();
1939 const size_t relPointOffset = relMeshOffset * this->offsetPerBlock ();
1940 IST* lclValsImpl =
reinterpret_cast<IST*
> (lclVals);
1941 return this->getConstLocalBlockFromInput (const_cast<const IST*> (lclValsImpl),
1946 template<
class Scalar,
class LO,
class GO,
class Node>
1947 typename BlockCrsMatrix<Scalar, LO, GO, Node>::little_block_type
1948 BlockCrsMatrix<Scalar, LO, GO, Node>::
1949 getNonConstLocalBlockFromAbsOffset (
const size_t absBlockOffset)
const
1951 #ifdef HAVE_TPETRA_DEBUG
1952 const char prefix[] =
1953 "Tpetra::BlockCrsMatrix::getNonConstLocalBlockFromAbsOffset: ";
1954 #endif // HAVE_TPETRA_DEBUG
1956 if (absBlockOffset >= ptrHost_[rowMeshMap_.getNodeNumElements ()]) {
1960 return little_block_type ();
1963 const size_t absPointOffset = absBlockOffset * offsetPerBlock ();
1964 #ifdef HAVE_TPETRA_DEBUG
1965 TEUCHOS_TEST_FOR_EXCEPTION
1966 (this->need_sync_host (), std::runtime_error,
1967 prefix <<
"The matrix's data were last modified on device, but have "
1968 "not been sync'd to host. Please sync to host (by calling "
1969 "sync<Kokkos::HostSpace>() on this matrix) before calling this "
1971 #endif // HAVE_TPETRA_DEBUG
1972 auto vals_host = getValuesHost();
1973 impl_scalar_type* vals_host_raw = vals_host.data ();
1974 return getNonConstLocalBlockFromInput (vals_host_raw, absPointOffset);
1978 template<
class Scalar,
class LO,
class GO,
class Node>
1979 typename BlockCrsMatrix<Scalar, LO, GO, Node>::little_block_type
1980 BlockCrsMatrix<Scalar, LO, GO, Node>::
1981 getLocalBlock (
const LO localRowInd,
const LO localColInd)
const
1983 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1984 const LO relBlockOffset =
1985 this->findRelOffsetOfColumnIndex (localRowInd, localColInd);
1987 if (relBlockOffset != Teuchos::OrdinalTraits<LO>::invalid ()) {
1988 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1989 return getNonConstLocalBlockFromAbsOffset (absBlockOffset);
1992 return little_block_type ();
2006 template<
class Scalar,
class LO,
class GO,
class Node>
2008 BlockCrsMatrix<Scalar, LO, GO, Node>::
2009 checkSizes (const ::Tpetra::SrcDistObject& source)
2012 typedef BlockCrsMatrix<Scalar, LO, GO, Node> this_type;
2013 const this_type* src =
dynamic_cast<const this_type*
> (&source);
2016 std::ostream& err = markLocalErrorAndGetStream ();
2017 err <<
"checkSizes: The source object of the Import or Export "
2018 "must be a BlockCrsMatrix with the same template parameters as the "
2019 "target object." << endl;
2024 if (src->getBlockSize () != this->getBlockSize ()) {
2025 std::ostream& err = markLocalErrorAndGetStream ();
2026 err <<
"checkSizes: The source and target objects of the Import or "
2027 <<
"Export must have the same block sizes. The source's block "
2028 <<
"size = " << src->getBlockSize () <<
" != the target's block "
2029 <<
"size = " << this->getBlockSize () <<
"." << endl;
2031 if (! src->graph_.isFillComplete ()) {
2032 std::ostream& err = markLocalErrorAndGetStream ();
2033 err <<
"checkSizes: The source object of the Import or Export is "
2034 "not fill complete. Both source and target objects must be fill "
2035 "complete." << endl;
2037 if (! this->graph_.isFillComplete ()) {
2038 std::ostream& err = markLocalErrorAndGetStream ();
2039 err <<
"checkSizes: The target object of the Import or Export is "
2040 "not fill complete. Both source and target objects must be fill "
2041 "complete." << endl;
2043 if (src->graph_.getColMap ().is_null ()) {
2044 std::ostream& err = markLocalErrorAndGetStream ();
2045 err <<
"checkSizes: The source object of the Import or Export does "
2046 "not have a column Map. Both source and target objects must have "
2047 "column Maps." << endl;
2049 if (this->graph_.getColMap ().is_null ()) {
2050 std::ostream& err = markLocalErrorAndGetStream ();
2051 err <<
"checkSizes: The target object of the Import or Export does "
2052 "not have a column Map. Both source and target objects must have "
2053 "column Maps." << endl;
2057 return ! (* (this->localError_));
2060 template<
class Scalar,
class LO,
class GO,
class Node>
2064 (const ::Tpetra::SrcDistObject& source,
2065 const size_t numSameIDs,
2071 using ::Tpetra::Details::Behavior;
2073 using ::Tpetra::Details::ProfilingRegion;
2077 ProfilingRegion profile_region(
"Tpetra::BlockCrsMatrix::copyAndPermute");
2078 const bool debug = Behavior::debug();
2079 const bool verbose = Behavior::verbose();
2084 std::ostringstream os;
2085 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2086 os <<
"Proc " << myRank <<
": BlockCrsMatrix::copyAndPermute : " << endl;
2091 if (* (this->localError_)) {
2092 std::ostream& err = this->markLocalErrorAndGetStream ();
2094 <<
"The target object of the Import or Export is already in an error state."
2103 std::ostringstream os;
2104 os << prefix << endl
2107 std::cerr << os.str ();
2113 if (permuteToLIDs.extent (0) != permuteFromLIDs.extent (0)) {
2114 std::ostream& err = this->markLocalErrorAndGetStream ();
2116 <<
"permuteToLIDs.extent(0) = " << permuteToLIDs.extent (0)
2117 <<
" != permuteFromLIDs.extent(0) = " << permuteFromLIDs.extent(0)
2121 if (permuteToLIDs.need_sync_host () || permuteFromLIDs.need_sync_host ()) {
2122 std::ostream& err = this->markLocalErrorAndGetStream ();
2124 <<
"Both permuteToLIDs and permuteFromLIDs must be sync'd to host."
2129 const this_type* src =
dynamic_cast<const this_type*
> (&source);
2131 std::ostream& err = this->markLocalErrorAndGetStream ();
2133 <<
"The source (input) object of the Import or "
2134 "Export is either not a BlockCrsMatrix, or does not have the right "
2135 "template parameters. checkSizes() should have caught this. "
2136 "Please report this bug to the Tpetra developers." << endl;
2145 const_cast<this_type*
>(src)->sync_host();
2149 bool lclErr =
false;
2150 #ifdef HAVE_TPETRA_DEBUG
2151 std::set<LO> invalidSrcCopyRows;
2152 std::set<LO> invalidDstCopyRows;
2153 std::set<LO> invalidDstCopyCols;
2154 std::set<LO> invalidDstPermuteCols;
2155 std::set<LO> invalidPermuteFromRows;
2156 #endif // HAVE_TPETRA_DEBUG
2165 #ifdef HAVE_TPETRA_DEBUG
2166 const map_type& srcRowMap = * (src->graph_.getRowMap ());
2167 #endif // HAVE_TPETRA_DEBUG
2168 const map_type& dstRowMap = * (this->graph_.getRowMap ());
2169 const map_type& srcColMap = * (src->graph_.getColMap ());
2170 const map_type& dstColMap = * (this->graph_.getColMap ());
2171 const bool canUseLocalColumnIndices = srcColMap.
locallySameAs (dstColMap);
2173 const size_t numPermute =
static_cast<size_t> (permuteFromLIDs.extent(0));
2175 std::ostringstream os;
2177 <<
"canUseLocalColumnIndices: "
2178 << (canUseLocalColumnIndices ?
"true" :
"false")
2179 <<
", numPermute: " << numPermute
2181 std::cerr << os.str ();
2184 const auto permuteToLIDsHost = permuteToLIDs.view_host();
2185 const auto permuteFromLIDsHost = permuteFromLIDs.view_host();
2187 if (canUseLocalColumnIndices) {
2189 for (LO localRow = 0; localRow < static_cast<LO> (numSameIDs); ++localRow) {
2190 #ifdef HAVE_TPETRA_DEBUG
2193 invalidSrcCopyRows.insert (localRow);
2196 #endif // HAVE_TPETRA_DEBUG
2198 const LO* lclSrcCols;
2203 LO err = src->getLocalRowView (localRow, lclSrcCols, vals, numEntries);
2206 #ifdef HAVE_TPETRA_DEBUG
2207 (void) invalidSrcCopyRows.insert (localRow);
2208 #endif // HAVE_TPETRA_DEBUG
2211 err = this->replaceLocalValues (localRow, lclSrcCols, vals, numEntries);
2212 if (err != numEntries) {
2215 #ifdef HAVE_TPETRA_DEBUG
2216 invalidDstCopyRows.insert (localRow);
2217 #endif // HAVE_TPETRA_DEBUG
2225 for (LO k = 0; k < numEntries; ++k) {
2228 #ifdef HAVE_TPETRA_DEBUG
2229 (void) invalidDstCopyCols.insert (lclSrcCols[k]);
2230 #endif // HAVE_TPETRA_DEBUG
2239 for (
size_t k = 0; k < numPermute; ++k) {
2240 const LO srcLclRow =
static_cast<LO
> (permuteFromLIDsHost(k));
2241 const LO dstLclRow =
static_cast<LO
> (permuteToLIDsHost(k));
2243 const LO* lclSrcCols;
2246 LO err = src->getLocalRowView (srcLclRow, lclSrcCols, vals, numEntries);
2249 #ifdef HAVE_TPETRA_DEBUG
2250 invalidPermuteFromRows.insert (srcLclRow);
2251 #endif // HAVE_TPETRA_DEBUG
2254 err = this->replaceLocalValues (dstLclRow, lclSrcCols, vals, numEntries);
2255 if (err != numEntries) {
2257 #ifdef HAVE_TPETRA_DEBUG
2258 for (LO c = 0; c < numEntries; ++c) {
2260 invalidDstPermuteCols.insert (lclSrcCols[c]);
2263 #endif // HAVE_TPETRA_DEBUG
2270 const size_t maxNumEnt = src->graph_.getNodeMaxNumRowEntries ();
2271 Teuchos::Array<LO> lclDstCols (maxNumEnt);
2274 for (LO localRow = 0; localRow < static_cast<LO> (numSameIDs); ++localRow) {
2275 const LO* lclSrcCols;
2282 err = src->getLocalRowView (localRow, lclSrcCols, vals, numEntries);
2283 }
catch (std::exception& e) {
2285 std::ostringstream os;
2286 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2287 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" localRow "
2288 << localRow <<
", src->getLocalRowView() threw an exception: "
2290 std::cerr << os.str ();
2297 #ifdef HAVE_TPETRA_DEBUG
2298 invalidSrcCopyRows.insert (localRow);
2299 #endif // HAVE_TPETRA_DEBUG
2302 if (static_cast<size_t> (numEntries) > static_cast<size_t> (lclDstCols.size ())) {
2305 std::ostringstream os;
2306 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2307 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" localRow "
2308 << localRow <<
", numEntries = " << numEntries <<
" > maxNumEnt = "
2309 << maxNumEnt << endl;
2310 std::cerr << os.str ();
2316 Teuchos::ArrayView<LO> lclDstColsView = lclDstCols.view (0, numEntries);
2317 for (LO j = 0; j < numEntries; ++j) {
2319 if (lclDstColsView[j] == Teuchos::OrdinalTraits<LO>::invalid ()) {
2321 #ifdef HAVE_TPETRA_DEBUG
2322 invalidDstCopyCols.insert (lclDstColsView[j]);
2323 #endif // HAVE_TPETRA_DEBUG
2327 err = this->replaceLocalValues (localRow, lclDstColsView.getRawPtr (), vals, numEntries);
2328 }
catch (std::exception& e) {
2330 std::ostringstream os;
2331 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2332 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" localRow "
2333 << localRow <<
", this->replaceLocalValues() threw an exception: "
2335 std::cerr << os.str ();
2339 if (err != numEntries) {
2342 std::ostringstream os;
2343 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2344 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" "
2345 "localRow " << localRow <<
", this->replaceLocalValues "
2346 "returned " << err <<
" instead of numEntries = "
2347 << numEntries << endl;
2348 std::cerr << os.str ();
2356 for (
size_t k = 0; k < numPermute; ++k) {
2357 const LO srcLclRow =
static_cast<LO
> (permuteFromLIDsHost(k));
2358 const LO dstLclRow =
static_cast<LO
> (permuteToLIDsHost(k));
2360 const LO* lclSrcCols;
2365 err = src->getLocalRowView (srcLclRow, lclSrcCols, vals, numEntries);
2366 }
catch (std::exception& e) {
2368 std::ostringstream os;
2369 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2370 os <<
"Proc " << myRank <<
": copyAndPermute: At \"permute\" "
2371 "srcLclRow " << srcLclRow <<
" and dstLclRow " << dstLclRow
2372 <<
", src->getLocalRowView() threw an exception: " << e.what ();
2373 std::cerr << os.str ();
2380 #ifdef HAVE_TPETRA_DEBUG
2381 invalidPermuteFromRows.insert (srcLclRow);
2382 #endif // HAVE_TPETRA_DEBUG
2385 if (static_cast<size_t> (numEntries) > static_cast<size_t> (lclDstCols.size ())) {
2391 Teuchos::ArrayView<LO> lclDstColsView = lclDstCols.view (0, numEntries);
2392 for (LO j = 0; j < numEntries; ++j) {
2394 if (lclDstColsView[j] == Teuchos::OrdinalTraits<LO>::invalid ()) {
2396 #ifdef HAVE_TPETRA_DEBUG
2397 invalidDstPermuteCols.insert (lclDstColsView[j]);
2398 #endif // HAVE_TPETRA_DEBUG
2401 err = this->replaceLocalValues (dstLclRow, lclDstColsView.getRawPtr (), vals, numEntries);
2402 if (err != numEntries) {
2411 std::ostream& err = this->markLocalErrorAndGetStream ();
2412 #ifdef HAVE_TPETRA_DEBUG
2413 err <<
"copyAndPermute: The graph structure of the source of the "
2414 "Import or Export must be a subset of the graph structure of the "
2416 err <<
"invalidSrcCopyRows = [";
2417 for (
typename std::set<LO>::const_iterator it = invalidSrcCopyRows.begin ();
2418 it != invalidSrcCopyRows.end (); ++it) {
2420 typename std::set<LO>::const_iterator itp1 = it;
2422 if (itp1 != invalidSrcCopyRows.end ()) {
2426 err <<
"], invalidDstCopyRows = [";
2427 for (
typename std::set<LO>::const_iterator it = invalidDstCopyRows.begin ();
2428 it != invalidDstCopyRows.end (); ++it) {
2430 typename std::set<LO>::const_iterator itp1 = it;
2432 if (itp1 != invalidDstCopyRows.end ()) {
2436 err <<
"], invalidDstCopyCols = [";
2437 for (
typename std::set<LO>::const_iterator it = invalidDstCopyCols.begin ();
2438 it != invalidDstCopyCols.end (); ++it) {
2440 typename std::set<LO>::const_iterator itp1 = it;
2442 if (itp1 != invalidDstCopyCols.end ()) {
2446 err <<
"], invalidDstPermuteCols = [";
2447 for (
typename std::set<LO>::const_iterator it = invalidDstPermuteCols.begin ();
2448 it != invalidDstPermuteCols.end (); ++it) {
2450 typename std::set<LO>::const_iterator itp1 = it;
2452 if (itp1 != invalidDstPermuteCols.end ()) {
2456 err <<
"], invalidPermuteFromRows = [";
2457 for (
typename std::set<LO>::const_iterator it = invalidPermuteFromRows.begin ();
2458 it != invalidPermuteFromRows.end (); ++it) {
2460 typename std::set<LO>::const_iterator itp1 = it;
2462 if (itp1 != invalidPermuteFromRows.end ()) {
2468 err <<
"copyAndPermute: The graph structure of the source of the "
2469 "Import or Export must be a subset of the graph structure of the "
2471 #endif // HAVE_TPETRA_DEBUG
2475 std::ostringstream os;
2476 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2477 const bool lclSuccess = ! (* (this->localError_));
2478 os <<
"*** Proc " << myRank <<
": copyAndPermute "
2479 << (lclSuccess ?
"succeeded" :
"FAILED");
2483 os <<
": error messages: " << this->errorMessages ();
2485 std::cerr << os.str ();
2509 template<
class LO,
class GO>
2511 packRowCount (
const size_t numEnt,
2512 const size_t numBytesPerValue,
2513 const size_t blkSize)
2515 using ::Tpetra::Details::PackTraits;
2526 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
2527 const size_t valsLen = numEnt * numBytesPerValue * blkSize * blkSize;
2528 return numEntLen + gidsLen + valsLen;
2542 template<
class ST,
class LO,
class GO>
2544 unpackRowCount (
const typename ::Tpetra::Details::PackTraits<LO>::input_buffer_type& imports,
2545 const size_t offset,
2546 const size_t numBytes,
2549 using Kokkos::subview;
2550 using ::Tpetra::Details::PackTraits;
2552 if (numBytes == 0) {
2554 return static_cast<size_t> (0);
2559 TEUCHOS_TEST_FOR_EXCEPTION
2560 (theNumBytes > numBytes, std::logic_error,
"unpackRowCount: "
2561 "theNumBytes = " << theNumBytes <<
" < numBytes = " << numBytes
2563 const char*
const inBuf = imports.data () + offset;
2565 TEUCHOS_TEST_FOR_EXCEPTION
2566 (actualNumBytes > numBytes, std::logic_error,
"unpackRowCount: "
2567 "actualNumBytes = " << actualNumBytes <<
" < numBytes = " << numBytes
2569 return static_cast<size_t> (numEntLO);
2576 template<
class ST,
class LO,
class GO>
2578 packRowForBlockCrs (
const typename ::Tpetra::Details::PackTraits<LO>::output_buffer_type exports,
2579 const size_t offset,
2580 const size_t numEnt,
2581 const typename ::Tpetra::Details::PackTraits<GO>::input_array_type& gidsIn,
2582 const typename ::Tpetra::Details::PackTraits<ST>::input_array_type& valsIn,
2583 const size_t numBytesPerValue,
2584 const size_t blockSize)
2586 using Kokkos::subview;
2587 using ::Tpetra::Details::PackTraits;
2593 const size_t numScalarEnt = numEnt * blockSize * blockSize;
2596 const LO numEntLO =
static_cast<size_t> (numEnt);
2598 const size_t numEntBeg = offset;
2600 const size_t gidsBeg = numEntBeg + numEntLen;
2601 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
2602 const size_t valsBeg = gidsBeg + gidsLen;
2603 const size_t valsLen = numScalarEnt * numBytesPerValue;
2605 char*
const numEntOut = exports.data () + numEntBeg;
2606 char*
const gidsOut = exports.data () + gidsBeg;
2607 char*
const valsOut = exports.data () + valsBeg;
2609 size_t numBytesOut = 0;
2614 Kokkos::pair<int, size_t> p;
2615 p = PackTraits<GO>::packArray (gidsOut, gidsIn.data (), numEnt);
2616 errorCode += p.first;
2617 numBytesOut += p.second;
2619 p = PackTraits<ST>::packArray (valsOut, valsIn.data (), numScalarEnt);
2620 errorCode += p.first;
2621 numBytesOut += p.second;
2624 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
2625 TEUCHOS_TEST_FOR_EXCEPTION
2626 (numBytesOut != expectedNumBytes, std::logic_error,
2627 "packRowForBlockCrs: numBytesOut = " << numBytesOut
2628 <<
" != expectedNumBytes = " << expectedNumBytes <<
".");
2630 TEUCHOS_TEST_FOR_EXCEPTION
2631 (errorCode != 0, std::runtime_error,
"packRowForBlockCrs: "
2632 "PackTraits::packArray returned a nonzero error code");
2638 template<
class ST,
class LO,
class GO>
2640 unpackRowForBlockCrs (
const typename ::Tpetra::Details::PackTraits<GO>::output_array_type& gidsOut,
2641 const typename ::Tpetra::Details::PackTraits<ST>::output_array_type& valsOut,
2642 const typename ::Tpetra::Details::PackTraits<int>::input_buffer_type& imports,
2643 const size_t offset,
2644 const size_t numBytes,
2645 const size_t numEnt,
2646 const size_t numBytesPerValue,
2647 const size_t blockSize)
2649 using ::Tpetra::Details::PackTraits;
2651 if (numBytes == 0) {
2655 const size_t numScalarEnt = numEnt * blockSize * blockSize;
2656 TEUCHOS_TEST_FOR_EXCEPTION
2657 (static_cast<size_t> (imports.extent (0)) <= offset,
2658 std::logic_error,
"unpackRowForBlockCrs: imports.extent(0) = "
2659 << imports.extent (0) <<
" <= offset = " << offset <<
".");
2660 TEUCHOS_TEST_FOR_EXCEPTION
2661 (static_cast<size_t> (imports.extent (0)) < offset + numBytes,
2662 std::logic_error,
"unpackRowForBlockCrs: imports.extent(0) = "
2663 << imports.extent (0) <<
" < offset + numBytes = "
2664 << (offset + numBytes) <<
".");
2669 const size_t numEntBeg = offset;
2671 const size_t gidsBeg = numEntBeg + numEntLen;
2672 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
2673 const size_t valsBeg = gidsBeg + gidsLen;
2674 const size_t valsLen = numScalarEnt * numBytesPerValue;
2676 const char*
const numEntIn = imports.data () + numEntBeg;
2677 const char*
const gidsIn = imports.data () + gidsBeg;
2678 const char*
const valsIn = imports.data () + valsBeg;
2680 size_t numBytesOut = 0;
2684 TEUCHOS_TEST_FOR_EXCEPTION
2685 (static_cast<size_t> (numEntOut) != numEnt, std::logic_error,
2686 "unpackRowForBlockCrs: Expected number of entries " << numEnt
2687 <<
" != actual number of entries " << numEntOut <<
".");
2690 Kokkos::pair<int, size_t> p;
2691 p = PackTraits<GO>::unpackArray (gidsOut.data (), gidsIn, numEnt);
2692 errorCode += p.first;
2693 numBytesOut += p.second;
2695 p = PackTraits<ST>::unpackArray (valsOut.data (), valsIn, numScalarEnt);
2696 errorCode += p.first;
2697 numBytesOut += p.second;
2700 TEUCHOS_TEST_FOR_EXCEPTION
2701 (numBytesOut != numBytes, std::logic_error,
2702 "unpackRowForBlockCrs: numBytesOut = " << numBytesOut
2703 <<
" != numBytes = " << numBytes <<
".");
2705 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
2706 TEUCHOS_TEST_FOR_EXCEPTION
2707 (numBytesOut != expectedNumBytes, std::logic_error,
2708 "unpackRowForBlockCrs: numBytesOut = " << numBytesOut
2709 <<
" != expectedNumBytes = " << expectedNumBytes <<
".");
2711 TEUCHOS_TEST_FOR_EXCEPTION
2712 (errorCode != 0, std::runtime_error,
"unpackRowForBlockCrs: "
2713 "PackTraits::unpackArray returned a nonzero error code");
2719 template<
class Scalar,
class LO,
class GO,
class Node>
2723 (const ::Tpetra::SrcDistObject& source,
2728 Kokkos::DualView<
size_t*,
2730 size_t& constantNumPackets,
2733 using ::Tpetra::Details::Behavior;
2735 using ::Tpetra::Details::ProfilingRegion;
2736 using ::Tpetra::Details::PackTraits;
2738 typedef typename Kokkos::View<int*, device_type>::HostMirror::execution_space host_exec;
2742 ProfilingRegion profile_region(
"Tpetra::BlockCrsMatrix::packAndPrepare");
2744 const bool debug = Behavior::debug();
2745 const bool verbose = Behavior::verbose();
2750 std::ostringstream os;
2751 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2752 os <<
"Proc " << myRank <<
": BlockCrsMatrix::packAndPrepare: " << std::endl;
2757 if (* (this->localError_)) {
2758 std::ostream& err = this->markLocalErrorAndGetStream ();
2760 <<
"The target object of the Import or Export is already in an error state."
2769 std::ostringstream os;
2770 os << prefix << std::endl
2774 std::cerr << os.str ();
2780 if (exportLIDs.extent (0) != numPacketsPerLID.extent (0)) {
2781 std::ostream& err = this->markLocalErrorAndGetStream ();
2783 <<
"exportLIDs.extent(0) = " << exportLIDs.extent (0)
2784 <<
" != numPacketsPerLID.extent(0) = " << numPacketsPerLID.extent(0)
2785 <<
"." << std::endl;
2788 if (exportLIDs.need_sync_host ()) {
2789 std::ostream& err = this->markLocalErrorAndGetStream ();
2790 err << prefix <<
"exportLIDs be sync'd to host." << std::endl;
2794 const this_type* src =
dynamic_cast<const this_type*
> (&source);
2796 std::ostream& err = this->markLocalErrorAndGetStream ();
2798 <<
"The source (input) object of the Import or "
2799 "Export is either not a BlockCrsMatrix, or does not have the right "
2800 "template parameters. checkSizes() should have caught this. "
2801 "Please report this bug to the Tpetra developers." << std::endl;
2813 constantNumPackets = 0;
2817 const size_t blockSize =
static_cast<size_t> (src->getBlockSize ());
2818 const size_t numExportLIDs = exportLIDs.extent (0);
2819 const size_t numBytesPerValue =
2820 PackTraits<impl_scalar_type>
2821 ::packValueCount(this->val_.extent(0) ? this->val_.view_host()(0) :
impl_scalar_type());
2827 Impl::BlockCrsRowStruct<size_t> rowReducerStruct;
2830 auto exportLIDsHost = exportLIDs.view_host();
2831 auto numPacketsPerLIDHost = numPacketsPerLID.view_host();
2832 numPacketsPerLID.modify_host ();
2834 using reducer_type = Impl::BlockCrsReducer<Impl::BlockCrsRowStruct<size_t>,host_exec>;
2835 const auto policy = Kokkos::RangePolicy<host_exec>(size_t(0), numExportLIDs);
2836 Kokkos::parallel_reduce
2838 [=](
const size_t &i,
typename reducer_type::value_type &update) {
2839 const LO lclRow = exportLIDsHost(i);
2841 numEnt = (numEnt == Teuchos::OrdinalTraits<size_t>::invalid () ? 0 : numEnt);
2843 const size_t numBytes =
2844 packRowCount<LO, GO> (numEnt, numBytesPerValue, blockSize);
2845 numPacketsPerLIDHost(i) = numBytes;
2846 update +=
typename reducer_type::value_type(numEnt, numBytes, numEnt);
2847 }, rowReducerStruct);
2853 const size_t totalNumBytes = rowReducerStruct.totalNumBytes;
2854 const size_t totalNumEntries = rowReducerStruct.totalNumEntries;
2855 const size_t maxRowLength = rowReducerStruct.maxRowLength;
2858 std::ostringstream os;
2860 <<
"totalNumBytes = " << totalNumBytes <<
", totalNumEntries = " << totalNumEntries
2862 std::cerr << os.str ();
2868 if(exports.extent(0) != totalNumBytes)
2870 const std::string oldLabel = exports.d_view.label ();
2871 const std::string newLabel = (oldLabel ==
"") ?
"exports" : oldLabel;
2872 exports = Kokkos::DualView<packet_type*, buffer_device_type>(newLabel, totalNumBytes);
2874 if (totalNumEntries > 0) {
2876 Kokkos::View<size_t*, host_exec> offset(
"offset", numExportLIDs+1);
2878 const auto policy = Kokkos::RangePolicy<host_exec>(size_t(0), numExportLIDs+1);
2879 Kokkos::parallel_scan
2881 [=](
const size_t &i,
size_t &update,
const bool &
final) {
2882 if (
final) offset(i) = update;
2883 update += (i == numExportLIDs ? 0 : numPacketsPerLIDHost(i));
2886 if (offset(numExportLIDs) != totalNumBytes) {
2887 std::ostream& err = this->markLocalErrorAndGetStream ();
2889 <<
"At end of method, the final offset (in bytes) "
2890 << offset(numExportLIDs) <<
" does not equal the total number of bytes packed "
2891 << totalNumBytes <<
". "
2892 <<
"Please report this bug to the Tpetra developers." << std::endl;
2906 exports.modify_host();
2908 typedef Kokkos::TeamPolicy<host_exec> policy_type;
2910 policy_type(numExportLIDs, 1, 1)
2911 .set_scratch_size(0, Kokkos::PerTeam(
sizeof(GO)*maxRowLength));
2912 Kokkos::parallel_for
2914 [=](
const typename policy_type::member_type &member) {
2915 const size_t i = member.league_rank();
2916 Kokkos::View<GO*, typename host_exec::scratch_memory_space>
2917 gblColInds(member.team_scratch(0), maxRowLength);
2919 const LO lclRowInd = exportLIDsHost(i);
2920 const LO* lclColIndsRaw;
2926 (void) src->getLocalRowView (lclRowInd, lclColIndsRaw, valsRaw, numEntLO);
2928 const size_t numEnt =
static_cast<size_t> (numEntLO);
2929 Kokkos::View<const LO*,host_exec> lclColInds (lclColIndsRaw, numEnt);
2932 for (
size_t j = 0; j < numEnt; ++j)
2939 const size_t numBytes =
2940 packRowForBlockCrs<impl_scalar_type, LO, GO>
2941 (exports.view_host(),
2944 Kokkos::View<const GO*, host_exec>(gblColInds.data(), maxRowLength),
2945 Kokkos::View<const impl_scalar_type*, host_exec>(reinterpret_cast<const impl_scalar_type*>(valsRaw), numEnt*blockSize*blockSize),
2951 const size_t offsetDiff = offset(i+1) - offset(i);
2952 if (numBytes != offsetDiff) {
2953 std::ostringstream os;
2955 <<
"numBytes computed from packRowForBlockCrs is different from "
2956 <<
"precomputed offset values, LID = " << i << std::endl;
2957 std::cerr << os.str ();
2965 std::ostringstream os;
2966 const bool lclSuccess = ! (* (this->localError_));
2968 << (lclSuccess ?
"succeeded" :
"FAILED")
2970 std::cerr << os.str ();
2974 template<
class Scalar,
class LO,
class GO,
class Node>
2982 Kokkos::DualView<
size_t*,
2988 using ::Tpetra::Details::Behavior;
2990 using ::Tpetra::Details::ProfilingRegion;
2991 using ::Tpetra::Details::PackTraits;
2994 typename Kokkos::View<int*, device_type>::HostMirror::execution_space;
2996 ProfilingRegion profile_region(
"Tpetra::BlockCrsMatrix::unpackAndCombine");
2997 const bool verbose = Behavior::verbose ();
3002 std::ostringstream os;
3003 auto map = this->graph_.getRowMap ();
3004 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
3005 const int myRank = comm.is_null () ? -1 : comm->getRank ();
3006 os <<
"Proc " << myRank <<
": Tpetra::BlockCrsMatrix::unpackAndCombine: ";
3009 os <<
"Start" << endl;
3010 std::cerr << os.str ();
3015 if (* (this->localError_)) {
3016 std::ostream& err = this->markLocalErrorAndGetStream ();
3017 std::ostringstream os;
3018 os << prefix <<
"{Im/Ex}port target is already in error." << endl;
3020 std::cerr << os.str ();
3029 if (importLIDs.extent (0) != numPacketsPerLID.extent (0)) {
3030 std::ostream& err = this->markLocalErrorAndGetStream ();
3031 std::ostringstream os;
3032 os << prefix <<
"importLIDs.extent(0) = " << importLIDs.extent (0)
3033 <<
" != numPacketsPerLID.extent(0) = " << numPacketsPerLID.extent(0)
3036 std::cerr << os.str ();
3042 if (combineMode !=
ADD && combineMode !=
INSERT &&
3044 combineMode !=
ZERO) {
3045 std::ostream& err = this->markLocalErrorAndGetStream ();
3046 std::ostringstream os;
3048 <<
"Invalid CombineMode value " << combineMode <<
". Valid "
3049 <<
"values include ADD, INSERT, REPLACE, ABSMAX, and ZERO."
3052 std::cerr << os.str ();
3058 if (this->graph_.getColMap ().is_null ()) {
3059 std::ostream& err = this->markLocalErrorAndGetStream ();
3060 std::ostringstream os;
3061 os << prefix <<
"Target matrix's column Map is null." << endl;
3063 std::cerr << os.str ();
3072 const map_type& tgtColMap = * (this->graph_.getColMap ());
3075 const size_t blockSize = this->getBlockSize ();
3076 const size_t numImportLIDs = importLIDs.extent(0);
3083 const size_t numBytesPerValue =
3084 PackTraits<impl_scalar_type>::packValueCount
3085 (this->val_.extent (0) ? this->val_.view_host () (0) :
impl_scalar_type ());
3086 const size_t maxRowNumEnt = graph_.getNodeMaxNumRowEntries ();
3087 const size_t maxRowNumScalarEnt = maxRowNumEnt * blockSize * blockSize;
3090 std::ostringstream os;
3091 os << prefix <<
"combineMode: "
3092 << ::Tpetra::combineModeToString (combineMode)
3093 <<
", blockSize: " << blockSize
3094 <<
", numImportLIDs: " << numImportLIDs
3095 <<
", numBytesPerValue: " << numBytesPerValue
3096 <<
", maxRowNumEnt: " << maxRowNumEnt
3097 <<
", maxRowNumScalarEnt: " << maxRowNumScalarEnt << endl;
3098 std::cerr << os.str ();
3101 if (combineMode ==
ZERO || numImportLIDs == 0) {
3103 std::ostringstream os;
3104 os << prefix <<
"Nothing to unpack. Done!" << std::endl;
3105 std::cerr << os.str ();
3112 if (imports.need_sync_host ()) {
3113 imports.sync_host ();
3123 if (imports.need_sync_host () || numPacketsPerLID.need_sync_host () ||
3124 importLIDs.need_sync_host ()) {
3125 std::ostream& err = this->markLocalErrorAndGetStream ();
3126 std::ostringstream os;
3127 os << prefix <<
"All input DualViews must be sync'd to host by now. "
3128 <<
"imports_nc.need_sync_host()="
3129 << (imports.need_sync_host () ?
"true" :
"false")
3130 <<
", numPacketsPerLID.need_sync_host()="
3131 << (numPacketsPerLID.need_sync_host () ?
"true" :
"false")
3132 <<
", importLIDs.need_sync_host()="
3133 << (importLIDs.need_sync_host () ?
"true" :
"false")
3136 std::cerr << os.str ();
3142 const auto importLIDsHost = importLIDs.view_host ();
3143 const auto numPacketsPerLIDHost = numPacketsPerLID.view_host ();
3149 Kokkos::View<size_t*, host_exec> offset (
"offset", numImportLIDs+1);
3151 const auto policy = Kokkos::RangePolicy<host_exec>(size_t(0), numImportLIDs+1);
3152 Kokkos::parallel_scan
3153 (
"Tpetra::BlockCrsMatrix::unpackAndCombine: offsets", policy,
3154 [=] (
const size_t &i,
size_t &update,
const bool &
final) {
3155 if (
final) offset(i) = update;
3156 update += (i == numImportLIDs ? 0 : numPacketsPerLIDHost(i));
3166 Kokkos::View<int, host_exec, Kokkos::MemoryTraits<Kokkos::Atomic> >
3167 errorDuringUnpack (
"errorDuringUnpack");
3168 errorDuringUnpack () = 0;
3170 using policy_type = Kokkos::TeamPolicy<host_exec>;
3171 const auto policy = policy_type (numImportLIDs, 1, 1)
3172 .set_scratch_size (0, Kokkos::PerTeam (
sizeof (GO) * maxRowNumEnt +
3173 sizeof (LO) * maxRowNumEnt +
3174 numBytesPerValue * maxRowNumScalarEnt));
3175 using host_scratch_space =
typename host_exec::scratch_memory_space;
3176 using pair_type = Kokkos::pair<size_t, size_t>;
3177 Kokkos::parallel_for
3178 (
"Tpetra::BlockCrsMatrix::unpackAndCombine: unpack", policy,
3179 [=] (
const typename policy_type::member_type& member) {
3180 const size_t i = member.league_rank();
3182 Kokkos::View<GO*, host_scratch_space> gblColInds
3183 (member.team_scratch (0), maxRowNumEnt);
3184 Kokkos::View<LO*, host_scratch_space> lclColInds
3185 (member.team_scratch (0), maxRowNumEnt);
3186 Kokkos::View<impl_scalar_type*, host_scratch_space> vals
3187 (member.team_scratch (0), maxRowNumScalarEnt);
3189 const size_t offval = offset(i);
3190 const LO lclRow = importLIDsHost(i);
3191 const size_t numBytes = numPacketsPerLIDHost(i);
3192 const size_t numEnt =
3193 unpackRowCount<impl_scalar_type, LO, GO>
3194 (imports.view_host (), offval, numBytes, numBytesPerValue);
3197 if (numEnt > maxRowNumEnt) {
3198 errorDuringUnpack() = 1;
3200 std::ostringstream os;
3202 <<
"At i = " << i <<
", numEnt = " << numEnt
3203 <<
" > maxRowNumEnt = " << maxRowNumEnt
3205 std::cerr << os.str ();
3210 const size_t numScalarEnt = numEnt*blockSize*blockSize;
3211 auto gidsOut = Kokkos::subview(gblColInds, pair_type(0, numEnt));
3212 auto lidsOut = Kokkos::subview(lclColInds, pair_type(0, numEnt));
3213 auto valsOut = Kokkos::subview(vals, pair_type(0, numScalarEnt));
3218 size_t numBytesOut = 0;
3221 unpackRowForBlockCrs<impl_scalar_type, LO, GO>
3222 (Kokkos::View<GO*,host_exec>(gidsOut.data(), numEnt),
3223 Kokkos::View<impl_scalar_type*,host_exec>(valsOut.data(), numScalarEnt),
3224 imports.view_host(),
3225 offval, numBytes, numEnt,
3226 numBytesPerValue, blockSize);
3228 catch (std::exception& e) {
3229 errorDuringUnpack() = 1;
3231 std::ostringstream os;
3232 os << prefix <<
"At i = " << i <<
", unpackRowForBlockCrs threw: "
3233 << e.what () << endl;
3234 std::cerr << os.str ();
3239 if (numBytes != numBytesOut) {
3240 errorDuringUnpack() = 1;
3242 std::ostringstream os;
3244 <<
"At i = " << i <<
", numBytes = " << numBytes
3245 <<
" != numBytesOut = " << numBytesOut <<
"."
3247 std::cerr << os.str();
3253 for (
size_t k = 0; k < numEnt; ++k) {
3255 if (lidsOut(k) == Teuchos::OrdinalTraits<LO>::invalid ()) {
3257 std::ostringstream os;
3259 <<
"At i = " << i <<
", GID " << gidsOut(k)
3260 <<
" is not owned by the calling process."
3262 std::cerr << os.str();
3270 const LO*
const lidsRaw =
const_cast<const LO*
> (lidsOut.data ());
3271 const Scalar*
const valsRaw =
reinterpret_cast<const Scalar*
>
3273 if (combineMode ==
ADD) {
3274 numCombd = this->sumIntoLocalValues (lclRow, lidsRaw, valsRaw, numEnt);
3275 }
else if (combineMode ==
INSERT || combineMode ==
REPLACE) {
3276 numCombd = this->replaceLocalValues (lclRow, lidsRaw, valsRaw, numEnt);
3277 }
else if (combineMode ==
ABSMAX) {
3278 numCombd = this->absMaxLocalValues (lclRow, lidsRaw, valsRaw, numEnt);
3281 if (static_cast<LO> (numEnt) != numCombd) {
3282 errorDuringUnpack() = 1;
3284 std::ostringstream os;
3285 os << prefix <<
"At i = " << i <<
", numEnt = " << numEnt
3286 <<
" != numCombd = " << numCombd <<
"."
3288 std::cerr << os.str ();
3295 if (errorDuringUnpack () != 0) {
3296 std::ostream& err = this->markLocalErrorAndGetStream ();
3297 err << prefix <<
"Unpacking failed.";
3299 err <<
" Please run again with the environment variable setting "
3300 "TPETRA_VERBOSE=1 to get more verbose diagnostic output.";
3306 std::ostringstream os;
3307 const bool lclSuccess = ! (* (this->localError_));
3309 << (lclSuccess ?
"succeeded" :
"FAILED")
3311 std::cerr << os.str ();
3315 template<
class Scalar,
class LO,
class GO,
class Node>
3319 using Teuchos::TypeNameTraits;
3320 std::ostringstream os;
3321 os <<
"\"Tpetra::BlockCrsMatrix\": { "
3322 <<
"Template parameters: { "
3323 <<
"Scalar: " << TypeNameTraits<Scalar>::name ()
3324 <<
"LO: " << TypeNameTraits<LO>::name ()
3325 <<
"GO: " << TypeNameTraits<GO>::name ()
3326 <<
"Node: " << TypeNameTraits<Node>::name ()
3328 <<
", Label: \"" << this->getObjectLabel () <<
"\""
3329 <<
", Global dimensions: ["
3330 << graph_.getDomainMap ()->getGlobalNumElements () <<
", "
3331 << graph_.getRangeMap ()->getGlobalNumElements () <<
"]"
3332 <<
", Block size: " << getBlockSize ()
3338 template<
class Scalar,
class LO,
class GO,
class Node>
3342 const Teuchos::EVerbosityLevel verbLevel)
const
3344 using Teuchos::ArrayRCP;
3345 using Teuchos::CommRequest;
3346 using Teuchos::FancyOStream;
3347 using Teuchos::getFancyOStream;
3348 using Teuchos::ireceive;
3349 using Teuchos::isend;
3350 using Teuchos::outArg;
3351 using Teuchos::TypeNameTraits;
3352 using Teuchos::VERB_DEFAULT;
3353 using Teuchos::VERB_NONE;
3354 using Teuchos::VERB_LOW;
3355 using Teuchos::VERB_MEDIUM;
3357 using Teuchos::VERB_EXTREME;
3359 using Teuchos::wait;
3361 #ifdef HAVE_TPETRA_DEBUG
3362 const char prefix[] =
"Tpetra::BlockCrsMatrix::describe: ";
3363 #endif // HAVE_TPETRA_DEBUG
3366 const Teuchos::EVerbosityLevel vl =
3367 (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
3369 if (vl == VERB_NONE) {
3374 Teuchos::OSTab tab0 (out);
3376 out <<
"\"Tpetra::BlockCrsMatrix\":" << endl;
3377 Teuchos::OSTab tab1 (out);
3379 out <<
"Template parameters:" << endl;
3381 Teuchos::OSTab tab2 (out);
3382 out <<
"Scalar: " << TypeNameTraits<Scalar>::name () << endl
3383 <<
"LO: " << TypeNameTraits<LO>::name () << endl
3384 <<
"GO: " << TypeNameTraits<GO>::name () << endl
3385 <<
"Node: " << TypeNameTraits<Node>::name () << endl;
3387 out <<
"Label: \"" << this->getObjectLabel () <<
"\"" << endl
3388 <<
"Global dimensions: ["
3389 << graph_.getDomainMap ()->getGlobalNumElements () <<
", "
3390 << graph_.getRangeMap ()->getGlobalNumElements () <<
"]" << endl;
3392 const LO blockSize = getBlockSize ();
3393 out <<
"Block size: " << blockSize << endl;
3396 if (vl >= VERB_MEDIUM) {
3397 const Teuchos::Comm<int>& comm = * (graph_.getMap ()->getComm ());
3398 const int myRank = comm.getRank ();
3400 out <<
"Row Map:" << endl;
3402 getRowMap()->describe (out, vl);
3404 if (! getColMap ().is_null ()) {
3405 if (getColMap() == getRowMap()) {
3407 out <<
"Column Map: same as row Map" << endl;
3412 out <<
"Column Map:" << endl;
3414 getColMap ()->describe (out, vl);
3417 if (! getDomainMap ().is_null ()) {
3418 if (getDomainMap () == getRowMap ()) {
3420 out <<
"Domain Map: same as row Map" << endl;
3423 else if (getDomainMap () == getColMap ()) {
3425 out <<
"Domain Map: same as column Map" << endl;
3430 out <<
"Domain Map:" << endl;
3432 getDomainMap ()->describe (out, vl);
3435 if (! getRangeMap ().is_null ()) {
3436 if (getRangeMap () == getDomainMap ()) {
3438 out <<
"Range Map: same as domain Map" << endl;
3441 else if (getRangeMap () == getRowMap ()) {
3443 out <<
"Range Map: same as row Map" << endl;
3448 out <<
"Range Map: " << endl;
3450 getRangeMap ()->describe (out, vl);
3455 if (vl >= VERB_EXTREME) {
3461 const_cast<this_type&
> (*this).
sync_host ();
3463 #ifdef HAVE_TPETRA_DEBUG
3464 TEUCHOS_TEST_FOR_EXCEPTION
3465 (this->need_sync_host (), std::logic_error,
3466 prefix <<
"Right after sync to host, the matrix claims that it needs "
3467 "sync to host. Please report this bug to the Tpetra developers.");
3468 #endif // HAVE_TPETRA_DEBUG
3470 const Teuchos::Comm<int>& comm = * (graph_.getMap ()->getComm ());
3471 const int myRank = comm.getRank ();
3472 const int numProcs = comm.getSize ();
3475 RCP<std::ostringstream> lclOutStrPtr (
new std::ostringstream ());
3476 RCP<FancyOStream> osPtr = getFancyOStream (lclOutStrPtr);
3477 FancyOStream& os = *osPtr;
3478 os <<
"Process " << myRank <<
":" << endl;
3479 Teuchos::OSTab tab2 (os);
3481 const map_type& meshRowMap = * (graph_.getRowMap ());
3482 const map_type& meshColMap = * (graph_.getColMap ());
3487 os <<
"Row " << meshGblRow <<
": {";
3489 const LO* lclColInds = NULL;
3490 Scalar* vals = NULL;
3492 this->getLocalRowView (meshLclRow, lclColInds, vals, numInds);
3494 for (LO k = 0; k < numInds; ++k) {
3497 os <<
"Col " << gblCol <<
": [";
3498 for (LO i = 0; i < blockSize; ++i) {
3499 for (LO j = 0; j < blockSize; ++j) {
3500 os << vals[blockSize*blockSize*k + i*blockSize + j];
3501 if (j + 1 < blockSize) {
3505 if (i + 1 < blockSize) {
3510 if (k + 1 < numInds) {
3520 out << lclOutStrPtr->str ();
3521 lclOutStrPtr = Teuchos::null;
3524 const int sizeTag = 1337;
3525 const int dataTag = 1338;
3527 ArrayRCP<char> recvDataBuf;
3531 for (
int p = 1; p < numProcs; ++p) {
3534 ArrayRCP<size_t> recvSize (1);
3536 RCP<CommRequest<int> > recvSizeReq =
3537 ireceive<int, size_t> (recvSize, p, sizeTag, comm);
3538 wait<int> (comm, outArg (recvSizeReq));
3539 const size_t numCharsToRecv = recvSize[0];
3546 if (static_cast<size_t>(recvDataBuf.size()) < numCharsToRecv + 1) {
3547 recvDataBuf.resize (numCharsToRecv + 1);
3549 ArrayRCP<char> recvData = recvDataBuf.persistingView (0, numCharsToRecv);
3551 RCP<CommRequest<int> > recvDataReq =
3552 ireceive<int, char> (recvData, p, dataTag, comm);
3553 wait<int> (comm, outArg (recvDataReq));
3558 recvDataBuf[numCharsToRecv] =
'\0';
3559 out << recvDataBuf.getRawPtr ();
3561 else if (myRank == p) {
3565 const std::string stringToSend = lclOutStrPtr->str ();
3566 lclOutStrPtr = Teuchos::null;
3569 const size_t numCharsToSend = stringToSend.size ();
3570 ArrayRCP<size_t> sendSize (1);
3571 sendSize[0] = numCharsToSend;
3572 RCP<CommRequest<int> > sendSizeReq =
3573 isend<int, size_t> (sendSize, 0, sizeTag, comm);
3574 wait<int> (comm, outArg (sendSizeReq));
3582 ArrayRCP<const char> sendData (&stringToSend[0], 0, numCharsToSend,
false);
3583 RCP<CommRequest<int> > sendDataReq =
3584 isend<int, char> (sendData, 0, dataTag, comm);
3585 wait<int> (comm, outArg (sendDataReq));
3591 template<
class Scalar,
class LO,
class GO,
class Node>
3592 Teuchos::RCP<const Teuchos::Comm<int> >
3596 return graph_.getComm();
3600 template<
class Scalar,
class LO,
class GO,
class Node>
3605 return graph_.getGlobalNumCols();
3608 template<
class Scalar,
class LO,
class GO,
class Node>
3613 return graph_.getNodeNumCols();
3616 template<
class Scalar,
class LO,
class GO,
class Node>
3621 return graph_.getIndexBase();
3624 template<
class Scalar,
class LO,
class GO,
class Node>
3629 return graph_.getGlobalNumEntries();
3632 template<
class Scalar,
class LO,
class GO,
class Node>
3637 return graph_.getNodeNumEntries();
3640 template<
class Scalar,
class LO,
class GO,
class Node>
3645 return graph_.getNumEntriesInGlobalRow(globalRow);
3649 template<
class Scalar,
class LO,
class GO,
class Node>
3654 return graph_.getGlobalMaxNumRowEntries();
3657 template<
class Scalar,
class LO,
class GO,
class Node>
3662 return graph_.hasColMap();
3666 template<
class Scalar,
class LO,
class GO,
class Node>
3671 return graph_.isLocallyIndexed();
3674 template<
class Scalar,
class LO,
class GO,
class Node>
3679 return graph_.isGloballyIndexed();
3682 template<
class Scalar,
class LO,
class GO,
class Node>
3687 return graph_.isFillComplete ();
3690 template<
class Scalar,
class LO,
class GO,
class Node>
3699 template<
class Scalar,
class LO,
class GO,
class Node>
3703 const Teuchos::ArrayView<GO> &,
3704 const Teuchos::ArrayView<Scalar> &,
3707 TEUCHOS_TEST_FOR_EXCEPTION(
3708 true, std::logic_error,
"Tpetra::BlockCrsMatrix::getGlobalRowCopy: "
3709 "This class doesn't support global matrix indexing.");
3713 template<
class Scalar,
class LO,
class GO,
class Node>
3717 Teuchos::ArrayView<const GO> &,
3718 Teuchos::ArrayView<const Scalar> &)
const
3720 TEUCHOS_TEST_FOR_EXCEPTION(
3721 true, std::logic_error,
"Tpetra::BlockCrsMatrix::getGlobalRowView: "
3722 "This class doesn't support global matrix indexing.");
3726 template<
class Scalar,
class LO,
class GO,
class Node>
3730 Teuchos::ArrayView<const LO>& ,
3731 Teuchos::ArrayView<const Scalar>& )
const
3733 TEUCHOS_TEST_FOR_EXCEPTION(
3734 true, std::logic_error,
"Tpetra::BlockCrsMatrix::getLocalRowView: "
3735 "This class doesn't support local matrix indexing.");
3738 template<
class Scalar,
class LO,
class GO,
class Node>
3743 #ifdef HAVE_TPETRA_DEBUG
3744 const char prefix[] =
3745 "Tpetra::BlockCrsMatrix::getLocalDiagCopy: ";
3746 #endif // HAVE_TPETRA_DEBUG
3748 const size_t lclNumMeshRows = graph_.getNodeNumRows ();
3750 Kokkos::View<size_t*, device_type> diagOffsets (
"diagOffsets", lclNumMeshRows);
3751 graph_.getLocalDiagOffsets (diagOffsets);
3754 auto diagOffsetsHost = Kokkos::create_mirror_view (diagOffsets);
3757 diag.template modify<typename decltype (diagOffsetsHost)::memory_space> ();
3759 #ifdef HAVE_TPETRA_DEBUG
3760 TEUCHOS_TEST_FOR_EXCEPTION
3761 (this->need_sync_host (), std::runtime_error,
3762 prefix <<
"The matrix's data were last modified on device, but have "
3763 "not been sync'd to host. Please sync to host (by calling "
3764 "sync<Kokkos::HostSpace>() on this matrix) before calling this "
3766 #endif // HAVE_TPETRA_DEBUG
3768 auto vals_host_out = getValuesHost ();
3769 Scalar* vals_host_out_raw =
3770 reinterpret_cast<Scalar*
> (vals_host_out.data ());
3773 size_t rowOffset = 0;
3775 LO bs = getBlockSize();
3776 for(
size_t r=0; r<getNodeNumRows(); r++)
3779 offset = rowOffset + diagOffsetsHost(r)*bs*bs;
3780 for(
int b=0; b<bs; b++)
3785 rowOffset += getNumEntriesInLocalRow(r)*bs*bs;
3788 diag.template sync<memory_space> ();
3791 template<
class Scalar,
class LO,
class GO,
class Node>
3796 TEUCHOS_TEST_FOR_EXCEPTION(
3797 true, std::logic_error,
"Tpetra::BlockCrsMatrix::leftScale: "
3798 "not implemented.");
3802 template<
class Scalar,
class LO,
class GO,
class Node>
3807 TEUCHOS_TEST_FOR_EXCEPTION(
3808 true, std::logic_error,
"Tpetra::BlockCrsMatrix::rightScale: "
3809 "not implemented.");
3813 template<
class Scalar,
class LO,
class GO,
class Node>
3814 Teuchos::RCP<const ::Tpetra::RowGraph<LO, GO, Node> >
3821 template<
class Scalar,
class LO,
class GO,
class Node>
3822 typename ::Tpetra::RowMatrix<Scalar, LO, GO, Node>::mag_type
3826 TEUCHOS_TEST_FOR_EXCEPTION(
3827 true, std::logic_error,
"Tpetra::BlockCrsMatrix::getFrobeniusNorm: "
3828 "not implemented.");
3838 #define TPETRA_BLOCKCRSMATRIX_INSTANT(S,LO,GO,NODE) \
3839 template class BlockCrsMatrix< S, LO, GO, NODE >;
3841 #endif // TPETRA_BLOCKCRSMATRIX_DEF_HPP
virtual size_t getNumEntriesInGlobalRow(GO globalRow) const
The current number of entries on the calling process in the specified global row. ...
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
little_vec_type::HostMirror getLocalBlock(const LO localRowIndex, const LO colIndex) const
Get a host view of the degrees of freedom at the given mesh point.
typename BMV::impl_scalar_type impl_scalar_type
The implementation type of entries in the matrix.
void getLocalDiagCopy(const Kokkos::View< impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &diag, const Kokkos::View< const size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Variant of getLocalDiagCopy() that uses precomputed offsets and puts diagonal blocks in a 3-D Kokkos:...
LO replaceLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Replace values at the given (mesh, i.e., block) column indices, in the given (mesh, i.e., block) row.
Teuchos::RCP< const map_type > getColMap() const override
Returns the Map that describes the column distribution in this graph.
void gaussSeidelCopy(MultiVector< Scalar, LO, GO, Node > &X, const ::Tpetra::MultiVector< Scalar, LO, GO, Node > &B, const ::Tpetra::MultiVector< Scalar, LO, GO, Node > &D, const Scalar &dampingFactor, const ESweepDirection direction, const int numSweeps, const bool zeroInitialGuess) const
Version of gaussSeidel(), with fewer requirements on X.
LO replaceLocalValuesByOffsets(const LO localRowInd, const ptrdiff_t offsets[], const Scalar vals[], const LO numOffsets) const
Like replaceLocalValues, but avoids computing row offsets.
Kokkos::StaticCrsGraph< local_ordinal_type, Kokkos::LayoutLeft, device_type > local_graph_type
The type of the part of the sparse graph on each MPI process.
Sparse matrix whose entries are small dense square blocks, all of the same dimensions.
size_t getNodeNumRows() const
get the local number of block rows
typename DistObject< Scalar, LO, GO, Node >::buffer_device_type buffer_device_type
Kokkos::Device specialization for communication buffers.
virtual bool isGloballyIndexed() const
Whether matrix indices are globally indexed.
size_t getNumEntriesInLocalRow(const LO localRowInd) const
Return the number of entries in the given row on the calling process.
LO local_ordinal_type
The type of local indices.
static KOKKOS_INLINE_FUNCTION size_t unpackValue(LO &outVal, const char inBuf[])
Unpack the given value from the given output buffer.
virtual void getGlobalRowView(GO GlobalRow, Teuchos::ArrayView< const GO > &indices, Teuchos::ArrayView< const Scalar > &values) const
Get a constant, nonpersisting, globally indexed view of the given row of the matrix.
virtual typename::Tpetra::RowMatrix< Scalar, LO, GO, Node >::mag_type getFrobeniusNorm() const
The Frobenius norm of the matrix.
virtual global_size_t getGlobalNumCols() const
The global number of columns of this matrix.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
size_t getNumEntriesInLocalRow(local_ordinal_type localRow) const override
Get the number of entries in the given row (local index).
virtual void packAndPrepare(const SrcDistObject &sourceObj, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &exportLIDs, Kokkos::DualView< packet_type *, buffer_device_type > &exports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, size_t &constantNumPackets, Distributor &)
void setAllToScalar(const Scalar &alpha)
Set all matrix entries equal to alpha.
One or more distributed dense vectors.
virtual bool isLocallyIndexed() const
Whether matrix indices are locally indexed.
Teuchos::RCP< const map_type > getDomainMap() const override
Returns the Map associated with the domain of this graph.
Linear algebra kernels for small dense matrices and vectors.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
virtual void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< packet_type *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, Distributor &, const CombineMode combineMode)
virtual void leftScale(const ::Tpetra::Vector< Scalar, LO, GO, Node > &x)
Scale the RowMatrix on the left with the given Vector x.
virtual bool hasColMap() const
Whether this matrix has a well-defined column Map.
Kokkos::View< impl_scalar_type *, Kokkos::LayoutRight, device_type, Kokkos::MemoryTraits< Kokkos::Unmanaged > > little_vec_type
The type used to access nonconst vector blocks.
virtual size_t getNodeNumEntries() const
The local number of stored (structurally nonzero) entries.
KOKKOS_INLINE_FUNCTION void FILL(const ViewType &x, const InputType &val)
Set every entry of x to val.
static map_type makePointMap(const map_type &meshMap, const LO blockSize)
Create and return the point Map corresponding to the given mesh Map and block size.
virtual size_t getNodeNumCols() const
The number of columns needed to apply the forward operator on this node.
int local_ordinal_type
Default value of Scalar template parameter.
local_ordinal_type getMinLocalIndex() const
The minimum local index.
MultiVector for multiple degrees of freedom per mesh point.
KOKKOS_INLINE_FUNCTION void absMax(const ViewType2 &Y, const ViewType1 &X)
Implementation of Tpetra's ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix, and the small dense vectors in BlockMultiVector and BlockVector.
KOKKOS_INLINE_FUNCTION void SCAL(const CoefficientType &alpha, const ViewType &x)
x := alpha*x, where x is either rank 1 (a vector) or rank 2 (a matrix).
size_t global_size_t
Global size_t object.
size_t getNodeNumEntries() const override
The local number of entries in the graph.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
BlockCrsMatrix()
Default constructor: Makes an empty block matrix.
virtual bool isFillComplete() const
Whether fillComplete() has been called.
void apply(const mv_type &X, mv_type &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const
For this matrix A, compute Y := beta * Y + alpha * Op(A) * X.
LO absMaxLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Like sumIntoLocalValues, but for the ABSMAX combine mode.
LO getBlockSize() const
Get the number of degrees of freedom per mesh point.
virtual size_t getGlobalMaxNumRowEntries() const
The maximum number of entries in any row over all processes in the matrix's communicator.
static KOKKOS_INLINE_FUNCTION size_t packValue(char outBuf[], const LO &inVal)
Pack the given value of type value_type into the given output buffer of bytes (char).
Insert new values that don't currently exist.
size_t getNodeMaxNumRowEntries() const
Maximum number of entries in any row of the matrix, on this process.
bool isSorted() const
Whether graph indices in all rows are known to be sorted.
LO sumIntoLocalValuesByOffsets(const LO localRowInd, const ptrdiff_t offsets[], const Scalar vals[], const LO numOffsets) const
Like sumIntoLocalValues, but avoids computing row offsets.
virtual void rightScale(const ::Tpetra::Vector< Scalar, LO, GO, Node > &x)
Scale the RowMatrix on the right with the given Vector x.
global_ordinal_type getGlobalElement(local_ordinal_type localIndex) const
The global index corresponding to the given local index.
ESweepDirection
Sweep direction for Gauss-Seidel or Successive Over-Relaxation (SOR).
Teuchos::RCP< const map_type > getRangeMap() const
Get the (point) range Map of this matrix.
local_ordinal_type getMaxLocalIndex() const
The maximum local index on the calling process.
bool isNodeLocalElement(local_ordinal_type localIndex) const
Whether the given local index is valid for this Map on the calling process.
Teuchos::RCP< const map_type > getRowMap() const
get the (mesh) map for the rows of this block matrix.
virtual global_size_t getGlobalNumEntries() const
The global number of stored (structurally nonzero) entries.
Sets up and executes a communication plan for a Tpetra DistObject.
void applyBlock(const BlockMultiVector< Scalar, LO, GO, Node > &X, BlockMultiVector< Scalar, LO, GO, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, const Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), const Scalar beta=Teuchos::ScalarTraits< Scalar >::zero())
Version of apply() that takes BlockMultiVector input and output.
CombineMode
Rule for combining data in an Import or Export.
Sum new values into existing values.
global_size_t getGlobalNumRows() const
get the global number of block rows
virtual void getGlobalRowCopy(GO GlobalRow, const Teuchos::ArrayView< GO > &Indices, const Teuchos::ArrayView< Scalar > &Values, size_t &NumEntries) const
Get a copy of the given global row's entries.
LO getLocalRowOffsets(const LO localRowInd, ptrdiff_t offsets[], const LO colInds[], const LO numColInds) const
Get relative offsets corresponding to the given rows, given by local row index.
Replace old value with maximum of magnitudes of old and new values.
void sync_host()
Sync the matrix's values to host space.
virtual void copyAndPermute(const SrcDistObject &sourceObj, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs)
Replace existing values with new values.
Teuchos::RCP< const map_type > getRangeMap() const override
Returns the Map associated with the domain of this graph.
KOKKOS_INLINE_FUNCTION void COPY(const ViewType1 &x, const ViewType2 &y)
Deep copy x into y, where x and y are either rank 1 (vectors) or rank 2 (matrices) with the same dime...
Replace old values with zero.
virtual bool supportsRowViews() const
Whether this object implements getLocalRowView() and getGlobalRowView().
KOKKOS_INLINE_FUNCTION void GEMV(const CoeffType &alpha, const BlkType &A, const VecType1 &x, const VecType2 &y)
y := y + alpha * A * x (dense matrix-vector multiply)
LO absMaxLocalValuesByOffsets(const LO localRowInd, const ptrdiff_t offsets[], const Scalar vals[], const LO numOffsets) const
Like sumIntoLocalValuesByOffsets, but for the ABSMAX combine mode.
std::string dualViewStatusToString(const DualViewType &dv, const char name[])
Return the status of the given Kokkos::DualView, as a human-readable string.
virtual Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
The communicator over which this matrix is distributed.
LO getLocalRowView(const LO localRowInd, const LO *&colInds, Scalar *&vals, LO &numInds) const
Get a view of the (mesh, i.e., block) row, using local (mesh, i.e., block) indices.
void getLocalDiagOffsets(const Kokkos::View< size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Get offsets of the diagonal entries in the matrix.
LO getNumVectors() const
Get the number of columns (vectors) in the BlockMultiVector.
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const LO &)
Number of bytes required to pack or unpack the given value of type value_type.
char packet_type
Implementation detail; tells.
void localGaussSeidel(const BlockMultiVector< Scalar, LO, GO, Node > &Residual, BlockMultiVector< Scalar, LO, GO, Node > &Solution, const Kokkos::View< impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &D_inv, const Scalar &omega, const ESweepDirection direction) const
Local Gauss-Seidel solve, given a factorized diagonal.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel) const
Print a description of this object to the given output stream.
local_ordinal_type getLocalElement(global_ordinal_type globalIndex) const
The local index corresponding to the given global index.
A distributed dense vector.
Teuchos::RCP< const map_type > getDomainMap() const
Get the (point) domain Map of this matrix.
void getLocalRowCopy(LO LocalRow, const Teuchos::ArrayView< LO > &Indices, const Teuchos::ArrayView< Scalar > &Values, size_t &NumEntries) const
Not implemented.
bool locallySameAs(const Map< local_ordinal_type, global_ordinal_type, node_type > &map) const
Is this Map locally the same as the input Map?
local_graph_type getLocalGraph() const
Get the local graph.
Teuchos::RCP< const map_type > getColMap() const
get the (mesh) map for the columns of this block matrix.
virtual Teuchos::RCP< const ::Tpetra::RowGraph< LO, GO, Node > > getGraph() const
Get the (mesh) graph.
void reorderedGaussSeidelCopy(::Tpetra::MultiVector< Scalar, LO, GO, Node > &X, const ::Tpetra::MultiVector< Scalar, LO, GO, Node > &B, const ::Tpetra::MultiVector< Scalar, LO, GO, Node > &D, const Teuchos::ArrayView< LO > &rowIndices, const Scalar &dampingFactor, const ESweepDirection direction, const int numSweeps, const bool zeroInitialGuess) const
Version of reorderedGaussSeidel(), with fewer requirements on X.
LO sumIntoLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Sum into values at the given (mesh, i.e., block) column indices, in the given (mesh, i.e., block) row.
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
std::string description() const
One-line description of this object.
void replaceLocalValue(const LocalOrdinal myRow, const Scalar &value) const
Replace current value at the specified location with specified values.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
virtual GO getIndexBase() const
The index base for global indices in this matrix.