42 #ifndef TPETRA_EXPERIMENTAL_BLOCKCRSMATRIX_DEF_HPP
43 #define TPETRA_EXPERIMENTAL_BLOCKCRSMATRIX_DEF_HPP
51 #include "Tpetra_Experimental_BlockMultiVector.hpp"
54 #include "Teuchos_TimeMonitor.hpp"
55 #ifdef HAVE_TPETRA_DEBUG
57 #endif // HAVE_TPETRA_DEBUG
71 #if defined(__CUDACC__)
73 # if defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA)
74 # undef TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA
75 # endif // defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA)
77 #elif defined(__GNUC__)
79 # if defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA)
80 # undef TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA
81 # endif // defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA)
83 #else // some other compiler
86 # if ! defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA)
87 # define TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA 1
88 # endif // ! defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA)
89 #endif // defined(__CUDACC__), defined(__GNUC__)
93 namespace Experimental {
98 struct BlockCrsRowStruct {
99 T totalNumEntries, totalNumBytes, maxRowLength;
100 KOKKOS_INLINE_FUNCTION BlockCrsRowStruct() =
default;
101 KOKKOS_INLINE_FUNCTION BlockCrsRowStruct(
const BlockCrsRowStruct &b) =
default;
102 KOKKOS_INLINE_FUNCTION BlockCrsRowStruct(
const T& numEnt,
const T& numBytes,
const T& rowLength)
103 : totalNumEntries(numEnt), totalNumBytes(numBytes), maxRowLength(rowLength) {}
108 KOKKOS_INLINE_FUNCTION
109 void operator+=(
volatile BlockCrsRowStruct<T> &a,
110 volatile const BlockCrsRowStruct<T> &b) {
111 a.totalNumEntries += b.totalNumEntries;
112 a.totalNumBytes += b.totalNumBytes;
113 a.maxRowLength = a.maxRowLength > b.maxRowLength ? a.maxRowLength : b.maxRowLength;
118 KOKKOS_INLINE_FUNCTION
119 void operator+=(BlockCrsRowStruct<T> &a,
const BlockCrsRowStruct<T> &b) {
120 a.totalNumEntries += b.totalNumEntries;
121 a.totalNumBytes += b.totalNumBytes;
122 a.maxRowLength = a.maxRowLength > b.maxRowLength ? a.maxRowLength : b.maxRowLength;
125 template<
typename T,
typename ExecSpace>
126 struct BlockCrsReducer {
127 typedef BlockCrsReducer reducer;
128 typedef T value_type;
129 typedef Kokkos::View<value_type,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type;
132 KOKKOS_INLINE_FUNCTION
133 BlockCrsReducer(value_type &val) : value(&val) {}
135 KOKKOS_INLINE_FUNCTION
void join(value_type &dst, value_type &src)
const { dst += src; }
136 KOKKOS_INLINE_FUNCTION
void join(
volatile value_type &dst,
const volatile value_type &src)
const { dst += src; }
137 KOKKOS_INLINE_FUNCTION
void init(value_type &val)
const { val = value_type(); }
138 KOKKOS_INLINE_FUNCTION value_type& reference() {
return *value; }
139 KOKKOS_INLINE_FUNCTION result_view_type view()
const {
return result_view_type(value); }
143 template<
class AlphaCoeffType,
145 class MatrixValuesType,
149 class BcrsApplyNoTrans1VecTeamFunctor {
151 static_assert (Kokkos::Impl::is_view<MatrixValuesType>::value,
152 "MatrixValuesType must be a Kokkos::View.");
153 static_assert (Kokkos::Impl::is_view<OutVecType>::value,
154 "OutVecType must be a Kokkos::View.");
155 static_assert (Kokkos::Impl::is_view<InVecType>::value,
156 "InVecType must be a Kokkos::View.");
157 static_assert (std::is_same<MatrixValuesType,
158 typename MatrixValuesType::const_type>::value,
159 "MatrixValuesType must be a const Kokkos::View.");
160 static_assert (std::is_same<OutVecType,
161 typename OutVecType::non_const_type>::value,
162 "OutVecType must be a nonconst Kokkos::View.");
163 static_assert (std::is_same<InVecType, typename InVecType::const_type>::value,
164 "InVecType must be a const Kokkos::View.");
165 static_assert (static_cast<int> (MatrixValuesType::rank) == 1,
166 "MatrixValuesType must be a rank-1 Kokkos::View.");
167 static_assert (static_cast<int> (InVecType::rank) == 1,
168 "InVecType must be a rank-1 Kokkos::View.");
169 static_assert (static_cast<int> (OutVecType::rank) == 1,
170 "OutVecType must be a rank-1 Kokkos::View.");
171 typedef typename MatrixValuesType::non_const_value_type scalar_type;
172 typedef typename GraphType::device_type device_type;
173 typedef typename device_type::execution_space execution_space;
174 typedef typename execution_space::scratch_memory_space shmem_space;
178 typedef typename std::remove_const<typename GraphType::data_type>::type
181 typedef Kokkos::TeamPolicy<Kokkos::Schedule<Kokkos::Dynamic>,
190 void setX (
const InVecType& X) { X_ = X; }
198 void setY (
const OutVecType& Y) { Y_ = Y; }
204 getScratchSizePerTeam ()
const
208 typedef typename std::decay<decltype (Y_(0))>::type y_value_type;
209 return blockSize_ *
sizeof (y_value_type);
216 getScratchSizePerThread ()
const
220 typedef typename std::decay<decltype (Y_(0))>::type y_value_type;
221 return blockSize_ *
sizeof (y_value_type);
226 getNumLclMeshRows ()
const
228 return ptr_.extent (0) == 0 ?
230 static_cast<local_ordinal_type> (ptr_.extent (0) - 1);
244 BcrsApplyNoTrans1VecTeamFunctor (
const typename std::decay<AlphaCoeffType>::type& alpha,
245 const GraphType& graph,
246 const MatrixValuesType& val,
247 const local_ordinal_type blockSize,
249 const typename std::decay<BetaCoeffType>::type& beta,
251 const local_ordinal_type rowsPerTeam = defaultRowsPerTeam) :
253 ptr_ (graph.row_map),
254 ind_ (graph.entries),
256 blockSize_ (blockSize),
260 rowsPerTeam_ (rowsPerTeam)
267 KOKKOS_INLINE_FUNCTION
void
268 operator () (
const typename policy_type::member_type& member)
const
274 using Kokkos::Details::ArithTraits;
280 using Kokkos::parallel_for;
281 using Kokkos::subview;
283 typedef typename decltype (ptr_)::non_const_value_type offset_type;
284 typedef Kokkos::View<
typename OutVecType::non_const_value_type*,
285 shmem_space, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
287 typedef Kokkos::View<
typename OutVecType::non_const_value_type*,
290 Kokkos::MemoryTraits<Kokkos::Unmanaged> >
292 typedef Kokkos::View<
typename MatrixValuesType::const_value_type**,
295 Kokkos::MemoryTraits<Kokkos::Unmanaged> >
298 const LO leagueRank = member.league_rank();
303 shared_array_type threadLocalMem =
304 shared_array_type (member.thread_scratch (1), blockSize_ * rowsPerTeam_);
309 const LO numLclMeshRows = getNumLclMeshRows ();
310 const LO rowBeg = leagueRank * rowsPerTeam_;
311 const LO rowTmp = rowBeg + rowsPerTeam_;
312 const LO rowEnd = rowTmp < numLclMeshRows ? rowTmp : numLclMeshRows;
323 parallel_for (Kokkos::TeamThreadRange (member, rowBeg, rowEnd),
324 [&] (
const LO& lclRow) {
326 out_little_vec_type Y_tlm (threadLocalMem.data () + blockSize_ * member.team_rank (), blockSize_);
328 const offset_type Y_ptBeg = lclRow * blockSize_;
329 const offset_type Y_ptEnd = Y_ptBeg + blockSize_;
331 subview (Y_, ::Kokkos::make_pair (Y_ptBeg, Y_ptEnd));
332 if (beta_ == ArithTraits<BetaCoeffType>::zero ()) {
333 FILL (Y_tlm, ArithTraits<BetaCoeffType>::zero ());
335 else if (beta_ == ArithTraits<BetaCoeffType>::one ()) {
343 if (alpha_ != ArithTraits<AlphaCoeffType>::zero ()) {
344 const offset_type blkBeg = ptr_[lclRow];
345 const offset_type blkEnd = ptr_[lclRow+1];
347 const offset_type bs2 = blockSize_ * blockSize_;
348 for (offset_type absBlkOff = blkBeg; absBlkOff < blkEnd;
350 little_block_type A_cur (val_.data () + absBlkOff * bs2,
351 blockSize_, blockSize_);
352 const offset_type X_blkCol = ind_[absBlkOff];
353 const offset_type X_ptBeg = X_blkCol * blockSize_;
354 const offset_type X_ptEnd = X_ptBeg + blockSize_;
356 subview (X_, ::Kokkos::make_pair (X_ptBeg, X_ptEnd));
358 GEMV (alpha_, A_cur, X_cur, Y_tlm);
366 typename std::decay<AlphaCoeffType>::type alpha_;
367 typename GraphType::row_map_type::const_type ptr_;
368 typename GraphType::entries_type::const_type ind_;
369 MatrixValuesType val_;
372 typename std::decay<BetaCoeffType>::type beta_;
378 template<
class AlphaCoeffType,
380 class MatrixValuesType,
384 class BcrsApplyNoTrans1VecFunctor {
386 static_assert (Kokkos::Impl::is_view<MatrixValuesType>::value,
387 "MatrixValuesType must be a Kokkos::View.");
388 static_assert (Kokkos::Impl::is_view<OutVecType>::value,
389 "OutVecType must be a Kokkos::View.");
390 static_assert (Kokkos::Impl::is_view<InVecType>::value,
391 "InVecType must be a Kokkos::View.");
392 static_assert (std::is_same<MatrixValuesType,
393 typename MatrixValuesType::const_type>::value,
394 "MatrixValuesType must be a const Kokkos::View.");
395 static_assert (std::is_same<OutVecType,
396 typename OutVecType::non_const_type>::value,
397 "OutVecType must be a nonconst Kokkos::View.");
398 static_assert (std::is_same<InVecType, typename InVecType::const_type>::value,
399 "InVecType must be a const Kokkos::View.");
400 static_assert (static_cast<int> (MatrixValuesType::rank) == 1,
401 "MatrixValuesType must be a rank-1 Kokkos::View.");
402 static_assert (static_cast<int> (InVecType::rank) == 1,
403 "InVecType must be a rank-1 Kokkos::View.");
404 static_assert (static_cast<int> (OutVecType::rank) == 1,
405 "OutVecType must be a rank-1 Kokkos::View.");
406 typedef typename MatrixValuesType::non_const_value_type scalar_type;
409 typedef typename GraphType::device_type device_type;
412 typedef typename std::remove_const<typename GraphType::data_type>::type
415 typedef Kokkos::RangePolicy<Kokkos::Schedule<Kokkos::Dynamic>,
416 typename device_type::execution_space,
424 void setX (
const InVecType& X) { X_ = X; }
432 void setY (
const OutVecType& Y) { Y_ = Y; }
434 typedef typename Kokkos::ArithTraits<typename std::decay<AlphaCoeffType>::type>::val_type alpha_coeff_type;
435 typedef typename Kokkos::ArithTraits<typename std::decay<BetaCoeffType>::type>::val_type beta_coeff_type;
438 BcrsApplyNoTrans1VecFunctor (
const alpha_coeff_type& alpha,
439 const GraphType& graph,
440 const MatrixValuesType& val,
441 const local_ordinal_type blockSize,
443 const beta_coeff_type& beta,
444 const OutVecType& Y) :
446 ptr_ (graph.row_map),
447 ind_ (graph.entries),
449 blockSize_ (blockSize),
455 KOKKOS_INLINE_FUNCTION
void
456 operator () (
const local_ordinal_type& lclRow)
const
462 using Kokkos::Details::ArithTraits;
468 using Kokkos::parallel_for;
469 using Kokkos::subview;
470 typedef typename decltype (ptr_)::non_const_value_type offset_type;
471 typedef Kokkos::View<
typename MatrixValuesType::const_value_type**,
474 Kokkos::MemoryTraits<Kokkos::Unmanaged> >
477 const offset_type Y_ptBeg = lclRow * blockSize_;
478 const offset_type Y_ptEnd = Y_ptBeg + blockSize_;
479 auto Y_cur = subview (Y_, ::Kokkos::make_pair (Y_ptBeg, Y_ptEnd));
483 if (beta_ == ArithTraits<beta_coeff_type>::zero ()) {
484 FILL (Y_cur, ArithTraits<beta_coeff_type>::zero ());
486 else if (beta_ != ArithTraits<beta_coeff_type>::one ()) {
490 if (alpha_ != ArithTraits<alpha_coeff_type>::zero ()) {
491 const offset_type blkBeg = ptr_[lclRow];
492 const offset_type blkEnd = ptr_[lclRow+1];
494 const offset_type bs2 = blockSize_ * blockSize_;
495 for (offset_type absBlkOff = blkBeg; absBlkOff < blkEnd;
497 little_block_type A_cur (val_.data () + absBlkOff * bs2,
498 blockSize_, blockSize_);
499 const offset_type X_blkCol = ind_[absBlkOff];
500 const offset_type X_ptBeg = X_blkCol * blockSize_;
501 const offset_type X_ptEnd = X_ptBeg + blockSize_;
502 auto X_cur = subview (X_, ::Kokkos::make_pair (X_ptBeg, X_ptEnd));
504 GEMV (alpha_, A_cur, X_cur, Y_cur);
510 alpha_coeff_type alpha_;
511 typename GraphType::row_map_type::const_type ptr_;
512 typename GraphType::entries_type::const_type ind_;
513 MatrixValuesType val_;
516 beta_coeff_type beta_;
520 template<
class AlphaCoeffType,
522 class MatrixValuesType,
523 class InMultiVecType,
525 class OutMultiVecType>
527 bcrsLocalApplyNoTrans (
const AlphaCoeffType& alpha,
528 const GraphType& graph,
529 const MatrixValuesType& val,
530 const typename std::remove_const<typename GraphType::data_type>::type blockSize,
531 const InMultiVecType& X,
532 const BetaCoeffType& beta,
533 const OutMultiVecType& Y
535 ,
const typename std::remove_const<typename GraphType::data_type>::type rowsPerTeam = 20
539 static_assert (Kokkos::Impl::is_view<MatrixValuesType>::value,
540 "MatrixValuesType must be a Kokkos::View.");
541 static_assert (Kokkos::Impl::is_view<OutMultiVecType>::value,
542 "OutMultiVecType must be a Kokkos::View.");
543 static_assert (Kokkos::Impl::is_view<InMultiVecType>::value,
544 "InMultiVecType must be a Kokkos::View.");
545 static_assert (static_cast<int> (MatrixValuesType::rank) == 1,
546 "MatrixValuesType must be a rank-1 Kokkos::View.");
547 static_assert (static_cast<int> (OutMultiVecType::rank) == 2,
548 "OutMultiVecType must be a rank-2 Kokkos::View.");
549 static_assert (static_cast<int> (InMultiVecType::rank) == 2,
550 "InMultiVecType must be a rank-2 Kokkos::View.");
552 typedef typename MatrixValuesType::const_type matrix_values_type;
553 typedef typename OutMultiVecType::non_const_type out_multivec_type;
554 typedef typename InMultiVecType::const_type in_multivec_type;
555 typedef typename Kokkos::ArithTraits<typename std::decay<AlphaCoeffType>::type>::val_type alpha_type;
556 typedef typename Kokkos::ArithTraits<typename std::decay<BetaCoeffType>::type>::val_type beta_type;
557 typedef typename std::remove_const<typename GraphType::data_type>::type LO;
559 const LO numLocalMeshRows = graph.row_map.extent (0) == 0 ?
560 static_cast<LO
> (0) :
561 static_cast<LO> (graph.row_map.extent (0) - 1);
562 const LO numVecs = Y.extent (1);
563 if (numLocalMeshRows == 0 || numVecs == 0) {
570 in_multivec_type X_in = X;
571 out_multivec_type Y_out = Y;
576 typedef decltype (Kokkos::subview (X_in, Kokkos::ALL (), 0)) in_vec_type;
577 typedef decltype (Kokkos::subview (Y_out, Kokkos::ALL (), 0)) out_vec_type;
579 typedef BcrsApplyNoTrans1VecTeamFunctor<alpha_type, GraphType,
580 matrix_values_type, in_vec_type, beta_type, out_vec_type> functor_type;
582 typedef BcrsApplyNoTrans1VecFunctor<alpha_type, GraphType,
583 matrix_values_type, in_vec_type, beta_type, out_vec_type> functor_type;
585 typedef typename functor_type::policy_type policy_type;
587 auto X_0 = Kokkos::subview (X_in, Kokkos::ALL (), 0);
588 auto Y_0 = Kokkos::subview (Y_out, Kokkos::ALL (), 0);
590 functor_type functor (alpha, graph, val, blockSize, X_0, beta, Y_0, rowsPerTeam);
591 const LO numTeams = functor.getNumTeams ();
592 policy_type policy (numTeams, Kokkos::AUTO ());
598 const LO scratchSizePerTeam = functor.getScratchSizePerTeam ();
599 const LO scratchSizePerThread = functor.getScratchSizePerThread ();
601 policy.set_scratch_size (level,
602 Kokkos::PerTeam (scratchSizePerTeam),
603 Kokkos::PerThread (scratchSizePerThread));
606 functor_type functor (alpha, graph, val, blockSize, X_0, beta, Y_0);
607 policy_type policy (0, numLocalMeshRows);
611 Kokkos::parallel_for (policy, functor);
614 for (LO j = 1; j < numVecs; ++j) {
615 auto X_j = Kokkos::subview (X_in, Kokkos::ALL (), j);
616 auto Y_j = Kokkos::subview (Y_out, Kokkos::ALL (), j);
619 Kokkos::parallel_for (policy, functor);
629 template<
class Scalar,
class LO,
class GO,
class Node>
630 class GetLocalDiagCopy {
632 typedef typename Node::device_type device_type;
633 typedef size_t diag_offset_type;
634 typedef Kokkos::View<
const size_t*, device_type,
635 Kokkos::MemoryUnmanaged> diag_offsets_type;
636 typedef typename ::Tpetra::CrsGraph<LO, GO, Node> global_graph_type;
638 typedef typename local_graph_type::row_map_type row_offsets_type;
639 typedef typename ::Tpetra::Experimental::BlockMultiVector<Scalar, LO, GO, Node>::impl_scalar_type IST;
640 typedef Kokkos::View<IST***, device_type, Kokkos::MemoryUnmanaged> diag_type;
641 typedef Kokkos::View<const IST*, device_type, Kokkos::MemoryUnmanaged> values_type;
644 GetLocalDiagCopy (
const diag_type& diag,
645 const values_type& val,
646 const diag_offsets_type& diagOffsets,
647 const row_offsets_type& ptr,
648 const LO blockSize) :
650 diagOffsets_ (diagOffsets),
652 blockSize_ (blockSize),
653 offsetPerBlock_ (blockSize_*blockSize_),
657 KOKKOS_INLINE_FUNCTION
void
658 operator() (
const LO& lclRowInd)
const
663 const size_t absOffset = ptr_[lclRowInd];
666 const size_t relOffset = diagOffsets_[lclRowInd];
669 const size_t pointOffset = (absOffset+relOffset)*offsetPerBlock_;
673 typedef Kokkos::View<
const IST**, Kokkos::LayoutRight,
674 device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
675 const_little_block_type;
676 const_little_block_type D_in (val_.data () + pointOffset,
677 blockSize_, blockSize_);
678 auto D_out = Kokkos::subview (diag_, lclRowInd, ALL (), ALL ());
684 diag_offsets_type diagOffsets_;
685 row_offsets_type ptr_;
692 template<
class Scalar,
class LO,
class GO,
class Node>
694 BlockCrsMatrix<Scalar, LO, GO, Node>::
695 markLocalErrorAndGetStream ()
697 * (this->localError_) =
true;
698 if ((*errs_).is_null ()) {
699 *errs_ = Teuchos::rcp (
new std::ostringstream ());
704 template<
class Scalar,
class LO,
class GO,
class Node>
708 graph_ (Teuchos::rcp (new
map_type ()), 0),
709 blockSize_ (static_cast<LO> (0)),
710 X_colMap_ (new Teuchos::RCP<
BMV> ()),
711 Y_rowMap_ (new Teuchos::RCP<
BMV> ()),
712 pointImporter_ (new Teuchos::RCP<typename
crs_graph_type::import_type> ()),
714 localError_ (new bool (false)),
715 errs_ (new Teuchos::RCP<std::ostringstream> ())
719 template<
class Scalar,
class LO,
class GO,
class Node>
722 const LO blockSize) :
725 rowMeshMap_ (* (graph.getRowMap ())),
726 blockSize_ (blockSize),
727 X_colMap_ (new Teuchos::RCP<
BMV> ()),
728 Y_rowMap_ (new Teuchos::RCP<
BMV> ()),
729 pointImporter_ (new Teuchos::RCP<typename
crs_graph_type::import_type> ()),
730 offsetPerBlock_ (blockSize * blockSize),
731 localError_ (new bool (false)),
732 errs_ (new Teuchos::RCP<std::ostringstream> ())
734 TEUCHOS_TEST_FOR_EXCEPTION(
735 ! graph_.
isSorted (), std::invalid_argument,
"Tpetra::Experimental::"
736 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted "
737 "rows (isSorted() is false). This class assumes sorted rows.");
739 graphRCP_ = Teuchos::rcpFromRef(graph_);
745 const bool blockSizeIsNonpositive = (blockSize + 1 <= 1);
746 TEUCHOS_TEST_FOR_EXCEPTION(
747 blockSizeIsNonpositive, std::invalid_argument,
"Tpetra::Experimental::"
748 "BlockCrsMatrix constructor: The input blockSize = " << blockSize <<
749 " <= 0. The block size must be positive.");
755 typedef typename crs_graph_type::local_graph_type::row_map_type row_map_type;
756 typedef typename row_map_type::HostMirror::non_const_type nc_host_row_map_type;
759 nc_host_row_map_type ptr_h_nc = Kokkos::create_mirror_view (ptr_d);
764 typedef typename crs_graph_type::local_graph_type::entries_type entries_type;
765 typedef typename entries_type::HostMirror::non_const_type nc_host_entries_type;
768 nc_host_entries_type ind_h_nc = Kokkos::create_mirror_view (ind_d);
774 val_ = decltype (val_) (
"val", numValEnt);
777 template<
class Scalar,
class LO,
class GO,
class Node>
782 const LO blockSize) :
785 rowMeshMap_ (* (graph.getRowMap ())),
786 domainPointMap_ (domainPointMap),
787 rangePointMap_ (rangePointMap),
788 blockSize_ (blockSize),
789 X_colMap_ (new Teuchos::RCP<
BMV> ()),
790 Y_rowMap_ (new Teuchos::RCP<
BMV> ()),
791 pointImporter_ (new Teuchos::RCP<typename
crs_graph_type::import_type> ()),
792 offsetPerBlock_ (blockSize * blockSize),
793 localError_ (new bool (false)),
794 errs_ (new Teuchos::RCP<std::ostringstream> ())
796 TEUCHOS_TEST_FOR_EXCEPTION(
797 ! graph_.
isSorted (), std::invalid_argument,
"Tpetra::Experimental::"
798 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted "
799 "rows (isSorted() is false). This class assumes sorted rows.");
801 graphRCP_ = Teuchos::rcpFromRef(graph_);
807 const bool blockSizeIsNonpositive = (blockSize + 1 <= 1);
808 TEUCHOS_TEST_FOR_EXCEPTION(
809 blockSizeIsNonpositive, std::invalid_argument,
"Tpetra::Experimental::"
810 "BlockCrsMatrix constructor: The input blockSize = " << blockSize <<
811 " <= 0. The block size must be positive.");
814 typedef typename crs_graph_type::local_graph_type::row_map_type row_map_type;
815 typedef typename row_map_type::HostMirror::non_const_type nc_host_row_map_type;
818 nc_host_row_map_type ptr_h_nc = Kokkos::create_mirror_view (ptr_d);
823 typedef typename crs_graph_type::local_graph_type::entries_type entries_type;
824 typedef typename entries_type::HostMirror::non_const_type nc_host_entries_type;
827 nc_host_entries_type ind_h_nc = Kokkos::create_mirror_view (ind_d);
833 val_ = decltype (val_) (
"val", numValEnt);
836 template<
class Scalar,
class LO,
class GO,
class Node>
837 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
842 return Teuchos::rcp (
new map_type (domainPointMap_));
845 template<
class Scalar,
class LO,
class GO,
class Node>
846 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
851 return Teuchos::rcp (
new map_type (rangePointMap_));
854 template<
class Scalar,
class LO,
class GO,
class Node>
855 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
859 return graph_.getRowMap();
862 template<
class Scalar,
class LO,
class GO,
class Node>
863 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
867 return graph_.getColMap();
870 template<
class Scalar,
class LO,
class GO,
class Node>
875 return graph_.getGlobalNumRows();
878 template<
class Scalar,
class LO,
class GO,
class Node>
883 return graph_.getNodeNumRows();
886 template<
class Scalar,
class LO,
class GO,
class Node>
891 return graph_.getNodeMaxNumRowEntries();
894 template<
class Scalar,
class LO,
class GO,
class Node>
899 Teuchos::ETransp mode,
904 TEUCHOS_TEST_FOR_EXCEPTION(
905 mode != Teuchos::NO_TRANS && mode != Teuchos::TRANS && mode != Teuchos::CONJ_TRANS,
906 std::invalid_argument,
"Tpetra::Experimental::BlockCrsMatrix::apply: "
907 "Invalid 'mode' argument. Valid values are Teuchos::NO_TRANS, "
908 "Teuchos::TRANS, and Teuchos::CONJ_TRANS.");
912 const LO blockSize = getBlockSize ();
914 X_view =
BMV (X, * (graph_.getDomainMap ()), blockSize);
915 Y_view =
BMV (Y, * (graph_.getRangeMap ()), blockSize);
917 catch (std::invalid_argument& e) {
918 TEUCHOS_TEST_FOR_EXCEPTION(
919 true, std::invalid_argument,
"Tpetra::Experimental::BlockCrsMatrix::"
920 "apply: Either the input MultiVector X or the output MultiVector Y "
921 "cannot be viewed as a BlockMultiVector, given this BlockCrsMatrix's "
922 "graph. BlockMultiVector's constructor threw the following exception: "
931 const_cast<this_type*
> (
this)->applyBlock (X_view, Y_view, mode, alpha, beta);
932 }
catch (std::invalid_argument& e) {
933 TEUCHOS_TEST_FOR_EXCEPTION(
934 true, std::invalid_argument,
"Tpetra::Experimental::BlockCrsMatrix::"
935 "apply: The implementation method applyBlock complained about having "
936 "an invalid argument. It reported the following: " << e.what ());
937 }
catch (std::logic_error& e) {
938 TEUCHOS_TEST_FOR_EXCEPTION(
939 true, std::invalid_argument,
"Tpetra::Experimental::BlockCrsMatrix::"
940 "apply: The implementation method applyBlock complained about a "
941 "possible bug in its implementation. It reported the following: "
943 }
catch (std::exception& e) {
944 TEUCHOS_TEST_FOR_EXCEPTION(
945 true, std::invalid_argument,
"Tpetra::Experimental::BlockCrsMatrix::"
946 "apply: The implementation method applyBlock threw an exception which "
947 "is neither std::invalid_argument nor std::logic_error, but is a "
948 "subclass of std::exception. It reported the following: "
951 TEUCHOS_TEST_FOR_EXCEPTION(
952 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::"
953 "apply: The implementation method applyBlock threw an exception which "
954 "is not an instance of a subclass of std::exception. This probably "
955 "indicates a bug. Please report this to the Tpetra developers.");
959 template<
class Scalar,
class LO,
class GO,
class Node>
964 Teuchos::ETransp mode,
968 TEUCHOS_TEST_FOR_EXCEPTION(
970 "Tpetra::Experimental::BlockCrsMatrix::applyBlock: "
971 "X and Y have different block sizes. X.getBlockSize() = "
975 if (mode == Teuchos::NO_TRANS) {
976 applyBlockNoTrans (X, Y, alpha, beta);
977 }
else if (mode == Teuchos::TRANS || mode == Teuchos::CONJ_TRANS) {
978 applyBlockTrans (X, Y, mode, alpha, beta);
980 TEUCHOS_TEST_FOR_EXCEPTION(
981 true, std::invalid_argument,
"Tpetra::Experimental::BlockCrsMatrix::"
982 "applyBlock: Invalid 'mode' argument. Valid values are "
983 "Teuchos::NO_TRANS, Teuchos::TRANS, and Teuchos::CONJ_TRANS.");
987 template<
class Scalar,
class LO,
class GO,
class Node>
992 #ifdef HAVE_TPETRA_DEBUG
993 const char prefix[] =
"Tpetra::Experimental::BlockCrsMatrix::setAllToScalar: ";
994 #endif // HAVE_TPETRA_DEBUG
996 if (this->need_sync_device ()) {
1000 #ifdef HAVE_TPETRA_DEBUG
1001 TEUCHOS_TEST_FOR_EXCEPTION
1002 (this->need_sync_host (), std::runtime_error,
1003 prefix <<
"The matrix's values need sync on both device and host.");
1004 #endif // HAVE_TPETRA_DEBUG
1005 this->modify_host ();
1013 this->modify_device ();
1018 template<
class Scalar,
class LO,
class GO,
class Node>
1023 const Scalar vals[],
1024 const LO numColInds)
const
1026 #ifdef HAVE_TPETRA_DEBUG
1027 const char prefix[] =
1028 "Tpetra::Experimental::BlockCrsMatrix::replaceLocalValues: ";
1029 #endif // HAVE_TPETRA_DEBUG
1031 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1036 return static_cast<LO
> (0);
1040 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1041 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
1042 const LO perBlockSize = this->offsetPerBlock ();
1047 #ifdef HAVE_TPETRA_DEBUG
1048 TEUCHOS_TEST_FOR_EXCEPTION
1049 (this->need_sync_host (), std::runtime_error,
1050 prefix <<
"The matrix's data were last modified on device, but have "
1051 "not been sync'd to host. Please sync to host (by calling "
1052 "sync<Kokkos::HostSpace>() on this matrix) before calling this "
1054 #endif // HAVE_TPETRA_DEBUG
1056 auto vals_host_out = getValuesHost ();
1059 for (LO k = 0; k < numColInds; ++k, pointOffset += perBlockSize) {
1060 const LO relBlockOffset =
1061 this->findRelOffsetOfColumnIndex (localRowInd, colInds[k], hint);
1062 if (relBlockOffset != LINV) {
1071 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1075 vals_host_out_raw + absBlockOffset * perBlockSize;
1080 for (LO i = 0; i < perBlockSize; ++i) {
1081 A_old[i] = A_new[i];
1083 hint = relBlockOffset + 1;
1090 template <
class Scalar,
class LO,
class GO,
class Node>
1094 Kokkos::MemoryUnmanaged>& offsets)
const
1096 graph_.getLocalDiagOffsets (offsets);
1099 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
1100 template <
class Scalar,
class LO,
class GO,
class Node>
1101 void TPETRA_DEPRECATED
1110 const size_t lclNumRows = graph_.getNodeNumRows ();
1111 if (static_cast<size_t> (offsets.size ()) < lclNumRows) {
1112 offsets.resize (lclNumRows);
1118 typedef typename device_type::memory_space memory_space;
1119 if (std::is_same<memory_space, Kokkos::HostSpace>::value) {
1123 typedef Kokkos::View<
size_t*, device_type,
1124 Kokkos::MemoryUnmanaged> output_type;
1125 output_type offsetsOut (offsets.getRawPtr (), lclNumRows);
1126 graph_.getLocalDiagOffsets (offsetsOut);
1129 Kokkos::View<size_t*, device_type> offsetsTmp (
"diagOffsets", lclNumRows);
1130 graph_.getLocalDiagOffsets (offsetsTmp);
1131 typedef Kokkos::View<
size_t*, Kokkos::HostSpace,
1132 Kokkos::MemoryUnmanaged> output_type;
1133 output_type offsetsOut (offsets.getRawPtr (), lclNumRows);
1137 #endif // TPETRA_ENABLE_DEPRECATED_CODE
1139 template <
class Scalar,
class LO,
class GO,
class Node>
1145 Kokkos::MemoryUnmanaged>& D_inv,
1146 const Scalar& omega,
1151 Kokkos::Details::ArithTraits<impl_scalar_type>::zero ();
1153 Kokkos::Details::ArithTraits<impl_scalar_type>::one ();
1154 const LO numLocalMeshRows =
1155 static_cast<LO
> (rowMeshMap_.getNodeNumElements ());
1162 const LO blockSize = getBlockSize ();
1163 Teuchos::Array<impl_scalar_type> localMem (blockSize);
1164 Teuchos::Array<impl_scalar_type> localMat (blockSize*blockSize);
1168 LO rowBegin = 0, rowEnd = 0, rowStride = 0;
1169 if (direction == Forward) {
1171 rowEnd = numLocalMeshRows+1;
1174 else if (direction == Backward) {
1175 rowBegin = numLocalMeshRows;
1179 else if (direction == Symmetric) {
1180 this->localGaussSeidel (B, X, D_inv, omega, Forward);
1181 this->localGaussSeidel (B, X, D_inv, omega, Backward);
1185 const Scalar one_minus_omega = Teuchos::ScalarTraits<Scalar>::one()-omega;
1186 const Scalar minus_omega = -omega;
1189 for (LO lclRow = rowBegin; lclRow != rowEnd; lclRow += rowStride) {
1190 const LO actlRow = lclRow - 1;
1193 COPY (B_cur, X_lcl);
1194 SCAL (static_cast<impl_scalar_type> (omega), X_lcl);
1196 const size_t meshBeg = ptrHost_[actlRow];
1197 const size_t meshEnd = ptrHost_[actlRow+1];
1198 for (
size_t absBlkOff = meshBeg; absBlkOff < meshEnd; ++absBlkOff) {
1199 const LO meshCol = indHost_[absBlkOff];
1200 const_little_block_type A_cur =
1201 getConstLocalBlockFromAbsOffset (absBlkOff);
1205 const Scalar alpha = meshCol == actlRow ? one_minus_omega : minus_omega;
1207 GEMV (static_cast<impl_scalar_type> (alpha), A_cur, X_cur, X_lcl);
1213 auto D_lcl = Kokkos::subview (D_inv, actlRow, ALL (), ALL ());
1215 FILL (X_update, zero);
1216 GEMV (one, D_lcl, X_lcl, X_update);
1220 for (LO lclRow = rowBegin; lclRow != rowEnd; lclRow += rowStride) {
1221 for (LO j = 0; j < numVecs; ++j) {
1222 LO actlRow = lclRow-1;
1225 COPY (B_cur, X_lcl);
1226 SCAL (static_cast<impl_scalar_type> (omega), X_lcl);
1228 const size_t meshBeg = ptrHost_[actlRow];
1229 const size_t meshEnd = ptrHost_[actlRow+1];
1230 for (
size_t absBlkOff = meshBeg; absBlkOff < meshEnd; ++absBlkOff) {
1231 const LO meshCol = indHost_[absBlkOff];
1232 const_little_block_type A_cur =
1233 getConstLocalBlockFromAbsOffset (absBlkOff);
1237 const Scalar alpha = meshCol == actlRow ? one_minus_omega : minus_omega;
1238 GEMV (static_cast<impl_scalar_type> (alpha), A_cur, X_cur, X_lcl);
1241 auto D_lcl = Kokkos::subview (D_inv, actlRow, ALL (), ALL ());
1243 FILL (X_update, zero);
1244 GEMV (one, D_lcl, X_lcl, X_update);
1250 template <
class Scalar,
class LO,
class GO,
class Node>
1263 TEUCHOS_TEST_FOR_EXCEPTION(
1264 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::"
1265 "gaussSeidelCopy: Not implemented.");
1268 template <
class Scalar,
class LO,
class GO,
class Node>
1274 const Teuchos::ArrayView<LO>& ,
1282 TEUCHOS_TEST_FOR_EXCEPTION(
1283 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::"
1284 "reorderedGaussSeidelCopy: Not implemented.");
1287 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
1288 template <
class Scalar,
class LO,
class GO,
class Node>
1289 void TPETRA_DEPRECATED
1292 const Teuchos::ArrayView<const size_t>& offsets)
const
1294 using Teuchos::ArrayRCP;
1295 using Teuchos::ArrayView;
1296 const Scalar
ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
1298 const size_t myNumRows = rowMeshMap_.getNodeNumElements();
1299 const LO* columnIndices;
1302 Teuchos::Array<LO> cols(1);
1305 Teuchos::Array<Scalar> zeroMat (blockSize_*blockSize_, ZERO);
1306 for (
size_t i = 0; i < myNumRows; ++i) {
1308 if (offsets[i] == Teuchos::OrdinalTraits<size_t>::invalid ()) {
1312 getLocalRowView (i, columnIndices, vals, numColumns);
1313 diag.
replaceLocalValues (i, cols.getRawPtr(), &vals[offsets[i]*blockSize_*blockSize_], 1);
1317 #endif // TPETRA_ENABLE_DEPRECATED_CODE
1319 template <
class Scalar,
class LO,
class GO,
class Node>
1323 Kokkos::MemoryUnmanaged>& diag,
1324 const Kokkos::View<
const size_t*, device_type,
1325 Kokkos::MemoryUnmanaged>& offsets)
const
1327 using Kokkos::parallel_for;
1329 const char prefix[] =
"Tpetra::BlockCrsMatrix::getLocalDiagCopy (2-arg): ";
1331 const LO lclNumMeshRows =
static_cast<LO
> (rowMeshMap_.getNodeNumElements ());
1332 const LO blockSize = this->getBlockSize ();
1333 TEUCHOS_TEST_FOR_EXCEPTION
1334 (static_cast<LO> (diag.extent (0)) < lclNumMeshRows ||
1335 static_cast<LO> (diag.extent (1)) < blockSize ||
1336 static_cast<LO> (diag.extent (2)) < blockSize,
1337 std::invalid_argument, prefix <<
1338 "The input Kokkos::View is not big enough to hold all the data.");
1339 TEUCHOS_TEST_FOR_EXCEPTION
1340 (static_cast<LO> (offsets.size ()) < lclNumMeshRows, std::invalid_argument,
1341 prefix <<
"offsets.size() = " << offsets.size () <<
" < local number of "
1342 "diagonal blocks " << lclNumMeshRows <<
".");
1344 #ifdef HAVE_TPETRA_DEBUG
1345 TEUCHOS_TEST_FOR_EXCEPTION
1346 (this->
template need_sync<device_type> (), std::runtime_error,
1347 prefix <<
"The matrix's data were last modified on host, but have "
1348 "not been sync'd to device. Please sync to device (by calling "
1349 "sync<device_type>() on this matrix) before calling this method.");
1350 #endif // HAVE_TPETRA_DEBUG
1352 typedef Kokkos::RangePolicy<execution_space, LO> policy_type;
1353 typedef GetLocalDiagCopy<Scalar, LO, GO, Node> functor_type;
1361 const_cast<this_type*
> (
this)->
template getValues<device_type> ();
1363 parallel_for (policy_type (0, lclNumMeshRows),
1364 functor_type (diag, vals_dev, offsets,
1365 graph_.getLocalGraph ().row_map, blockSize_));
1369 template <
class Scalar,
class LO,
class GO,
class Node>
1373 Kokkos::MemoryUnmanaged>& diag,
1374 const Teuchos::ArrayView<const size_t>& offsets)
const
1377 using Kokkos::parallel_for;
1379 Kokkos::MemoryUnmanaged>::HostMirror::execution_space host_exec_space;
1381 const LO lclNumMeshRows =
static_cast<LO
> (rowMeshMap_.getNodeNumElements ());
1382 const LO blockSize = this->getBlockSize ();
1383 TEUCHOS_TEST_FOR_EXCEPTION
1384 (static_cast<LO> (diag.extent (0)) < lclNumMeshRows ||
1385 static_cast<LO> (diag.extent (1)) < blockSize ||
1386 static_cast<LO> (diag.extent (2)) < blockSize,
1387 std::invalid_argument,
"Tpetra::BlockCrsMatrix::getLocalDiagCopy: "
1388 "The input Kokkos::View is not big enough to hold all the data.");
1389 TEUCHOS_TEST_FOR_EXCEPTION
1390 (static_cast<LO> (offsets.size ()) < lclNumMeshRows,
1391 std::invalid_argument,
"Tpetra::BlockCrsMatrix::getLocalDiagCopy: "
1392 "offsets.size() = " << offsets.size () <<
" < local number of diagonal "
1393 "blocks " << lclNumMeshRows <<
".");
1397 typedef Kokkos::RangePolicy<host_exec_space, LO> policy_type;
1398 parallel_for (policy_type (0, lclNumMeshRows), [=] (
const LO& lclMeshRow) {
1399 auto D_in = this->getConstLocalBlockFromRelOffset (lclMeshRow, offsets[lclMeshRow]);
1400 auto D_out = Kokkos::subview (diag, lclMeshRow, ALL (), ALL ());
1406 template<
class Scalar,
class LO,
class GO,
class Node>
1411 const Scalar vals[],
1412 const LO numColInds)
const
1414 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1419 return static_cast<LO
> (0);
1423 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1424 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
1425 const LO perBlockSize = this->offsetPerBlock ();
1430 for (LO k = 0; k < numColInds; ++k, pointOffset += perBlockSize) {
1431 const LO relBlockOffset =
1432 this->findRelOffsetOfColumnIndex (localRowInd, colInds[k], hint);
1433 if (relBlockOffset != LINV) {
1434 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1435 little_block_type A_old =
1436 getNonConstLocalBlockFromAbsOffset (absBlockOffset);
1437 const_little_block_type A_new =
1438 getConstLocalBlockFromInput (vIn, pointOffset);
1440 ::Tpetra::Experimental::Impl::absMax (A_old, A_new);
1441 hint = relBlockOffset + 1;
1449 template<
class Scalar,
class LO,
class GO,
class Node>
1454 const Scalar vals[],
1455 const LO numColInds)
const
1457 #ifdef HAVE_TPETRA_DEBUG
1458 const char prefix[] =
1459 "Tpetra::Experimental::BlockCrsMatrix::sumIntoLocalValues: ";
1460 #endif // HAVE_TPETRA_DEBUG
1462 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1467 return static_cast<LO
> (0);
1472 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1473 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
1474 const LO perBlockSize = this->offsetPerBlock ();
1479 #ifdef HAVE_TPETRA_DEBUG
1480 TEUCHOS_TEST_FOR_EXCEPTION
1481 (this->need_sync_host (), std::runtime_error,
1482 prefix <<
"The matrix's data were last modified on device, but have not "
1483 "been sync'd to host. Please sync to host (by calling "
1484 "sync<Kokkos::HostSpace>() on this matrix) before calling this method.");
1485 #endif // HAVE_TPETRA_DEBUG
1487 auto vals_host_out =
1491 for (LO k = 0; k < numColInds; ++k, pointOffset += perBlockSize) {
1492 const LO relBlockOffset =
1493 this->findRelOffsetOfColumnIndex (localRowInd, colInds[k], hint);
1494 if (relBlockOffset != LINV) {
1503 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1507 vals_host_out_raw + absBlockOffset * perBlockSize;
1512 for (LO i = 0; i < perBlockSize; ++i) {
1513 A_old[i] += A_new[i];
1515 hint = relBlockOffset + 1;
1522 template<
class Scalar,
class LO,
class GO,
class Node>
1530 #ifdef HAVE_TPETRA_DEBUG
1531 const char prefix[] =
1532 "Tpetra::Experimental::BlockCrsMatrix::getLocalRowView: ";
1533 #endif // HAVE_TPETRA_DEBUG
1535 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1539 return Teuchos::OrdinalTraits<LO>::invalid ();
1542 const size_t absBlockOffsetStart = ptrHost_[localRowInd];
1543 colInds = indHost_.data () + absBlockOffsetStart;
1545 #ifdef HAVE_TPETRA_DEBUG
1546 TEUCHOS_TEST_FOR_EXCEPTION
1547 (this->need_sync_host (), std::runtime_error,
1548 prefix <<
"The matrix's data were last modified on device, but have "
1549 "not been sync'd to host. Please sync to host (by calling "
1550 "sync<Kokkos::HostSpace>() on this matrix) before calling this "
1552 #endif // HAVE_TPETRA_DEBUG
1554 auto vals_host_out = getValuesHost ();
1557 absBlockOffsetStart * offsetPerBlock ();
1558 vals =
reinterpret_cast<Scalar*
> (vOut);
1560 numInds = ptrHost_[localRowInd + 1] - absBlockOffsetStart;
1565 template<
class Scalar,
class LO,
class GO,
class Node>
1569 const Teuchos::ArrayView<LO>& Indices,
1570 const Teuchos::ArrayView<Scalar>& Values,
1571 size_t &NumEntries)
const
1576 getLocalRowView(LocalRow,colInds,vals,numInds);
1577 if (numInds > Indices.size() || numInds*blockSize_*blockSize_ > Values.size()) {
1578 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error,
1579 "Tpetra::BlockCrsMatrix::getLocalRowCopy : Column and/or values array is not large enough to hold "
1580 << numInds <<
" row entries");
1582 for (LO i=0; i<numInds; ++i) {
1583 Indices[i] = colInds[i];
1585 for (LO i=0; i<numInds*blockSize_*blockSize_; ++i) {
1586 Values[i] = vals[i];
1588 NumEntries = numInds;
1591 template<
class Scalar,
class LO,
class GO,
class Node>
1595 ptrdiff_t offsets[],
1597 const LO numColInds)
const
1599 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1604 return static_cast<LO
> (0);
1607 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
1611 for (LO k = 0; k < numColInds; ++k) {
1612 const LO relBlockOffset =
1613 this->findRelOffsetOfColumnIndex (localRowInd, colInds[k], hint);
1614 if (relBlockOffset != LINV) {
1615 offsets[k] =
static_cast<ptrdiff_t
> (relBlockOffset);
1616 hint = relBlockOffset + 1;
1624 template<
class Scalar,
class LO,
class GO,
class Node>
1628 const ptrdiff_t offsets[],
1629 const Scalar vals[],
1630 const LO numOffsets)
const
1632 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1637 return static_cast<LO
> (0);
1641 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1642 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
1643 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1644 size_t pointOffset = 0;
1647 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
1648 const size_t relBlockOffset = offsets[k];
1649 if (relBlockOffset != STINV) {
1650 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1651 little_block_type A_old =
1652 getNonConstLocalBlockFromAbsOffset (absBlockOffset);
1653 const_little_block_type A_new =
1654 getConstLocalBlockFromInput (vIn, pointOffset);
1655 COPY (A_new, A_old);
1663 template<
class Scalar,
class LO,
class GO,
class Node>
1667 const ptrdiff_t offsets[],
1668 const Scalar vals[],
1669 const LO numOffsets)
const
1671 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1676 return static_cast<LO
> (0);
1680 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1681 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
1682 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1683 size_t pointOffset = 0;
1686 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
1687 const size_t relBlockOffset = offsets[k];
1688 if (relBlockOffset != STINV) {
1689 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1690 little_block_type A_old =
1691 getNonConstLocalBlockFromAbsOffset (absBlockOffset);
1692 const_little_block_type A_new =
1693 getConstLocalBlockFromInput (vIn, pointOffset);
1694 ::Tpetra::Experimental::Impl::absMax (A_old, A_new);
1702 template<
class Scalar,
class LO,
class GO,
class Node>
1706 const ptrdiff_t offsets[],
1707 const Scalar vals[],
1708 const LO numOffsets)
const
1710 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1715 return static_cast<LO
> (0);
1720 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1721 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
1722 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1723 size_t pointOffset = 0;
1726 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
1727 const size_t relBlockOffset = offsets[k];
1728 if (relBlockOffset != STINV) {
1729 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1730 little_block_type A_old =
1731 getNonConstLocalBlockFromAbsOffset (absBlockOffset);
1732 const_little_block_type A_new =
1733 getConstLocalBlockFromInput (vIn, pointOffset);
1735 AXPY (ONE, A_new, A_old);
1743 template<
class Scalar,
class LO,
class GO,
class Node>
1748 const size_t numEntInGraph = graph_.getNumEntriesInLocalRow (localRowInd);
1749 if (numEntInGraph == Teuchos::OrdinalTraits<size_t>::invalid ()) {
1750 return static_cast<LO
> (0);
1752 return static_cast<LO
> (numEntInGraph);
1756 template<
class Scalar,
class LO,
class GO,
class Node>
1761 const Teuchos::ETransp mode,
1771 TEUCHOS_TEST_FOR_EXCEPTION(
1772 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::apply: "
1773 "transpose and conjugate transpose modes are not yet implemented.");
1776 template<
class Scalar,
class LO,
class GO,
class Node>
1778 BlockCrsMatrix<Scalar, LO, GO, Node>::
1779 applyBlockNoTrans (
const BlockMultiVector<Scalar, LO, GO, Node>& X,
1780 BlockMultiVector<Scalar, LO, GO, Node>& Y,
1786 typedef ::Tpetra::Import<LO, GO, Node> import_type;
1787 typedef ::Tpetra::Export<LO, GO, Node> export_type;
1788 const Scalar zero = STS::zero ();
1789 const Scalar one = STS::one ();
1790 RCP<const import_type>
import = graph_.getImporter ();
1792 RCP<const export_type> theExport = graph_.getExporter ();
1793 const char prefix[] =
"Tpetra::BlockCrsMatrix::applyBlockNoTrans: ";
1795 if (alpha == zero) {
1799 else if (beta != one) {
1804 const BMV* X_colMap = NULL;
1805 if (
import.is_null ()) {
1809 catch (std::exception& e) {
1810 TEUCHOS_TEST_FOR_EXCEPTION
1811 (
true, std::logic_error, prefix <<
"Tpetra::MultiVector::"
1812 "operator= threw an exception: " << std::endl << e.what ());
1822 if ((*X_colMap_).is_null () ||
1823 (**X_colMap_).getNumVectors () != X.getNumVectors () ||
1824 (**X_colMap_).getBlockSize () != X.getBlockSize ()) {
1825 *X_colMap_ = rcp (
new BMV (* (graph_.getColMap ()), getBlockSize (),
1826 static_cast<LO
> (X.getNumVectors ())));
1828 #ifdef HAVE_TPETRA_BCRS_DO_POINT_IMPORT
1829 if (pointImporter_->is_null ()) {
1832 const auto domainPointMap = rcp (
new typename BMV::map_type (domainPointMap_));
1833 const auto colPointMap = rcp (
new typename BMV::map_type (
1834 BMV::makePointMap (*graph_.getColMap(),
1836 *pointImporter_ = rcp (
new typename crs_graph_type::import_type (
1837 domainPointMap, colPointMap));
1839 (*X_colMap_)->getMultiVectorView().doImport (X.getMultiVectorView (),
1846 X_colMap = &(**X_colMap_);
1848 catch (std::exception& e) {
1849 TEUCHOS_TEST_FOR_EXCEPTION
1850 (
true, std::logic_error, prefix <<
"Tpetra::MultiVector::"
1851 "operator= threw an exception: " << std::endl << e.what ());
1855 BMV* Y_rowMap = NULL;
1856 if (theExport.is_null ()) {
1859 else if ((*Y_rowMap_).is_null () ||
1860 (**Y_rowMap_).getNumVectors () != Y.getNumVectors () ||
1861 (**Y_rowMap_).getBlockSize () != Y.getBlockSize ()) {
1862 *Y_rowMap_ = rcp (
new BMV (* (graph_.getRowMap ()), getBlockSize (),
1863 static_cast<LO
> (X.getNumVectors ())));
1865 Y_rowMap = &(**Y_rowMap_);
1867 catch (std::exception& e) {
1868 TEUCHOS_TEST_FOR_EXCEPTION(
1869 true, std::logic_error, prefix <<
"Tpetra::MultiVector::"
1870 "operator= threw an exception: " << std::endl << e.what ());
1875 localApplyBlockNoTrans (*X_colMap, *Y_rowMap, alpha, beta);
1877 catch (std::exception& e) {
1878 TEUCHOS_TEST_FOR_EXCEPTION
1879 (
true, std::runtime_error, prefix <<
"localApplyBlockNoTrans threw "
1880 "an exception: " << e.what ());
1883 TEUCHOS_TEST_FOR_EXCEPTION
1884 (
true, std::runtime_error, prefix <<
"localApplyBlockNoTrans threw "
1885 "an exception not a subclass of std::exception.");
1888 if (! theExport.is_null ()) {
1894 template<
class Scalar,
class LO,
class GO,
class Node>
1896 BlockCrsMatrix<Scalar, LO, GO, Node>::
1897 localApplyBlockNoTrans (
const BlockMultiVector<Scalar, LO, GO, Node>& X,
1898 BlockMultiVector<Scalar, LO, GO, Node>& Y,
1902 using ::Tpetra::Experimental::Impl::bcrsLocalApplyNoTrans;
1904 const impl_scalar_type alpha_impl = alpha;
1905 const auto graph = this->graph_.getLocalGraph ();
1906 const impl_scalar_type beta_impl = beta;
1907 const LO blockSize = this->getBlockSize ();
1909 auto X_mv = X.getMultiVectorView ();
1910 auto Y_mv = Y.getMultiVectorView ();
1911 Y_mv.template modify<device_type> ();
1913 auto X_lcl = X_mv.template getLocalView<device_type> ();
1914 auto Y_lcl = Y_mv.template getLocalView<device_type> ();
1915 auto val = this->val_.template view<device_type> ();
1917 bcrsLocalApplyNoTrans (alpha_impl, graph, val, blockSize, X_lcl,
1921 template<
class Scalar,
class LO,
class GO,
class Node>
1923 BlockCrsMatrix<Scalar, LO, GO, Node>::
1924 findRelOffsetOfColumnIndex (
const LO localRowIndex,
1925 const LO colIndexToFind,
1926 const LO hint)
const
1928 const size_t absStartOffset = ptrHost_[localRowIndex];
1929 const size_t absEndOffset = ptrHost_[localRowIndex+1];
1930 const LO numEntriesInRow =
static_cast<LO
> (absEndOffset - absStartOffset);
1932 const LO*
const curInd = indHost_.data () + absStartOffset;
1935 if (hint < numEntriesInRow && curInd[hint] == colIndexToFind) {
1942 LO relOffset = Teuchos::OrdinalTraits<LO>::invalid ();
1947 const LO maxNumEntriesForLinearSearch = 32;
1948 if (numEntriesInRow > maxNumEntriesForLinearSearch) {
1953 const LO* beg = curInd;
1954 const LO* end = curInd + numEntriesInRow;
1955 std::pair<const LO*, const LO*> p =
1956 std::equal_range (beg, end, colIndexToFind);
1957 if (p.first != p.second) {
1959 relOffset =
static_cast<LO
> (p.first - beg);
1963 for (LO k = 0; k < numEntriesInRow; ++k) {
1964 if (colIndexToFind == curInd[k]) {
1974 template<
class Scalar,
class LO,
class GO,
class Node>
1976 BlockCrsMatrix<Scalar, LO, GO, Node>::
1977 offsetPerBlock ()
const
1979 return offsetPerBlock_;
1982 template<
class Scalar,
class LO,
class GO,
class Node>
1983 typename BlockCrsMatrix<Scalar, LO, GO, Node>::const_little_block_type
1984 BlockCrsMatrix<Scalar, LO, GO, Node>::
1985 getConstLocalBlockFromInput (
const impl_scalar_type* val,
1986 const size_t pointOffset)
const
1989 const LO rowStride = blockSize_;
1990 return const_little_block_type (val + pointOffset, blockSize_, rowStride);
1993 template<
class Scalar,
class LO,
class GO,
class Node>
1994 typename BlockCrsMatrix<Scalar, LO, GO, Node>::little_block_type
1995 BlockCrsMatrix<Scalar, LO, GO, Node>::
1996 getNonConstLocalBlockFromInput (impl_scalar_type* val,
1997 const size_t pointOffset)
const
2000 const LO rowStride = blockSize_;
2001 return little_block_type (val + pointOffset, blockSize_, rowStride);
2004 template<
class Scalar,
class LO,
class GO,
class Node>
2005 typename BlockCrsMatrix<Scalar, LO, GO, Node>::const_little_block_type
2006 BlockCrsMatrix<Scalar, LO, GO, Node>::
2007 getConstLocalBlockFromAbsOffset (
const size_t absBlockOffset)
const
2009 #ifdef HAVE_TPETRA_DEBUG
2010 const char prefix[] =
2011 "Tpetra::Experimental::BlockCrsMatrix::getConstLocalBlockFromAbsOffset: ";
2012 #endif // HAVE_TPETRA_DEBUG
2014 if (absBlockOffset >= ptrHost_[rowMeshMap_.getNodeNumElements ()]) {
2018 return const_little_block_type ();
2021 #ifdef HAVE_TPETRA_DEBUG
2022 TEUCHOS_TEST_FOR_EXCEPTION
2023 (this->need_sync_host (), std::runtime_error,
2024 prefix <<
"The matrix's data were last modified on device, but have "
2025 "not been sync'd to host. Please sync to host (by calling "
2026 "sync<Kokkos::HostSpace>() on this matrix) before calling this "
2028 #endif // HAVE_TPETRA_DEBUG
2029 const size_t absPointOffset = absBlockOffset * offsetPerBlock ();
2031 auto vals_host = getValuesHost ();
2032 const impl_scalar_type* vals_host_raw = vals_host.data ();
2034 return getConstLocalBlockFromInput (vals_host_raw, absPointOffset);
2038 template<
class Scalar,
class LO,
class GO,
class Node>
2039 typename BlockCrsMatrix<Scalar, LO, GO, Node>::const_little_block_type
2040 BlockCrsMatrix<Scalar, LO, GO, Node>::
2041 getConstLocalBlockFromRelOffset (
const LO lclMeshRow,
2042 const size_t relMeshOffset)
const
2044 typedef impl_scalar_type IST;
2046 const LO* lclColInds = NULL;
2047 Scalar* lclVals = NULL;
2050 LO err = this->getLocalRowView (lclMeshRow, lclColInds, lclVals, numEnt);
2055 return const_little_block_type ();
2058 const size_t relPointOffset = relMeshOffset * this->offsetPerBlock ();
2059 IST* lclValsImpl =
reinterpret_cast<IST*
> (lclVals);
2060 return this->getConstLocalBlockFromInput (const_cast<const IST*> (lclValsImpl),
2065 template<
class Scalar,
class LO,
class GO,
class Node>
2066 typename BlockCrsMatrix<Scalar, LO, GO, Node>::little_block_type
2067 BlockCrsMatrix<Scalar, LO, GO, Node>::
2068 getNonConstLocalBlockFromAbsOffset (
const size_t absBlockOffset)
const
2070 #ifdef HAVE_TPETRA_DEBUG
2071 const char prefix[] =
2072 "Tpetra::Experimental::BlockCrsMatrix::getNonConstLocalBlockFromAbsOffset: ";
2073 #endif // HAVE_TPETRA_DEBUG
2075 if (absBlockOffset >= ptrHost_[rowMeshMap_.getNodeNumElements ()]) {
2079 return little_block_type ();
2082 const size_t absPointOffset = absBlockOffset * offsetPerBlock ();
2083 #ifdef HAVE_TPETRA_DEBUG
2084 TEUCHOS_TEST_FOR_EXCEPTION
2085 (this->need_sync_host (), std::runtime_error,
2086 prefix <<
"The matrix's data were last modified on device, but have "
2087 "not been sync'd to host. Please sync to host (by calling "
2088 "sync<Kokkos::HostSpace>() on this matrix) before calling this "
2090 #endif // HAVE_TPETRA_DEBUG
2091 auto vals_host = getValuesHost();
2092 impl_scalar_type* vals_host_raw = vals_host.data ();
2093 return getNonConstLocalBlockFromInput (vals_host_raw, absPointOffset);
2097 template<
class Scalar,
class LO,
class GO,
class Node>
2098 typename BlockCrsMatrix<Scalar, LO, GO, Node>::little_block_type
2099 BlockCrsMatrix<Scalar, LO, GO, Node>::
2100 getLocalBlock (
const LO localRowInd,
const LO localColInd)
const
2102 const size_t absRowBlockOffset = ptrHost_[localRowInd];
2103 const LO relBlockOffset =
2104 this->findRelOffsetOfColumnIndex (localRowInd, localColInd);
2106 if (relBlockOffset != Teuchos::OrdinalTraits<LO>::invalid ()) {
2107 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
2108 return getNonConstLocalBlockFromAbsOffset (absBlockOffset);
2111 return little_block_type ();
2125 template<
class Scalar,
class LO,
class GO,
class Node>
2127 BlockCrsMatrix<Scalar, LO, GO, Node>::
2128 checkSizes (const ::Tpetra::SrcDistObject& source)
2131 typedef BlockCrsMatrix<Scalar, LO, GO, Node> this_type;
2132 const this_type* src =
dynamic_cast<const this_type*
> (&source);
2135 std::ostream& err = markLocalErrorAndGetStream ();
2136 err <<
"checkSizes: The source object of the Import or Export "
2137 "must be a BlockCrsMatrix with the same template parameters as the "
2138 "target object." << endl;
2143 if (src->getBlockSize () != this->getBlockSize ()) {
2144 std::ostream& err = markLocalErrorAndGetStream ();
2145 err <<
"checkSizes: The source and target objects of the Import or "
2146 <<
"Export must have the same block sizes. The source's block "
2147 <<
"size = " << src->getBlockSize () <<
" != the target's block "
2148 <<
"size = " << this->getBlockSize () <<
"." << endl;
2150 if (! src->graph_.isFillComplete ()) {
2151 std::ostream& err = markLocalErrorAndGetStream ();
2152 err <<
"checkSizes: The source object of the Import or Export is "
2153 "not fill complete. Both source and target objects must be fill "
2154 "complete." << endl;
2156 if (! this->graph_.isFillComplete ()) {
2157 std::ostream& err = markLocalErrorAndGetStream ();
2158 err <<
"checkSizes: The target object of the Import or Export is "
2159 "not fill complete. Both source and target objects must be fill "
2160 "complete." << endl;
2162 if (src->graph_.getColMap ().is_null ()) {
2163 std::ostream& err = markLocalErrorAndGetStream ();
2164 err <<
"checkSizes: The source object of the Import or Export does "
2165 "not have a column Map. Both source and target objects must have "
2166 "column Maps." << endl;
2168 if (this->graph_.getColMap ().is_null ()) {
2169 std::ostream& err = markLocalErrorAndGetStream ();
2170 err <<
"checkSizes: The target object of the Import or Export does "
2171 "not have a column Map. Both source and target objects must have "
2172 "column Maps." << endl;
2176 return ! (* (this->localError_));
2179 template<
class Scalar,
class LO,
class GO,
class Node>
2181 BlockCrsMatrix<Scalar, LO, GO, Node>::
2182 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
2184 #else // TPETRA_ENABLE_DEPRECATED_CODE
2186 #endif // TPETRA_ENABLE_DEPRECATED_CODE
2187 (const ::Tpetra::SrcDistObject& source,
2188 const size_t numSameIDs,
2189 const Kokkos::DualView<
const local_ordinal_type*,
2190 buffer_device_type>& permuteToLIDs,
2191 const Kokkos::DualView<
const local_ordinal_type*,
2192 buffer_device_type>& permuteFromLIDs)
2194 using ::Tpetra::Details::Behavior;
2196 using ::Tpetra::Details::ProfilingRegion;
2200 ProfilingRegion profile_region(
"Tpetra::BlockCrsMatrix::copyAndPermute");
2201 const bool debug = Behavior::debug();
2202 const bool verbose = Behavior::verbose();
2207 std::ostringstream os;
2208 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2209 os <<
"Proc " << myRank <<
": BlockCrsMatrix::copyAndPermute : " << endl;
2214 if (* (this->localError_)) {
2215 std::ostream& err = this->markLocalErrorAndGetStream ();
2217 <<
"The target object of the Import or Export is already in an error state."
2226 std::ostringstream os;
2227 os << prefix << endl
2230 std::cerr << os.str ();
2236 if (permuteToLIDs.extent (0) != permuteFromLIDs.extent (0)) {
2237 std::ostream& err = this->markLocalErrorAndGetStream ();
2239 <<
"permuteToLIDs.extent(0) = " << permuteToLIDs.extent (0)
2240 <<
" != permuteFromLIDs.extent(0) = " << permuteFromLIDs.extent(0)
2244 if (permuteToLIDs.need_sync_host () || permuteFromLIDs.need_sync_host ()) {
2245 std::ostream& err = this->markLocalErrorAndGetStream ();
2247 <<
"Both permuteToLIDs and permuteFromLIDs must be sync'd to host."
2252 const this_type* src =
dynamic_cast<const this_type*
> (&source);
2254 std::ostream& err = this->markLocalErrorAndGetStream ();
2256 <<
"The source (input) object of the Import or "
2257 "Export is either not a BlockCrsMatrix, or does not have the right "
2258 "template parameters. checkSizes() should have caught this. "
2259 "Please report this bug to the Tpetra developers." << endl;
2268 const_cast<this_type*
>(src)->sync_host();
2272 bool lclErr =
false;
2273 #ifdef HAVE_TPETRA_DEBUG
2274 std::set<LO> invalidSrcCopyRows;
2275 std::set<LO> invalidDstCopyRows;
2276 std::set<LO> invalidDstCopyCols;
2277 std::set<LO> invalidDstPermuteCols;
2278 std::set<LO> invalidPermuteFromRows;
2279 #endif // HAVE_TPETRA_DEBUG
2288 #ifdef HAVE_TPETRA_DEBUG
2289 const map_type& srcRowMap = * (src->graph_.getRowMap ());
2290 #endif // HAVE_TPETRA_DEBUG
2291 const map_type& dstRowMap = * (this->graph_.getRowMap ());
2292 const map_type& srcColMap = * (src->graph_.getColMap ());
2293 const map_type& dstColMap = * (this->graph_.getColMap ());
2294 const bool canUseLocalColumnIndices = srcColMap.locallySameAs (dstColMap);
2296 const size_t numPermute =
static_cast<size_t> (permuteFromLIDs.extent(0));
2298 std::ostringstream os;
2300 <<
"canUseLocalColumnIndices: "
2301 << (canUseLocalColumnIndices ?
"true" :
"false")
2302 <<
", numPermute: " << numPermute
2304 std::cerr << os.str ();
2307 const auto permuteToLIDsHost = permuteToLIDs.view_host();
2308 const auto permuteFromLIDsHost = permuteFromLIDs.view_host();
2310 if (canUseLocalColumnIndices) {
2312 for (LO localRow = 0; localRow < static_cast<LO> (numSameIDs); ++localRow) {
2313 #ifdef HAVE_TPETRA_DEBUG
2314 if (! srcRowMap.isNodeLocalElement (localRow)) {
2316 invalidSrcCopyRows.insert (localRow);
2319 #endif // HAVE_TPETRA_DEBUG
2321 const LO* lclSrcCols;
2326 LO err = src->getLocalRowView (localRow, lclSrcCols, vals, numEntries);
2329 #ifdef HAVE_TPETRA_DEBUG
2330 (void) invalidSrcCopyRows.insert (localRow);
2331 #endif // HAVE_TPETRA_DEBUG
2334 err = this->replaceLocalValues (localRow, lclSrcCols, vals, numEntries);
2335 if (err != numEntries) {
2337 if (! dstRowMap.isNodeLocalElement (localRow)) {
2338 #ifdef HAVE_TPETRA_DEBUG
2339 invalidDstCopyRows.insert (localRow);
2340 #endif // HAVE_TPETRA_DEBUG
2348 for (LO k = 0; k < numEntries; ++k) {
2349 if (! dstColMap.isNodeLocalElement (lclSrcCols[k])) {
2351 #ifdef HAVE_TPETRA_DEBUG
2352 (void) invalidDstCopyCols.insert (lclSrcCols[k]);
2353 #endif // HAVE_TPETRA_DEBUG
2362 for (
size_t k = 0; k < numPermute; ++k) {
2363 const LO srcLclRow =
static_cast<LO
> (permuteFromLIDsHost(k));
2364 const LO dstLclRow =
static_cast<LO
> (permuteToLIDsHost(k));
2366 const LO* lclSrcCols;
2369 LO err = src->getLocalRowView (srcLclRow, lclSrcCols, vals, numEntries);
2372 #ifdef HAVE_TPETRA_DEBUG
2373 invalidPermuteFromRows.insert (srcLclRow);
2374 #endif // HAVE_TPETRA_DEBUG
2377 err = this->replaceLocalValues (dstLclRow, lclSrcCols, vals, numEntries);
2378 if (err != numEntries) {
2380 #ifdef HAVE_TPETRA_DEBUG
2381 for (LO c = 0; c < numEntries; ++c) {
2382 if (! dstColMap.isNodeLocalElement (lclSrcCols[c])) {
2383 invalidDstPermuteCols.insert (lclSrcCols[c]);
2386 #endif // HAVE_TPETRA_DEBUG
2393 const size_t maxNumEnt = src->graph_.getNodeMaxNumRowEntries ();
2394 Teuchos::Array<LO> lclDstCols (maxNumEnt);
2397 for (LO localRow = 0; localRow < static_cast<LO> (numSameIDs); ++localRow) {
2398 const LO* lclSrcCols;
2405 err = src->getLocalRowView (localRow, lclSrcCols, vals, numEntries);
2406 }
catch (std::exception& e) {
2408 std::ostringstream os;
2409 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2410 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" localRow "
2411 << localRow <<
", src->getLocalRowView() threw an exception: "
2413 std::cerr << os.str ();
2420 #ifdef HAVE_TPETRA_DEBUG
2421 invalidSrcCopyRows.insert (localRow);
2422 #endif // HAVE_TPETRA_DEBUG
2425 if (static_cast<size_t> (numEntries) > static_cast<size_t> (lclDstCols.size ())) {
2428 std::ostringstream os;
2429 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2430 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" localRow "
2431 << localRow <<
", numEntries = " << numEntries <<
" > maxNumEnt = "
2432 << maxNumEnt << endl;
2433 std::cerr << os.str ();
2439 Teuchos::ArrayView<LO> lclDstColsView = lclDstCols.view (0, numEntries);
2440 for (LO j = 0; j < numEntries; ++j) {
2441 lclDstColsView[j] = dstColMap.getLocalElement (srcColMap.getGlobalElement (lclSrcCols[j]));
2442 if (lclDstColsView[j] == Teuchos::OrdinalTraits<LO>::invalid ()) {
2444 #ifdef HAVE_TPETRA_DEBUG
2445 invalidDstCopyCols.insert (lclDstColsView[j]);
2446 #endif // HAVE_TPETRA_DEBUG
2450 err = this->replaceLocalValues (localRow, lclDstColsView.getRawPtr (), vals, numEntries);
2451 }
catch (std::exception& e) {
2453 std::ostringstream os;
2454 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2455 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" localRow "
2456 << localRow <<
", this->replaceLocalValues() threw an exception: "
2458 std::cerr << os.str ();
2462 if (err != numEntries) {
2465 std::ostringstream os;
2466 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2467 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" "
2468 "localRow " << localRow <<
", this->replaceLocalValues "
2469 "returned " << err <<
" instead of numEntries = "
2470 << numEntries << endl;
2471 std::cerr << os.str ();
2479 for (
size_t k = 0; k < numPermute; ++k) {
2480 const LO srcLclRow =
static_cast<LO
> (permuteFromLIDsHost(k));
2481 const LO dstLclRow =
static_cast<LO
> (permuteToLIDsHost(k));
2483 const LO* lclSrcCols;
2488 err = src->getLocalRowView (srcLclRow, lclSrcCols, vals, numEntries);
2489 }
catch (std::exception& e) {
2491 std::ostringstream os;
2492 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2493 os <<
"Proc " << myRank <<
": copyAndPermute: At \"permute\" "
2494 "srcLclRow " << srcLclRow <<
" and dstLclRow " << dstLclRow
2495 <<
", src->getLocalRowView() threw an exception: " << e.what ();
2496 std::cerr << os.str ();
2503 #ifdef HAVE_TPETRA_DEBUG
2504 invalidPermuteFromRows.insert (srcLclRow);
2505 #endif // HAVE_TPETRA_DEBUG
2508 if (static_cast<size_t> (numEntries) > static_cast<size_t> (lclDstCols.size ())) {
2514 Teuchos::ArrayView<LO> lclDstColsView = lclDstCols.view (0, numEntries);
2515 for (LO j = 0; j < numEntries; ++j) {
2516 lclDstColsView[j] = dstColMap.getLocalElement (srcColMap.getGlobalElement (lclSrcCols[j]));
2517 if (lclDstColsView[j] == Teuchos::OrdinalTraits<LO>::invalid ()) {
2519 #ifdef HAVE_TPETRA_DEBUG
2520 invalidDstPermuteCols.insert (lclDstColsView[j]);
2521 #endif // HAVE_TPETRA_DEBUG
2524 err = this->replaceLocalValues (dstLclRow, lclDstColsView.getRawPtr (), vals, numEntries);
2525 if (err != numEntries) {
2534 std::ostream& err = this->markLocalErrorAndGetStream ();
2535 #ifdef HAVE_TPETRA_DEBUG
2536 err <<
"copyAndPermute: The graph structure of the source of the "
2537 "Import or Export must be a subset of the graph structure of the "
2539 err <<
"invalidSrcCopyRows = [";
2540 for (
typename std::set<LO>::const_iterator it = invalidSrcCopyRows.begin ();
2541 it != invalidSrcCopyRows.end (); ++it) {
2543 typename std::set<LO>::const_iterator itp1 = it;
2545 if (itp1 != invalidSrcCopyRows.end ()) {
2549 err <<
"], invalidDstCopyRows = [";
2550 for (
typename std::set<LO>::const_iterator it = invalidDstCopyRows.begin ();
2551 it != invalidDstCopyRows.end (); ++it) {
2553 typename std::set<LO>::const_iterator itp1 = it;
2555 if (itp1 != invalidDstCopyRows.end ()) {
2559 err <<
"], invalidDstCopyCols = [";
2560 for (
typename std::set<LO>::const_iterator it = invalidDstCopyCols.begin ();
2561 it != invalidDstCopyCols.end (); ++it) {
2563 typename std::set<LO>::const_iterator itp1 = it;
2565 if (itp1 != invalidDstCopyCols.end ()) {
2569 err <<
"], invalidDstPermuteCols = [";
2570 for (
typename std::set<LO>::const_iterator it = invalidDstPermuteCols.begin ();
2571 it != invalidDstPermuteCols.end (); ++it) {
2573 typename std::set<LO>::const_iterator itp1 = it;
2575 if (itp1 != invalidDstPermuteCols.end ()) {
2579 err <<
"], invalidPermuteFromRows = [";
2580 for (
typename std::set<LO>::const_iterator it = invalidPermuteFromRows.begin ();
2581 it != invalidPermuteFromRows.end (); ++it) {
2583 typename std::set<LO>::const_iterator itp1 = it;
2585 if (itp1 != invalidPermuteFromRows.end ()) {
2591 err <<
"copyAndPermute: The graph structure of the source of the "
2592 "Import or Export must be a subset of the graph structure of the "
2594 #endif // HAVE_TPETRA_DEBUG
2598 std::ostringstream os;
2599 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2600 const bool lclSuccess = ! (* (this->localError_));
2601 os <<
"*** Proc " << myRank <<
": copyAndPermute "
2602 << (lclSuccess ?
"succeeded" :
"FAILED");
2606 os <<
": error messages: " << this->errorMessages ();
2608 std::cerr << os.str ();
2632 template<
class LO,
class GO,
class D>
2634 packRowCount (
const size_t numEnt,
2635 const size_t numBytesPerValue,
2636 const size_t blkSize)
2638 using ::Tpetra::Details::PackTraits;
2648 const size_t numEntLen = PackTraits<LO, D>::packValueCount (numEntLO);
2649 const size_t gidsLen = numEnt * PackTraits<GO, D>::packValueCount (gid);
2650 const size_t valsLen = numEnt * numBytesPerValue * blkSize * blkSize;
2651 return numEntLen + gidsLen + valsLen;
2665 template<
class ST,
class LO,
class GO,
class D>
2667 unpackRowCount (
const typename ::Tpetra::Details::PackTraits<LO, D>::input_buffer_type& imports,
2668 const size_t offset,
2669 const size_t numBytes,
2672 using Kokkos::subview;
2673 using ::Tpetra::Details::PackTraits;
2675 if (numBytes == 0) {
2677 return static_cast<size_t> (0);
2681 const size_t theNumBytes = PackTraits<LO, D>::packValueCount (numEntLO);
2682 TEUCHOS_TEST_FOR_EXCEPTION
2683 (theNumBytes > numBytes, std::logic_error,
"unpackRowCount: "
2684 "theNumBytes = " << theNumBytes <<
" < numBytes = " << numBytes
2686 const char*
const inBuf = imports.data () + offset;
2687 const size_t actualNumBytes = PackTraits<LO, D>::unpackValue (numEntLO, inBuf);
2688 TEUCHOS_TEST_FOR_EXCEPTION
2689 (actualNumBytes > numBytes, std::logic_error,
"unpackRowCount: "
2690 "actualNumBytes = " << actualNumBytes <<
" < numBytes = " << numBytes
2692 return static_cast<size_t> (numEntLO);
2699 template<
class ST,
class LO,
class GO,
class D>
2701 packRowForBlockCrs (
const typename ::Tpetra::Details::PackTraits<LO, D>::output_buffer_type exports,
2702 const size_t offset,
2703 const size_t numEnt,
2704 const typename ::Tpetra::Details::PackTraits<GO, D>::input_array_type& gidsIn,
2705 const typename ::Tpetra::Details::PackTraits<ST, D>::input_array_type& valsIn,
2706 const size_t numBytesPerValue,
2707 const size_t blockSize)
2709 using Kokkos::subview;
2710 using ::Tpetra::Details::PackTraits;
2716 const size_t numScalarEnt = numEnt * blockSize * blockSize;
2719 const LO numEntLO =
static_cast<size_t> (numEnt);
2721 const size_t numEntBeg = offset;
2722 const size_t numEntLen = PackTraits<LO, D>::packValueCount (numEntLO);
2723 const size_t gidsBeg = numEntBeg + numEntLen;
2724 const size_t gidsLen = numEnt * PackTraits<GO, D>::packValueCount (gid);
2725 const size_t valsBeg = gidsBeg + gidsLen;
2726 const size_t valsLen = numScalarEnt * numBytesPerValue;
2728 char*
const numEntOut = exports.data () + numEntBeg;
2729 char*
const gidsOut = exports.data () + gidsBeg;
2730 char*
const valsOut = exports.data () + valsBeg;
2732 size_t numBytesOut = 0;
2734 numBytesOut += PackTraits<LO, D>::packValue (numEntOut, numEntLO);
2737 Kokkos::pair<int, size_t> p;
2738 p = PackTraits<GO, D>::packArray (gidsOut, gidsIn.data (), numEnt);
2739 errorCode += p.first;
2740 numBytesOut += p.second;
2742 p = PackTraits<ST, D>::packArray (valsOut, valsIn.data (), numScalarEnt);
2743 errorCode += p.first;
2744 numBytesOut += p.second;
2747 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
2748 TEUCHOS_TEST_FOR_EXCEPTION
2749 (numBytesOut != expectedNumBytes, std::logic_error,
2750 "packRowForBlockCrs: numBytesOut = " << numBytesOut
2751 <<
" != expectedNumBytes = " << expectedNumBytes <<
".");
2753 TEUCHOS_TEST_FOR_EXCEPTION
2754 (errorCode != 0, std::runtime_error,
"packRowForBlockCrs: "
2755 "PackTraits::packArray returned a nonzero error code");
2761 template<
class ST,
class LO,
class GO,
class D>
2763 unpackRowForBlockCrs (
const typename ::Tpetra::Details::PackTraits<GO, D>::output_array_type& gidsOut,
2764 const typename ::Tpetra::Details::PackTraits<ST, D>::output_array_type& valsOut,
2765 const typename ::Tpetra::Details::PackTraits<int, D>::input_buffer_type& imports,
2766 const size_t offset,
2767 const size_t numBytes,
2768 const size_t numEnt,
2769 const size_t numBytesPerValue,
2770 const size_t blockSize)
2772 using ::Tpetra::Details::PackTraits;
2774 if (numBytes == 0) {
2778 const size_t numScalarEnt = numEnt * blockSize * blockSize;
2779 TEUCHOS_TEST_FOR_EXCEPTION
2780 (static_cast<size_t> (imports.extent (0)) <= offset,
2781 std::logic_error,
"unpackRowForBlockCrs: imports.extent(0) = "
2782 << imports.extent (0) <<
" <= offset = " << offset <<
".");
2783 TEUCHOS_TEST_FOR_EXCEPTION
2784 (static_cast<size_t> (imports.extent (0)) < offset + numBytes,
2785 std::logic_error,
"unpackRowForBlockCrs: imports.extent(0) = "
2786 << imports.extent (0) <<
" < offset + numBytes = "
2787 << (offset + numBytes) <<
".");
2792 const size_t numEntBeg = offset;
2793 const size_t numEntLen = PackTraits<LO, D>::packValueCount (lid);
2794 const size_t gidsBeg = numEntBeg + numEntLen;
2795 const size_t gidsLen = numEnt * PackTraits<GO, D>::packValueCount (gid);
2796 const size_t valsBeg = gidsBeg + gidsLen;
2797 const size_t valsLen = numScalarEnt * numBytesPerValue;
2799 const char*
const numEntIn = imports.data () + numEntBeg;
2800 const char*
const gidsIn = imports.data () + gidsBeg;
2801 const char*
const valsIn = imports.data () + valsBeg;
2803 size_t numBytesOut = 0;
2806 numBytesOut += PackTraits<LO, D>::unpackValue (numEntOut, numEntIn);
2807 TEUCHOS_TEST_FOR_EXCEPTION
2808 (static_cast<size_t> (numEntOut) != numEnt, std::logic_error,
2809 "unpackRowForBlockCrs: Expected number of entries " << numEnt
2810 <<
" != actual number of entries " << numEntOut <<
".");
2813 Kokkos::pair<int, size_t> p;
2814 p = PackTraits<GO, D>::unpackArray (gidsOut.data (), gidsIn, numEnt);
2815 errorCode += p.first;
2816 numBytesOut += p.second;
2818 p = PackTraits<ST, D>::unpackArray (valsOut.data (), valsIn, numScalarEnt);
2819 errorCode += p.first;
2820 numBytesOut += p.second;
2823 TEUCHOS_TEST_FOR_EXCEPTION
2824 (numBytesOut != numBytes, std::logic_error,
2825 "unpackRowForBlockCrs: numBytesOut = " << numBytesOut
2826 <<
" != numBytes = " << numBytes <<
".");
2828 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
2829 TEUCHOS_TEST_FOR_EXCEPTION
2830 (numBytesOut != expectedNumBytes, std::logic_error,
2831 "unpackRowForBlockCrs: numBytesOut = " << numBytesOut
2832 <<
" != expectedNumBytes = " << expectedNumBytes <<
".");
2834 TEUCHOS_TEST_FOR_EXCEPTION
2835 (errorCode != 0, std::runtime_error,
"unpackRowForBlockCrs: "
2836 "PackTraits::unpackArray returned a nonzero error code");
2842 template<
class Scalar,
class LO,
class GO,
class Node>
2844 BlockCrsMatrix<Scalar, LO, GO, Node>::
2845 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
2847 #else // TPETRA_ENABLE_DEPRECATED_CODE
2849 #endif // TPETRA_ENABLE_DEPRECATED_CODE
2850 (const ::Tpetra::SrcDistObject& source,
2851 const Kokkos::DualView<
const local_ordinal_type*,
2852 buffer_device_type>& exportLIDs,
2853 Kokkos::DualView<packet_type*,
2854 buffer_device_type>& exports,
2855 Kokkos::DualView<
size_t*,
2856 buffer_device_type> numPacketsPerLID,
2857 size_t& constantNumPackets,
2860 using ::Tpetra::Details::Behavior;
2862 using ::Tpetra::Details::ProfilingRegion;
2863 using ::Tpetra::Details::PackTraits;
2865 typedef typename Kokkos::View<int*, device_type>::HostMirror::execution_space host_exec;
2869 ProfilingRegion profile_region(
"Tpetra::BlockCrsMatrix::packAndPrepare");
2871 const bool debug = Behavior::debug();
2872 const bool verbose = Behavior::verbose();
2877 std::ostringstream os;
2878 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2879 os <<
"Proc " << myRank <<
": BlockCrsMatrix::packAndPrepare: " << std::endl;
2884 if (* (this->localError_)) {
2885 std::ostream& err = this->markLocalErrorAndGetStream ();
2887 <<
"The target object of the Import or Export is already in an error state."
2896 std::ostringstream os;
2897 os << prefix << std::endl
2901 std::cerr << os.str ();
2907 if (exportLIDs.extent (0) != numPacketsPerLID.extent (0)) {
2908 std::ostream& err = this->markLocalErrorAndGetStream ();
2910 <<
"exportLIDs.extent(0) = " << exportLIDs.extent (0)
2911 <<
" != numPacketsPerLID.extent(0) = " << numPacketsPerLID.extent(0)
2912 <<
"." << std::endl;
2915 if (exportLIDs.need_sync_host ()) {
2916 std::ostream& err = this->markLocalErrorAndGetStream ();
2917 err << prefix <<
"exportLIDs be sync'd to host." << std::endl;
2921 const this_type* src =
dynamic_cast<const this_type*
> (&source);
2923 std::ostream& err = this->markLocalErrorAndGetStream ();
2925 <<
"The source (input) object of the Import or "
2926 "Export is either not a BlockCrsMatrix, or does not have the right "
2927 "template parameters. checkSizes() should have caught this. "
2928 "Please report this bug to the Tpetra developers." << std::endl;
2940 constantNumPackets = 0;
2943 const crs_graph_type& srcGraph = src->graph_;
2944 const size_t blockSize =
static_cast<size_t> (src->getBlockSize ());
2945 const size_t numExportLIDs = exportLIDs.extent (0);
2946 const size_t numBytesPerValue =
2947 PackTraits<impl_scalar_type, host_exec>
2948 ::packValueCount(this->val_.extent(0) ? this->val_.view_host()(0) : impl_scalar_type());
2954 Impl::BlockCrsRowStruct<size_t> rowReducerStruct;
2957 auto exportLIDsHost = exportLIDs.view_host();
2958 auto numPacketsPerLIDHost = numPacketsPerLID.view_host();
2959 numPacketsPerLID.modify_host ();
2961 using reducer_type = Impl::BlockCrsReducer<Impl::BlockCrsRowStruct<size_t>,host_exec>;
2962 const auto policy = Kokkos::RangePolicy<host_exec>(size_t(0), numExportLIDs);
2963 Kokkos::parallel_reduce
2965 [=](
const size_t &i,
typename reducer_type::value_type &update) {
2966 const LO lclRow = exportLIDsHost(i);
2967 size_t numEnt = srcGraph.getNumEntriesInLocalRow (lclRow);
2968 numEnt = (numEnt == Teuchos::OrdinalTraits<size_t>::invalid () ? 0 : numEnt);
2970 const size_t numBytes =
2971 packRowCount<LO, GO, host_exec> (numEnt, numBytesPerValue, blockSize);
2972 numPacketsPerLIDHost(i) = numBytes;
2973 update +=
typename reducer_type::value_type(numEnt, numBytes, numEnt);
2974 }, rowReducerStruct);
2980 const size_t totalNumBytes = rowReducerStruct.totalNumBytes;
2981 const size_t totalNumEntries = rowReducerStruct.totalNumEntries;
2982 const size_t maxRowLength = rowReducerStruct.maxRowLength;
2985 std::ostringstream os;
2987 <<
"totalNumBytes = " << totalNumBytes <<
", totalNumEntries = " << totalNumEntries
2989 std::cerr << os.str ();
2995 exports.resize (totalNumBytes);
2996 if (totalNumEntries > 0) {
2998 Kokkos::View<size_t*, host_exec> offset(
"offset", numExportLIDs+1);
3000 const auto policy = Kokkos::RangePolicy<host_exec>(size_t(0), numExportLIDs+1);
3001 Kokkos::parallel_scan
3003 [=](
const size_t &i,
size_t &update,
const bool &
final) {
3004 if (
final) offset(i) = update;
3005 update += (i == numExportLIDs ? 0 : numPacketsPerLIDHost(i));
3008 if (offset(numExportLIDs) != totalNumBytes) {
3009 std::ostream& err = this->markLocalErrorAndGetStream ();
3011 <<
"At end of method, the final offset (in bytes) "
3012 << offset(numExportLIDs) <<
" does not equal the total number of bytes packed "
3013 << totalNumBytes <<
". "
3014 <<
"Please report this bug to the Tpetra developers." << std::endl;
3024 const map_type& srcColMap = * (srcGraph.getColMap ());
3028 exports.modify_host();
3030 typedef Kokkos::TeamPolicy<host_exec> policy_type;
3032 policy_type(numExportLIDs, 1, 1)
3033 .set_scratch_size(0, Kokkos::PerTeam(
sizeof(GO)*maxRowLength));
3034 Kokkos::parallel_for
3036 [=](
const typename policy_type::member_type &member) {
3037 const size_t i = member.league_rank();
3038 Kokkos::View<GO*, typename host_exec::scratch_memory_space>
3039 gblColInds(member.team_scratch(0), maxRowLength);
3041 const LO lclRowInd = exportLIDsHost(i);
3042 const LO* lclColIndsRaw;
3048 (void) src->getLocalRowView (lclRowInd, lclColIndsRaw, valsRaw, numEntLO);
3050 const size_t numEnt =
static_cast<size_t> (numEntLO);
3051 Kokkos::View<const LO*,host_exec> lclColInds (lclColIndsRaw, numEnt);
3054 for (
size_t j = 0; j < numEnt; ++j)
3055 gblColInds(j) = srcColMap.getGlobalElement (lclColInds(j));
3061 const size_t numBytes = packRowForBlockCrs<impl_scalar_type,LO,GO,host_exec>
3062 (exports.view_host(),
3065 Kokkos::View<const GO*,host_exec>(gblColInds.data(), maxRowLength),
3066 Kokkos::View<const impl_scalar_type*,host_exec>(reinterpret_cast<const impl_scalar_type*>(valsRaw), numEnt*blockSize*blockSize),
3072 const size_t offsetDiff = offset(i+1) - offset(i);
3073 if (numBytes != offsetDiff) {
3074 std::ostringstream os;
3076 <<
"numBytes computed from packRowForBlockCrs is different from "
3077 <<
"precomputed offset values, LID = " << i << std::endl;
3078 std::cerr << os.str ();
3086 std::ostringstream os;
3087 const bool lclSuccess = ! (* (this->localError_));
3089 << (lclSuccess ?
"succeeded" :
"FAILED")
3091 std::cerr << os.str ();
3095 template<
class Scalar,
class LO,
class GO,
class Node>
3097 BlockCrsMatrix<Scalar, LO, GO, Node>::
3098 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
3100 #else // TPETRA_ENABLE_DEPRECATED_CODE
3102 #endif // TPETRA_ENABLE_DEPRECATED_CODE
3103 (
const Kokkos::DualView<
const local_ordinal_type*,
3104 buffer_device_type>& importLIDs,
3105 Kokkos::DualView<packet_type*,
3106 buffer_device_type> imports,
3107 Kokkos::DualView<
size_t*,
3108 buffer_device_type> numPacketsPerLID,
3113 using ::Tpetra::Details::Behavior;
3115 using ::Tpetra::Details::ProfilingRegion;
3116 using ::Tpetra::Details::PackTraits;
3119 typename Kokkos::View<int*, device_type>::HostMirror::execution_space;
3121 ProfilingRegion profile_region(
"Tpetra::BlockCrsMatrix::unpackAndCombine");
3122 const bool verbose = Behavior::verbose ();
3127 std::ostringstream os;
3128 auto map = this->graph_.getRowMap ();
3129 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
3130 const int myRank = comm.is_null () ? -1 : comm->getRank ();
3131 os <<
"Proc " << myRank <<
": Tpetra::BlockCrsMatrix::unpackAndCombine: ";
3134 os <<
"Start" << endl;
3135 std::cerr << os.str ();
3140 if (* (this->localError_)) {
3141 std::ostream& err = this->markLocalErrorAndGetStream ();
3142 std::ostringstream os;
3143 os << prefix <<
"{Im/Ex}port target is already in error." << endl;
3145 std::cerr << os.str ();
3154 if (importLIDs.extent (0) != numPacketsPerLID.extent (0)) {
3155 std::ostream& err = this->markLocalErrorAndGetStream ();
3156 std::ostringstream os;
3157 os << prefix <<
"importLIDs.extent(0) = " << importLIDs.extent (0)
3158 <<
" != numPacketsPerLID.extent(0) = " << numPacketsPerLID.extent(0)
3161 std::cerr << os.str ();
3167 if (combineMode !=
ADD && combineMode !=
INSERT &&
3169 combineMode != ZERO) {
3170 std::ostream& err = this->markLocalErrorAndGetStream ();
3171 std::ostringstream os;
3173 <<
"Invalid CombineMode value " << combineMode <<
". Valid "
3174 <<
"values include ADD, INSERT, REPLACE, ABSMAX, and ZERO."
3177 std::cerr << os.str ();
3183 if (this->graph_.getColMap ().is_null ()) {
3184 std::ostream& err = this->markLocalErrorAndGetStream ();
3185 std::ostringstream os;
3186 os << prefix <<
"Target matrix's column Map is null." << endl;
3188 std::cerr << os.str ();
3197 const map_type& tgtColMap = * (this->graph_.getColMap ());
3200 const size_t blockSize = this->getBlockSize ();
3201 const size_t numImportLIDs = importLIDs.extent(0);
3208 const size_t numBytesPerValue =
3209 PackTraits<impl_scalar_type, host_exec>::packValueCount
3210 (this->val_.extent (0) ? this->val_.view_host () (0) : impl_scalar_type ());
3211 const size_t maxRowNumEnt = graph_.getNodeMaxNumRowEntries ();
3212 const size_t maxRowNumScalarEnt = maxRowNumEnt * blockSize * blockSize;
3215 std::ostringstream os;
3216 os << prefix <<
"combineMode: "
3217 << ::Tpetra::combineModeToString (combineMode)
3218 <<
", blockSize: " << blockSize
3219 <<
", numImportLIDs: " << numImportLIDs
3220 <<
", numBytesPerValue: " << numBytesPerValue
3221 <<
", maxRowNumEnt: " << maxRowNumEnt
3222 <<
", maxRowNumScalarEnt: " << maxRowNumScalarEnt << endl;
3223 std::cerr << os.str ();
3226 if (combineMode == ZERO || numImportLIDs == 0) {
3228 std::ostringstream os;
3229 os << prefix <<
"Nothing to unpack. Done!" << std::endl;
3230 std::cerr << os.str ();
3237 if (imports.need_sync_host ()) {
3238 imports.sync_host ();
3248 if (imports.need_sync_host () || numPacketsPerLID.need_sync_host () ||
3249 importLIDs.need_sync_host ()) {
3250 std::ostream& err = this->markLocalErrorAndGetStream ();
3251 std::ostringstream os;
3252 os << prefix <<
"All input DualViews must be sync'd to host by now. "
3253 <<
"imports_nc.need_sync_host()="
3254 << (imports.need_sync_host () ?
"true" :
"false")
3255 <<
", numPacketsPerLID.need_sync_host()="
3256 << (numPacketsPerLID.need_sync_host () ?
"true" :
"false")
3257 <<
", importLIDs.need_sync_host()="
3258 << (importLIDs.need_sync_host () ?
"true" :
"false")
3261 std::cerr << os.str ();
3267 const auto importLIDsHost = importLIDs.view_host ();
3268 const auto numPacketsPerLIDHost = numPacketsPerLID.view_host ();
3274 Kokkos::View<size_t*, host_exec> offset (
"offset", numImportLIDs+1);
3276 const auto policy = Kokkos::RangePolicy<host_exec>(size_t(0), numImportLIDs+1);
3277 Kokkos::parallel_scan
3278 (
"Tpetra::BlockCrsMatrix::unpackAndCombine: offsets", policy,
3279 [=] (
const size_t &i,
size_t &update,
const bool &
final) {
3280 if (
final) offset(i) = update;
3281 update += (i == numImportLIDs ? 0 : numPacketsPerLIDHost(i));
3291 Kokkos::View<int, host_exec, Kokkos::MemoryTraits<Kokkos::Atomic> >
3292 errorDuringUnpack (
"errorDuringUnpack");
3293 errorDuringUnpack () = 0;
3295 using policy_type = Kokkos::TeamPolicy<host_exec>;
3296 const auto policy = policy_type (numImportLIDs, 1, 1)
3297 .set_scratch_size (0, Kokkos::PerTeam (
sizeof (GO) * maxRowNumEnt +
3298 sizeof (LO) * maxRowNumEnt +
3299 numBytesPerValue * maxRowNumScalarEnt));
3300 using host_scratch_space =
typename host_exec::scratch_memory_space;
3301 using pair_type = Kokkos::pair<size_t, size_t>;
3302 Kokkos::parallel_for
3303 (
"Tpetra::BlockCrsMatrix::unpackAndCombine: unpack", policy,
3304 [=] (
const typename policy_type::member_type& member) {
3305 const size_t i = member.league_rank();
3307 Kokkos::View<GO*, host_scratch_space> gblColInds
3308 (member.team_scratch (0), maxRowNumEnt);
3309 Kokkos::View<LO*, host_scratch_space> lclColInds
3310 (member.team_scratch (0), maxRowNumEnt);
3311 Kokkos::View<impl_scalar_type*, host_scratch_space> vals
3312 (member.team_scratch (0), maxRowNumScalarEnt);
3314 const size_t offval = offset(i);
3315 const LO lclRow = importLIDsHost(i);
3316 const size_t numBytes = numPacketsPerLIDHost(i);
3317 const size_t numEnt =
3318 unpackRowCount<impl_scalar_type, LO, GO, host_exec>
3319 (imports.view_host (), offval, numBytes, numBytesPerValue);
3322 if (numEnt > maxRowNumEnt) {
3323 errorDuringUnpack() = 1;
3325 std::ostringstream os;
3327 <<
"At i = " << i <<
", numEnt = " << numEnt
3328 <<
" > maxRowNumEnt = " << maxRowNumEnt
3330 std::cerr << os.str ();
3335 const size_t numScalarEnt = numEnt*blockSize*blockSize;
3336 auto gidsOut = Kokkos::subview(gblColInds, pair_type(0, numEnt));
3337 auto lidsOut = Kokkos::subview(lclColInds, pair_type(0, numEnt));
3338 auto valsOut = Kokkos::subview(vals, pair_type(0, numScalarEnt));
3343 size_t numBytesOut = 0;
3346 unpackRowForBlockCrs<impl_scalar_type, LO, GO, host_exec>
3347 (Kokkos::View<GO*,host_exec>(gidsOut.data(), numEnt),
3348 Kokkos::View<impl_scalar_type*,host_exec>(valsOut.data(), numScalarEnt),
3349 imports.view_host(),
3350 offval, numBytes, numEnt,
3351 numBytesPerValue, blockSize);
3353 catch (std::exception& e) {
3354 errorDuringUnpack() = 1;
3356 std::ostringstream os;
3357 os << prefix <<
"At i = " << i <<
", unpackRowForBlockCrs threw: "
3358 << e.what () << endl;
3359 std::cerr << os.str ();
3364 if (numBytes != numBytesOut) {
3365 errorDuringUnpack() = 1;
3367 std::ostringstream os;
3369 <<
"At i = " << i <<
", numBytes = " << numBytes
3370 <<
" != numBytesOut = " << numBytesOut <<
"."
3372 std::cerr << os.str();
3378 for (
size_t k = 0; k < numEnt; ++k) {
3379 lidsOut(k) = tgtColMap.getLocalElement (gidsOut(k));
3380 if (lidsOut(k) == Teuchos::OrdinalTraits<LO>::invalid ()) {
3382 std::ostringstream os;
3384 <<
"At i = " << i <<
", GID " << gidsOut(k)
3385 <<
" is not owned by the calling process."
3387 std::cerr << os.str();
3395 const LO*
const lidsRaw =
const_cast<const LO*
> (lidsOut.data ());
3396 const Scalar*
const valsRaw =
reinterpret_cast<const Scalar*
>
3397 (
const_cast<const impl_scalar_type*
> (valsOut.data ()));
3398 if (combineMode ==
ADD) {
3399 numCombd = this->sumIntoLocalValues (lclRow, lidsRaw, valsRaw, numEnt);
3400 }
else if (combineMode ==
INSERT || combineMode ==
REPLACE) {
3401 numCombd = this->replaceLocalValues (lclRow, lidsRaw, valsRaw, numEnt);
3402 }
else if (combineMode ==
ABSMAX) {
3403 numCombd = this->absMaxLocalValues (lclRow, lidsRaw, valsRaw, numEnt);
3406 if (static_cast<LO> (numEnt) != numCombd) {
3407 errorDuringUnpack() = 1;
3409 std::ostringstream os;
3410 os << prefix <<
"At i = " << i <<
", numEnt = " << numEnt
3411 <<
" != numCombd = " << numCombd <<
"."
3413 std::cerr << os.str ();
3420 if (errorDuringUnpack () != 0) {
3421 std::ostream& err = this->markLocalErrorAndGetStream ();
3422 err << prefix <<
"Unpacking failed.";
3424 err <<
" Please run again with the environment variable setting "
3425 "TPETRA_VERBOSE=1 to get more verbose diagnostic output.";
3431 std::ostringstream os;
3432 const bool lclSuccess = ! (* (this->localError_));
3434 << (lclSuccess ?
"succeeded" :
"FAILED")
3436 std::cerr << os.str ();
3440 template<
class Scalar,
class LO,
class GO,
class Node>
3444 using Teuchos::TypeNameTraits;
3445 std::ostringstream os;
3446 os <<
"\"Tpetra::BlockCrsMatrix\": { "
3447 <<
"Template parameters: { "
3448 <<
"Scalar: " << TypeNameTraits<Scalar>::name ()
3449 <<
"LO: " << TypeNameTraits<LO>::name ()
3450 <<
"GO: " << TypeNameTraits<GO>::name ()
3451 <<
"Node: " << TypeNameTraits<Node>::name ()
3453 <<
", Label: \"" << this->getObjectLabel () <<
"\""
3454 <<
", Global dimensions: ["
3455 << graph_.getDomainMap ()->getGlobalNumElements () <<
", "
3456 << graph_.getRangeMap ()->getGlobalNumElements () <<
"]"
3457 <<
", Block size: " << getBlockSize ()
3463 template<
class Scalar,
class LO,
class GO,
class Node>
3467 const Teuchos::EVerbosityLevel verbLevel)
const
3469 using Teuchos::ArrayRCP;
3470 using Teuchos::CommRequest;
3471 using Teuchos::FancyOStream;
3472 using Teuchos::getFancyOStream;
3473 using Teuchos::ireceive;
3474 using Teuchos::isend;
3475 using Teuchos::outArg;
3476 using Teuchos::TypeNameTraits;
3477 using Teuchos::VERB_DEFAULT;
3478 using Teuchos::VERB_NONE;
3479 using Teuchos::VERB_LOW;
3480 using Teuchos::VERB_MEDIUM;
3482 using Teuchos::VERB_EXTREME;
3484 using Teuchos::wait;
3486 #ifdef HAVE_TPETRA_DEBUG
3487 const char prefix[] =
"Tpetra::Experimental::BlockCrsMatrix::describe: ";
3488 #endif // HAVE_TPETRA_DEBUG
3491 const Teuchos::EVerbosityLevel vl =
3492 (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
3494 if (vl == VERB_NONE) {
3499 Teuchos::OSTab tab0 (out);
3501 out <<
"\"Tpetra::BlockCrsMatrix\":" << endl;
3502 Teuchos::OSTab tab1 (out);
3504 out <<
"Template parameters:" << endl;
3506 Teuchos::OSTab tab2 (out);
3507 out <<
"Scalar: " << TypeNameTraits<Scalar>::name () << endl
3508 <<
"LO: " << TypeNameTraits<LO>::name () << endl
3509 <<
"GO: " << TypeNameTraits<GO>::name () << endl
3510 <<
"Node: " << TypeNameTraits<Node>::name () << endl;
3512 out <<
"Label: \"" << this->getObjectLabel () <<
"\"" << endl
3513 <<
"Global dimensions: ["
3514 << graph_.getDomainMap ()->getGlobalNumElements () <<
", "
3515 << graph_.getRangeMap ()->getGlobalNumElements () <<
"]" << endl;
3517 const LO blockSize = getBlockSize ();
3518 out <<
"Block size: " << blockSize << endl;
3521 if (vl >= VERB_MEDIUM) {
3522 const Teuchos::Comm<int>& comm = * (graph_.getMap ()->getComm ());
3523 const int myRank = comm.getRank ();
3525 out <<
"Row Map:" << endl;
3527 getRowMap()->describe (out, vl);
3529 if (! getColMap ().is_null ()) {
3530 if (getColMap() == getRowMap()) {
3532 out <<
"Column Map: same as row Map" << endl;
3537 out <<
"Column Map:" << endl;
3539 getColMap ()->describe (out, vl);
3542 if (! getDomainMap ().is_null ()) {
3543 if (getDomainMap () == getRowMap ()) {
3545 out <<
"Domain Map: same as row Map" << endl;
3548 else if (getDomainMap () == getColMap ()) {
3550 out <<
"Domain Map: same as column Map" << endl;
3555 out <<
"Domain Map:" << endl;
3557 getDomainMap ()->describe (out, vl);
3560 if (! getRangeMap ().is_null ()) {
3561 if (getRangeMap () == getDomainMap ()) {
3563 out <<
"Range Map: same as domain Map" << endl;
3566 else if (getRangeMap () == getRowMap ()) {
3568 out <<
"Range Map: same as row Map" << endl;
3573 out <<
"Range Map: " << endl;
3575 getRangeMap ()->describe (out, vl);
3580 if (vl >= VERB_EXTREME) {
3586 const_cast<this_type*
> (
this)->sync_host ();
3588 #ifdef HAVE_TPETRA_DEBUG
3589 TEUCHOS_TEST_FOR_EXCEPTION
3590 (this->need_sync_host (), std::logic_error,
3591 prefix <<
"Right after sync to host, the matrix claims that it needs "
3592 "sync to host. Please report this bug to the Tpetra developers.");
3593 #endif // HAVE_TPETRA_DEBUG
3595 const Teuchos::Comm<int>& comm = * (graph_.getMap ()->getComm ());
3596 const int myRank = comm.getRank ();
3597 const int numProcs = comm.getSize ();
3600 RCP<std::ostringstream> lclOutStrPtr (
new std::ostringstream ());
3601 RCP<FancyOStream> osPtr = getFancyOStream (lclOutStrPtr);
3602 FancyOStream& os = *osPtr;
3603 os <<
"Process " << myRank <<
":" << endl;
3604 Teuchos::OSTab tab2 (os);
3606 const map_type& meshRowMap = * (graph_.getRowMap ());
3607 const map_type& meshColMap = * (graph_.getColMap ());
3612 os <<
"Row " << meshGblRow <<
": {";
3614 const LO* lclColInds = NULL;
3615 Scalar* vals = NULL;
3617 this->getLocalRowView (meshLclRow, lclColInds, vals, numInds);
3619 for (LO k = 0; k < numInds; ++k) {
3622 os <<
"Col " << gblCol <<
": [";
3623 for (LO i = 0; i < blockSize; ++i) {
3624 for (LO j = 0; j < blockSize; ++j) {
3625 os << vals[blockSize*blockSize*k + i*blockSize + j];
3626 if (j + 1 < blockSize) {
3630 if (i + 1 < blockSize) {
3635 if (k + 1 < numInds) {
3645 out << lclOutStrPtr->str ();
3646 lclOutStrPtr = Teuchos::null;
3649 const int sizeTag = 1337;
3650 const int dataTag = 1338;
3652 ArrayRCP<char> recvDataBuf;
3656 for (
int p = 1; p < numProcs; ++p) {
3659 ArrayRCP<size_t> recvSize (1);
3661 RCP<CommRequest<int> > recvSizeReq =
3662 ireceive<int, size_t> (recvSize, p, sizeTag, comm);
3663 wait<int> (comm, outArg (recvSizeReq));
3664 const size_t numCharsToRecv = recvSize[0];
3671 if (static_cast<size_t>(recvDataBuf.size()) < numCharsToRecv + 1) {
3672 recvDataBuf.resize (numCharsToRecv + 1);
3674 ArrayRCP<char> recvData = recvDataBuf.persistingView (0, numCharsToRecv);
3676 RCP<CommRequest<int> > recvDataReq =
3677 ireceive<int, char> (recvData, p, dataTag, comm);
3678 wait<int> (comm, outArg (recvDataReq));
3683 recvDataBuf[numCharsToRecv] =
'\0';
3684 out << recvDataBuf.getRawPtr ();
3686 else if (myRank == p) {
3690 const std::string stringToSend = lclOutStrPtr->str ();
3691 lclOutStrPtr = Teuchos::null;
3694 const size_t numCharsToSend = stringToSend.size ();
3695 ArrayRCP<size_t> sendSize (1);
3696 sendSize[0] = numCharsToSend;
3697 RCP<CommRequest<int> > sendSizeReq =
3698 isend<int, size_t> (sendSize, 0, sizeTag, comm);
3699 wait<int> (comm, outArg (sendSizeReq));
3707 ArrayRCP<const char> sendData (&stringToSend[0], 0, numCharsToSend,
false);
3708 RCP<CommRequest<int> > sendDataReq =
3709 isend<int, char> (sendData, 0, dataTag, comm);
3710 wait<int> (comm, outArg (sendDataReq));
3716 template<
class Scalar,
class LO,
class GO,
class Node>
3717 Teuchos::RCP<const Teuchos::Comm<int> >
3721 return graph_.getComm();
3724 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
3725 template<
class Scalar,
class LO,
class GO,
class Node>
3731 return graph_.getNode();
3734 #endif // TPETRA_ENABLE_DEPRECATED_CODE
3736 template<
class Scalar,
class LO,
class GO,
class Node>
3741 return graph_.getGlobalNumCols();
3744 template<
class Scalar,
class LO,
class GO,
class Node>
3749 return graph_.getNodeNumCols();
3752 template<
class Scalar,
class LO,
class GO,
class Node>
3757 return graph_.getIndexBase();
3760 template<
class Scalar,
class LO,
class GO,
class Node>
3765 return graph_.getGlobalNumEntries();
3768 template<
class Scalar,
class LO,
class GO,
class Node>
3773 return graph_.getNodeNumEntries();
3776 template<
class Scalar,
class LO,
class GO,
class Node>
3781 return graph_.getNumEntriesInGlobalRow(globalRow);
3784 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
3785 template<
class Scalar,
class LO,
class GO,
class Node>
3790 using HDM = Details::HasDeprecatedMethods2630_WarningThisClassIsNotForUsers;
3791 return dynamic_cast<const HDM&
> (this->graph_).getGlobalNumDiagsImpl ();
3794 template<
class Scalar,
class LO,
class GO,
class Node>
3795 size_t TPETRA_DEPRECATED
3796 BlockCrsMatrix<Scalar, LO, GO, Node>::
3797 getNodeNumDiags ()
const
3799 using HDM = Details::HasDeprecatedMethods2630_WarningThisClassIsNotForUsers;
3800 return dynamic_cast<const HDM&
> (this->graph_).getNodeNumDiagsImpl ();
3802 #endif // TPETRA_ENABLE_DEPRECATED_CODE
3804 template<
class Scalar,
class LO,
class GO,
class Node>
3809 return graph_.getGlobalMaxNumRowEntries();
3812 template<
class Scalar,
class LO,
class GO,
class Node>
3817 return graph_.hasColMap();
3820 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
3821 template<
class Scalar,
class LO,
class GO,
class Node>
3822 bool TPETRA_DEPRECATED
3826 using HDM = ::Tpetra::Details::HasDeprecatedMethods2630_WarningThisClassIsNotForUsers;
3827 return dynamic_cast<const HDM&
> (this->graph_).isLowerTriangularImpl ();
3830 template<
class Scalar,
class LO,
class GO,
class Node>
3831 bool TPETRA_DEPRECATED
3832 BlockCrsMatrix<Scalar, LO, GO, Node>::
3833 isUpperTriangular ()
const
3835 using HDM = ::Tpetra::Details::HasDeprecatedMethods2630_WarningThisClassIsNotForUsers;
3836 return dynamic_cast<const HDM&
> (this->graph_).isUpperTriangularImpl ();
3838 #endif // TPETRA_ENABLE_DEPRECATED_CODE
3840 template<
class Scalar,
class LO,
class GO,
class Node>
3845 return graph_.isLocallyIndexed();
3848 template<
class Scalar,
class LO,
class GO,
class Node>
3853 return graph_.isGloballyIndexed();
3856 template<
class Scalar,
class LO,
class GO,
class Node>
3861 return graph_.isFillComplete ();
3864 template<
class Scalar,
class LO,
class GO,
class Node>
3873 template<
class Scalar,
class LO,
class GO,
class Node>
3877 const Teuchos::ArrayView<GO> &,
3878 const Teuchos::ArrayView<Scalar> &,
3881 TEUCHOS_TEST_FOR_EXCEPTION(
3882 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::getGlobalRowCopy: "
3883 "This class doesn't support global matrix indexing.");
3887 template<
class Scalar,
class LO,
class GO,
class Node>
3891 Teuchos::ArrayView<const GO> &,
3892 Teuchos::ArrayView<const Scalar> &)
const
3894 TEUCHOS_TEST_FOR_EXCEPTION(
3895 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::getGlobalRowView: "
3896 "This class doesn't support global matrix indexing.");
3900 template<
class Scalar,
class LO,
class GO,
class Node>
3904 Teuchos::ArrayView<const LO>& ,
3905 Teuchos::ArrayView<const Scalar>& )
const
3907 TEUCHOS_TEST_FOR_EXCEPTION(
3908 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::getLocalRowView: "
3909 "This class doesn't support local matrix indexing.");
3912 template<
class Scalar,
class LO,
class GO,
class Node>
3917 #ifdef HAVE_TPETRA_DEBUG
3918 const char prefix[] =
3919 "Tpetra::Experimental::BlockCrsMatrix::getLocalDiagCopy: ";
3920 #endif // HAVE_TPETRA_DEBUG
3922 const size_t lclNumMeshRows = graph_.getNodeNumRows ();
3924 Kokkos::View<size_t*, device_type> diagOffsets (
"diagOffsets", lclNumMeshRows);
3925 graph_.getLocalDiagOffsets (diagOffsets);
3928 auto diagOffsetsHost = Kokkos::create_mirror_view (diagOffsets);
3931 diag.template modify<typename decltype (diagOffsetsHost)::memory_space> ();
3933 #ifdef HAVE_TPETRA_DEBUG
3934 TEUCHOS_TEST_FOR_EXCEPTION
3935 (this->need_sync_host (), std::runtime_error,
3936 prefix <<
"The matrix's data were last modified on device, but have "
3937 "not been sync'd to host. Please sync to host (by calling "
3938 "sync<Kokkos::HostSpace>() on this matrix) before calling this "
3940 #endif // HAVE_TPETRA_DEBUG
3942 auto vals_host_out = getValuesHost ();
3943 Scalar* vals_host_out_raw =
3944 reinterpret_cast<Scalar*
> (vals_host_out.data ());
3947 size_t rowOffset = 0;
3949 LO bs = getBlockSize();
3950 for(
size_t r=0; r<getNodeNumRows(); r++)
3953 offset = rowOffset + diagOffsetsHost(r)*bs*bs;
3954 for(
int b=0; b<bs; b++)
3959 rowOffset += getNumEntriesInLocalRow(r)*bs*bs;
3962 diag.template sync<memory_space> ();
3965 template<
class Scalar,
class LO,
class GO,
class Node>
3970 TEUCHOS_TEST_FOR_EXCEPTION(
3971 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::leftScale: "
3972 "not implemented.");
3976 template<
class Scalar,
class LO,
class GO,
class Node>
3981 TEUCHOS_TEST_FOR_EXCEPTION(
3982 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::rightScale: "
3983 "not implemented.");
3987 template<
class Scalar,
class LO,
class GO,
class Node>
3988 Teuchos::RCP<const ::Tpetra::RowGraph<LO, GO, Node> >
3995 template<
class Scalar,
class LO,
class GO,
class Node>
3996 typename ::Tpetra::RowMatrix<Scalar, LO, GO, Node>::mag_type
4000 TEUCHOS_TEST_FOR_EXCEPTION(
4001 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::getFrobeniusNorm: "
4002 "not implemented.");
4013 #define TPETRA_EXPERIMENTAL_BLOCKCRSMATRIX_INSTANT(S,LO,GO,NODE) \
4014 namespace Experimental { \
4015 template class BlockCrsMatrix< S, LO, GO, NODE >; \
4018 #endif // TPETRA_EXPERIMENTAL_BLOCKCRSMATRIX_DEF_HPP
LO absMaxLocalValuesByOffsets(const LO localRowInd, const ptrdiff_t offsets[], const Scalar vals[], const LO numOffsets) const
Like sumIntoLocalValuesByOffsets, but for the ABSMAX combine mode.
virtual void leftScale(const ::Tpetra::Vector< Scalar, LO, GO, Node > &x)
Scale the RowMatrix on the left with the given Vector x.
virtual bool supportsRowViews() const
Whether this object implements getLocalRowView() and getGlobalRowView().
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
Teuchos::RCP< const map_type > getColMap() const
get the (mesh) map for the columns of this block matrix.
KOKKOS_INLINE_FUNCTION void GEMV(const CoeffType &alpha, const BlkType &A, const VecType1 &x, const VecType2 &y)
y := y + alpha * A * x (dense matrix-vector multiply)
KOKKOS_INLINE_FUNCTION void SCAL(const CoefficientType &alpha, const ViewType &x)
x := alpha*x, where x is either rank 1 (a vector) or rank 2 (a matrix).
Kokkos::View< impl_scalar_type *, Kokkos::LayoutRight, device_type, Kokkos::MemoryTraits< Kokkos::Unmanaged > > little_vec_type
The type used to access nonconst vector blocks.
Teuchos::RCP< const map_type > getRowMap() const
get the (mesh) map for the rows of this block matrix.
LO replaceLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Replace values at the given (mesh, i.e., block) column indices, in the given (mesh, i.e., block) row.
LO replaceLocalValuesByOffsets(const LO localRowInd, const ptrdiff_t offsets[], const Scalar vals[], const LO numOffsets) const
Like replaceLocalValues, but avoids computing row offsets.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
size_t getNodeNumRows() const
get the local number of block rows
void applyBlock(const BlockMultiVector< Scalar, LO, GO, Node > &X, BlockMultiVector< Scalar, LO, GO, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, const Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), const Scalar beta=Teuchos::ScalarTraits< Scalar >::zero())
Version of apply() that takes BlockMultiVector input and output.
One or more distributed dense vectors.
virtual void rightScale(const ::Tpetra::Vector< Scalar, LO, GO, Node > &x)
Scale the RowMatrix on the right with the given Vector x.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel) const
Print a description of this object to the given output stream.
Teuchos::RCP< const map_type > getDomainMap() const override
Returns the Map associated with the domain of this graph.
MultiVector for multiple degrees of freedom per mesh point.
LO getLocalRowOffsets(const LO localRowInd, ptrdiff_t offsets[], const LO colInds[], const LO numColInds) const
Get relative offsets corresponding to the given rows, given by local row index.
global_size_t getGlobalNumRows() const
get the global number of block rows
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
static map_type makePointMap(const map_type &meshMap, const LO blockSize)
Create and return the point Map corresponding to the given mesh Map and block size.
KOKKOS_INLINE_FUNCTION void FILL(const ViewType &x, const InputType &val)
Set every entry of x to val.
LocalOrdinal getMinLocalIndex() const
The minimum local index.
little_vec_type::HostMirror getLocalBlock(const LO localRowIndex, const LO colIndex) const
Get a host view of the degrees of freedom at the given mesh point.
LO absMaxLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Like sumIntoLocalValues, but for the ABSMAX combine mode.
Teuchos::RCP< const map_type > getRangeMap() const
Get the (point) range Map of this matrix.
int local_ordinal_type
Default value of Scalar template parameter.
virtual bool hasColMap() const
Whether this matrix has a well-defined column Map.
Teuchos::RCP< const map_type > getDomainMap() const
Get the (point) domain Map of this matrix.
void setAllToScalar(const Scalar &alpha)
Set all matrix entries equal to alpha.
virtual Teuchos::RCP< const ::Tpetra::RowGraph< LO, GO, Node > > getGraph() const
Get the (mesh) graph.
LO getBlockSize() const
Get the number of degrees of freedom per mesh point.
virtual GO getIndexBase() const
The index base for global indices in this matrix.
virtual bool isGloballyIndexed() const
Whether matrix indices are globally indexed.
size_t global_size_t
Global size_t object.
size_t getNodeNumEntries() const override
The local number of entries in the graph.
device_type::execution_space execution_space
The Kokkos execution space that this class uses.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Insert new values that don't currently exist.
std::string description() const
One-line description of this object.
BlockCrsMatrix()
Default constructor: Makes an empty block matrix.
bool isSorted() const
Whether graph indices in all rows are known to be sorted.
Linear algebra kernels for small dense matrices and vectors.
KOKKOS_INLINE_FUNCTION void COPY(const ViewType1 &x, const ViewType2 &y)
Deep copy x into y, where x and y are either rank 1 (vectors) or rank 2 (matrices) with the same dime...
virtual global_size_t getGlobalNumCols() const
The global number of columns of this matrix.
ESweepDirection
Sweep direction for Gauss-Seidel or Successive Over-Relaxation (SOR).
virtual bool isFillComplete() const
Whether fillComplete() has been called.
Kokkos::StaticCrsGraph< local_ordinal_type, Kokkos::LayoutLeft, execution_space > local_graph_type
The type of the part of the sparse graph on each MPI process.
LO getNumVectors() const
Get the number of columns (vectors) in the BlockMultiVector.
Sets up and executes a communication plan for a Tpetra DistObject.
GlobalOrdinal getGlobalElement(LocalOrdinal localIndex) const
The global index corresponding to the given local index.
CombineMode
Rule for combining data in an Import or Export.
Sum new values into existing values.
virtual size_t getNodeNumCols() const
The number of columns needed to apply the forward operator on this node.
virtual size_t getNodeNumEntries() const
The local number of stored (structurally nonzero) entries.
CrsGraphType::global_ordinal_type getGlobalNumDiags(const CrsGraphType &G)
Number of populated diagonal entries in the given sparse graph, over all processes in the graph's (MP...
virtual global_size_t getGlobalNumEntries() const
The global number of stored (structurally nonzero) entries.
virtual void getGlobalRowCopy(GO GlobalRow, const Teuchos::ArrayView< GO > &Indices, const Teuchos::ArrayView< Scalar > &Values, size_t &NumEntries) const
Get a copy of the given global row's entries.
void getLocalRowCopy(LO LocalRow, const Teuchos::ArrayView< LO > &Indices, const Teuchos::ArrayView< Scalar > &Values, size_t &NumEntries) const
Not implemented.
Replace old value with maximum of magnitudes of old and new values.
LO getLocalRowView(const LO localRowInd, const LO *&colInds, Scalar *&vals, LO &numInds) const
Get a view of the (mesh, i.e., block) row, using local (mesh, i.e., block) indices.
Replace existing values with new values.
Teuchos::RCP< const map_type > getRangeMap() const override
Returns the Map associated with the domain of this graph.
Replace old values with zero.
void localGaussSeidel(const BlockMultiVector< Scalar, LO, GO, Node > &Residual, BlockMultiVector< Scalar, LO, GO, Node > &Solution, const Kokkos::View< impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &D_inv, const Scalar &omega, const ESweepDirection direction) const
Local Gauss-Seidel solve, given a factorized diagonal.
LO sumIntoLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Sum into values at the given (mesh, i.e., block) column indices, in the given (mesh, i.e., block) row.
virtual size_t getGlobalMaxNumRowEntries() const
The maximum number of entries in any row over all processes in the matrix's communicator.
std::string dualViewStatusToString(const DualViewType &dv, const char name[])
Return the status of the given Kokkos::DualView, as a human-readable string.
typename BMV::impl_scalar_type impl_scalar_type
The implementation type of entries in the matrix.
void reorderedGaussSeidelCopy(::Tpetra::MultiVector< Scalar, LO, GO, Node > &X, const ::Tpetra::MultiVector< Scalar, LO, GO, Node > &B, const ::Tpetra::MultiVector< Scalar, LO, GO, Node > &D, const Teuchos::ArrayView< LO > &rowIndices, const Scalar &dampingFactor, const ESweepDirection direction, const int numSweeps, const bool zeroInitialGuess) const
Version of reorderedGaussSeidel(), with fewer requirements on X.
LO sumIntoLocalValuesByOffsets(const LO localRowInd, const ptrdiff_t offsets[], const Scalar vals[], const LO numOffsets) const
Like sumIntoLocalValues, but avoids computing row offsets.
A distributed dense vector.
Sparse matrix whose entries are small dense square blocks, all of the same dimensions.
virtual typename::Tpetra::RowMatrix< Scalar, LO, GO, Node >::mag_type getFrobeniusNorm() const
The Frobenius norm of the matrix.
void getLocalDiagOffsets(const Kokkos::View< size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Get offsets of the diagonal entries in the matrix.
void apply(const mv_type &X, mv_type &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const
For this matrix A, compute Y := beta * Y + alpha * Op(A) * X.
virtual Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
The communicator over which this matrix is distributed.
void gaussSeidelCopy(MultiVector< Scalar, LO, GO, Node > &X, const ::Tpetra::MultiVector< Scalar, LO, GO, Node > &B, const ::Tpetra::MultiVector< Scalar, LO, GO, Node > &D, const Scalar &dampingFactor, const ESweepDirection direction, const int numSweeps, const bool zeroInitialGuess) const
Version of gaussSeidel(), with fewer requirements on X.
local_graph_type getLocalGraph() const
Get the local graph.
size_t getNodeMaxNumRowEntries() const
Maximum number of entries in any row of the matrix, on this process.
LocalOrdinal getMaxLocalIndex() const
The maximum local index on the calling process.
virtual size_t getNumEntriesInGlobalRow(GO globalRow) const
The current number of entries on the calling process in the specified global row. ...
size_t getNumEntriesInLocalRow(const LO localRowInd) const
Return the number of entries in the given row on the calling process.
void replaceLocalValue(const LocalOrdinal myRow, const Scalar &value) const
Replace current value at the specified location with specified values.
virtual void getGlobalRowView(GO GlobalRow, Teuchos::ArrayView< const GO > &indices, Teuchos::ArrayView< const Scalar > &values) const
Get a constant, nonpersisting, globally indexed view of the given row of the matrix.
void getLocalDiagCopy(const Kokkos::View< impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &diag, const Kokkos::View< const size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Variant of getLocalDiagCopy() that uses precomputed offsets and puts diagonal blocks in a 3-D Kokkos:...
virtual bool isLocallyIndexed() const
Whether matrix indices are locally indexed.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.