40 #ifndef TPETRA_MULTIVECTOR_DEF_HPP
41 #define TPETRA_MULTIVECTOR_DEF_HPP
52 #include "Tpetra_Vector.hpp"
55 #include "Tpetra_Details_checkView.hpp"
64 #ifdef HAVE_TPETRACORE_TEUCHOSNUMERICS
65 # include "Teuchos_SerialDenseMatrix.hpp"
66 #endif // HAVE_TPETRACORE_TEUCHOSNUMERICS
67 #include "Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels.hpp"
68 #include "KokkosCompat_View.hpp"
69 #include "KokkosBlas.hpp"
70 #include "KokkosKernels_Utils.hpp"
71 #include "Kokkos_Random.hpp"
72 #include "Kokkos_ArithTraits.hpp"
76 #ifdef HAVE_TPETRA_INST_FLOAT128
79 template<
class Generator>
80 struct rand<Generator, __float128> {
81 static KOKKOS_INLINE_FUNCTION __float128 max ()
83 return static_cast<__float128
> (1.0);
85 static KOKKOS_INLINE_FUNCTION __float128
90 const __float128 scalingFactor =
91 static_cast<__float128
> (std::numeric_limits<double>::min ()) /
92 static_cast<__float128> (2.0);
93 const __float128 higherOrderTerm =
static_cast<__float128
> (gen.drand ());
94 const __float128 lowerOrderTerm =
95 static_cast<__float128
> (gen.drand ()) * scalingFactor;
96 return higherOrderTerm + lowerOrderTerm;
98 static KOKKOS_INLINE_FUNCTION __float128
99 draw (Generator& gen,
const __float128& range)
102 const __float128 scalingFactor =
103 static_cast<__float128
> (std::numeric_limits<double>::min ()) /
104 static_cast<__float128> (2.0);
105 const __float128 higherOrderTerm =
106 static_cast<__float128
> (gen.drand (range));
107 const __float128 lowerOrderTerm =
108 static_cast<__float128
> (gen.drand (range)) * scalingFactor;
109 return higherOrderTerm + lowerOrderTerm;
111 static KOKKOS_INLINE_FUNCTION __float128
112 draw (Generator& gen,
const __float128& start,
const __float128& end)
115 const __float128 scalingFactor =
116 static_cast<__float128
> (std::numeric_limits<double>::min ()) /
117 static_cast<__float128> (2.0);
118 const __float128 higherOrderTerm =
119 static_cast<__float128
> (gen.drand (start, end));
120 const __float128 lowerOrderTerm =
121 static_cast<__float128
> (gen.drand (start, end)) * scalingFactor;
122 return higherOrderTerm + lowerOrderTerm;
126 #endif // HAVE_TPETRA_INST_FLOAT128
144 template<
class ST,
class LO,
class GO,
class NT>
146 allocDualView (
const size_t lclNumRows,
147 const size_t numCols,
148 const bool zeroOut =
true,
149 const bool allowPadding =
false)
151 using ::Tpetra::Details::Behavior;
152 using Kokkos::AllowPadding;
153 using Kokkos::view_alloc;
154 using Kokkos::WithoutInitializing;
156 typedef typename dual_view_type::t_dev dev_view_type;
161 const std::string label (
"MV::DualView");
162 const bool debug = Behavior::debug ();
172 dev_view_type d_view;
175 d_view = dev_view_type (view_alloc (label, AllowPadding),
176 lclNumRows, numCols);
179 d_view = dev_view_type (view_alloc (label),
180 lclNumRows, numCols);
185 d_view = dev_view_type (view_alloc (label,
188 lclNumRows, numCols);
191 d_view = dev_view_type (view_alloc (label, WithoutInitializing),
192 lclNumRows, numCols);
203 const ST nan = Kokkos::Details::ArithTraits<ST>::nan ();
204 KokkosBlas::fill (d_view, nan);
208 TEUCHOS_TEST_FOR_EXCEPTION
209 (static_cast<size_t> (d_view.extent (0)) != lclNumRows ||
210 static_cast<size_t> (d_view.extent (1)) != numCols, std::logic_error,
211 "allocDualView: d_view's dimensions actual dimensions do not match "
212 "requested dimensions. d_view is " << d_view.extent (0) <<
" x " <<
213 d_view.extent (1) <<
"; requested " << lclNumRows <<
" x " << numCols
214 <<
". Please report this bug to the Tpetra developers.");
217 dual_view_type dv (d_view, Kokkos::create_mirror_view (d_view));
231 template<
class T,
class ExecSpace>
232 struct MakeUnmanagedView {
245 typedef typename Kokkos::Impl::if_c<
246 Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
247 typename ExecSpace::memory_space,
248 Kokkos::HostSpace>::value,
249 typename ExecSpace::device_type,
250 typename Kokkos::DualView<T*, ExecSpace>::host_mirror_space>::type host_exec_space;
251 typedef Kokkos::LayoutLeft array_layout;
252 typedef Kokkos::View<T*, array_layout, host_exec_space,
253 Kokkos::MemoryUnmanaged> view_type;
255 static view_type getView (
const Teuchos::ArrayView<T>& x_in)
257 const size_t numEnt =
static_cast<size_t> (x_in.size ());
261 return view_type (x_in.getRawPtr (), numEnt);
269 template<
class DualViewType>
271 takeSubview (
const DualViewType& X,
272 const Kokkos::Impl::ALL_t&,
273 const std::pair<size_t, size_t>& colRng)
275 if (X.extent (0) == 0 && X.extent (1) != 0) {
276 return DualViewType (
"MV::DualView", 0, colRng.second - colRng.first);
279 return subview (X, Kokkos::ALL (), colRng);
286 template<
class DualViewType>
288 takeSubview (
const DualViewType& X,
289 const std::pair<size_t, size_t>& rowRng,
290 const std::pair<size_t, size_t>& colRng)
292 if (X.extent (0) == 0 && X.extent (1) != 0) {
293 return DualViewType (
"MV::DualView", 0, colRng.second - colRng.first);
296 return subview (X, rowRng, colRng);
300 template<
class DualViewType>
302 getDualViewStride (
const DualViewType& dv)
306 const size_t LDA = dv.d_view.stride (1);
307 const size_t numRows = dv.extent (0);
310 return (numRows == 0) ? size_t (1) : numRows;
317 template<
class ViewType>
319 getViewStride (
const ViewType& view)
321 const size_t LDA = view.stride (1);
322 const size_t numRows = view.extent (0);
325 return (numRows == 0) ? size_t (1) : numRows;
332 template <
class impl_scalar_type,
class buffer_device_type>
334 runKernelOnHost ( Kokkos::DualView<impl_scalar_type*, buffer_device_type> imports )
336 if (! imports.need_sync_device ()) {
341 return imports.extent(0) <= localLengthThreshold;
346 template <
class SC,
class LO,
class GO,
class NT>
348 runKernelOnHost (const ::Tpetra::MultiVector<SC, LO, GO, NT>& X)
350 if (! X.need_sync_device ()) {
354 constexpr
size_t localLengthThreshold = 10000;
355 return X.getLocalLength () <= localLengthThreshold;
359 template <
class SC,
class LO,
class GO,
class NT>
361 multiVectorNormImpl (typename ::Tpetra::MultiVector<SC, LO, GO, NT>::mag_type norms[],
367 using val_type =
typename MV::impl_scalar_type;
368 using mag_type =
typename MV::mag_type;
369 using dual_view_type =
typename MV::dual_view_type;
372 auto comm = map.is_null () ?
nullptr : map->getComm ().getRawPtr ();
373 auto whichVecs = getMultiVectorWhichVectors (X);
377 const bool runOnHost = runKernelOnHost (X);
379 using view_type =
typename dual_view_type::t_host;
380 using array_layout =
typename view_type::array_layout;
381 using device_type =
typename view_type::device_type;
387 normImpl<val_type, array_layout, device_type,
388 mag_type> (norms, X_lcl, whichNorm, whichVecs,
389 isConstantStride, isDistributed, comm);
392 using view_type =
typename dual_view_type::t_dev;
393 using array_layout =
typename view_type::array_layout;
394 using device_type =
typename view_type::device_type;
400 normImpl<val_type, array_layout, device_type,
401 mag_type> (norms, X_lcl, whichNorm, whichVecs,
402 isConstantStride, isDistributed, comm);
410 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
412 MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
413 vectorIndexOutOfRange (
const size_t VectorIndex)
const {
414 return (VectorIndex < 1 && VectorIndex != 0) || VectorIndex >= getNumVectors();
417 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
423 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
426 const size_t numVecs,
427 const bool zeroOut) :
433 view_ = allocDualView<Scalar, LocalOrdinal, GlobalOrdinal, Node> (lclNumRows, numVecs, zeroOut);
437 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
440 const Teuchos::DataAccess copyOrView) :
442 view_ (source.view_),
443 origView_ (source.origView_),
444 whichVectors_ (source.whichVectors_)
447 const char tfecfFuncName[] =
"MultiVector(const MultiVector&, "
448 "const Teuchos::DataAccess): ";
452 if (copyOrView == Teuchos::Copy) {
456 this->
view_ = cpy.view_;
460 else if (copyOrView == Teuchos::View) {
463 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
464 true, std::invalid_argument,
"Second argument 'copyOrView' has an "
465 "invalid value " << copyOrView <<
". Valid values include "
466 "Teuchos::Copy = " << Teuchos::Copy <<
" and Teuchos::View = "
467 << Teuchos::View <<
".");
471 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
474 const dual_view_type& view) :
479 const char tfecfFuncName[] =
"MultiVector(Map,DualView): ";
480 const size_t lclNumRows_map = map.is_null () ? size_t (0) :
481 map->getNodeNumElements ();
482 const size_t lclNumRows_view = view.extent (0);
483 const size_t LDA = getDualViewStride (view);
485 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
486 (LDA < lclNumRows_map || lclNumRows_map != lclNumRows_view,
487 std::invalid_argument,
"Kokkos::DualView does not match Map. "
488 "map->getNodeNumElements() = " << lclNumRows_map
489 <<
", view.extent(0) = " << lclNumRows_view
490 <<
", and getStride() = " << LDA <<
".");
492 using ::Tpetra::Details::Behavior;
493 const bool debug = Behavior::debug ();
495 using ::Tpetra::Details::checkGlobalDualViewValidity;
496 std::ostringstream gblErrStrm;
497 const bool verbose = Behavior::verbose ();
498 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
499 const bool gblValid =
500 checkGlobalDualViewValidity (&gblErrStrm, view, verbose,
502 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
503 (! gblValid, std::runtime_error, gblErrStrm.str ());
508 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
511 const typename dual_view_type::t_dev& d_view) :
514 using Teuchos::ArrayRCP;
516 const char tfecfFuncName[] =
"MultiVector(map,d_view): ";
520 const size_t LDA = getViewStride (d_view);
521 const size_t lclNumRows = map->getNodeNumElements ();
522 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
523 (LDA < lclNumRows, std::invalid_argument,
"Map does not match "
524 "Kokkos::View. map->getNodeNumElements() = " << lclNumRows
525 <<
", View's column stride = " << LDA
526 <<
", and View's extent(0) = " << d_view.extent (0) <<
".");
528 auto h_view = Kokkos::create_mirror_view (d_view);
531 using ::Tpetra::Details::Behavior;
532 const bool debug = Behavior::debug ();
534 using ::Tpetra::Details::checkGlobalDualViewValidity;
535 std::ostringstream gblErrStrm;
536 const bool verbose = Behavior::verbose ();
537 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
538 const bool gblValid =
539 checkGlobalDualViewValidity (&gblErrStrm,
view_, verbose,
541 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
542 (! gblValid, std::runtime_error, gblErrStrm.str ());
551 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
554 const dual_view_type& view,
555 const dual_view_type& origView) :
560 const char tfecfFuncName[] =
"MultiVector(map,view,origView): ";
562 const size_t LDA = getDualViewStride (origView);
564 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
565 LDA < lclNumRows, std::invalid_argument,
"The input Kokkos::DualView's "
566 "column stride LDA = " << LDA <<
" < getLocalLength() = " << lclNumRows
567 <<
". This may also mean that the input origView's first dimension (number "
568 "of rows = " << origView.extent (0) <<
") does not not match the number "
569 "of entries in the Map on the calling process.");
571 using ::Tpetra::Details::Behavior;
572 const bool debug = Behavior::debug ();
574 using ::Tpetra::Details::checkGlobalDualViewValidity;
575 std::ostringstream gblErrStrm;
576 const bool verbose = Behavior::verbose ();
577 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
578 const bool gblValid_0 =
579 checkGlobalDualViewValidity (&gblErrStrm, view, verbose,
581 const bool gblValid_1 =
582 checkGlobalDualViewValidity (&gblErrStrm, origView, verbose,
584 const bool gblValid = gblValid_0 && gblValid_1;
585 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
586 (! gblValid, std::runtime_error, gblErrStrm.str ());
591 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
594 const dual_view_type& view,
595 const Teuchos::ArrayView<const size_t>& whichVectors) :
599 whichVectors_ (whichVectors.begin (), whichVectors.end ())
602 using Kokkos::subview;
603 const char tfecfFuncName[] =
"MultiVector(map,view,whichVectors): ";
605 using ::Tpetra::Details::Behavior;
606 const bool debug = Behavior::debug ();
608 using ::Tpetra::Details::checkGlobalDualViewValidity;
609 std::ostringstream gblErrStrm;
610 const bool verbose = Behavior::verbose ();
611 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
612 const bool gblValid =
613 checkGlobalDualViewValidity (&gblErrStrm, view, verbose,
615 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
616 (! gblValid, std::runtime_error, gblErrStrm.str ());
619 const size_t lclNumRows = map.is_null () ? size_t (0) :
620 map->getNodeNumElements ();
627 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
628 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (0)) < lclNumRows,
629 std::invalid_argument,
"view.extent(0) = " << view.extent (0)
630 <<
" < map->getNodeNumElements() = " << lclNumRows <<
".");
631 if (whichVectors.size () != 0) {
632 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
633 view.extent (1) != 0 && view.extent (1) == 0,
634 std::invalid_argument,
"view.extent(1) = 0, but whichVectors.size()"
635 " = " << whichVectors.size () <<
" > 0.");
636 size_t maxColInd = 0;
637 typedef Teuchos::ArrayView<const size_t>::size_type size_type;
638 for (size_type k = 0; k < whichVectors.size (); ++k) {
639 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
640 whichVectors[k] == Teuchos::OrdinalTraits<size_t>::invalid (),
641 std::invalid_argument,
"whichVectors[" << k <<
"] = "
642 "Teuchos::OrdinalTraits<size_t>::invalid().");
643 maxColInd = std::max (maxColInd, whichVectors[k]);
645 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
646 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (1)) <= maxColInd,
647 std::invalid_argument,
"view.extent(1) = " << view.extent (1)
648 <<
" <= max(whichVectors) = " << maxColInd <<
".");
653 const size_t LDA = getDualViewStride (view);
654 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
655 (LDA < lclNumRows, std::invalid_argument,
656 "LDA = " << LDA <<
" < this->getLocalLength() = " << lclNumRows <<
".");
658 if (whichVectors.size () == 1) {
667 const std::pair<size_t, size_t> colRng (whichVectors[0],
668 whichVectors[0] + 1);
676 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
679 const dual_view_type& view,
680 const dual_view_type& origView,
681 const Teuchos::ArrayView<const size_t>& whichVectors) :
684 origView_ (origView),
685 whichVectors_ (whichVectors.begin (), whichVectors.end ())
688 using Kokkos::subview;
689 const char tfecfFuncName[] =
"MultiVector(map,view,origView,whichVectors): ";
691 using ::Tpetra::Details::Behavior;
692 const bool debug = Behavior::debug ();
694 using ::Tpetra::Details::checkGlobalDualViewValidity;
695 std::ostringstream gblErrStrm;
696 const bool verbose = Behavior::verbose ();
697 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
698 const bool gblValid_0 =
699 checkGlobalDualViewValidity (&gblErrStrm, view, verbose,
701 const bool gblValid_1 =
702 checkGlobalDualViewValidity (&gblErrStrm, origView, verbose,
704 const bool gblValid = gblValid_0 && gblValid_1;
705 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
706 (! gblValid, std::runtime_error, gblErrStrm.str ());
716 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
717 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (0)) < lclNumRows,
718 std::invalid_argument,
"view.extent(0) = " << view.extent (0)
719 <<
" < map->getNodeNumElements() = " << lclNumRows <<
".");
720 if (whichVectors.size () != 0) {
721 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
722 view.extent (1) != 0 && view.extent (1) == 0,
723 std::invalid_argument,
"view.extent(1) = 0, but whichVectors.size()"
724 " = " << whichVectors.size () <<
" > 0.");
725 size_t maxColInd = 0;
726 typedef Teuchos::ArrayView<const size_t>::size_type size_type;
727 for (size_type k = 0; k < whichVectors.size (); ++k) {
728 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
729 whichVectors[k] == Teuchos::OrdinalTraits<size_t>::invalid (),
730 std::invalid_argument,
"whichVectors[" << k <<
"] = "
731 "Teuchos::OrdinalTraits<size_t>::invalid().");
732 maxColInd = std::max (maxColInd, whichVectors[k]);
734 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
735 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (1)) <= maxColInd,
736 std::invalid_argument,
"view.extent(1) = " << view.extent (1)
737 <<
" <= max(whichVectors) = " << maxColInd <<
".");
742 const size_t LDA = getDualViewStride (origView);
743 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
744 (LDA < lclNumRows, std::invalid_argument,
"Map and DualView origView "
745 "do not match. LDA = " << LDA <<
" < this->getLocalLength() = " <<
746 lclNumRows <<
". origView.extent(0) = " << origView.extent(0)
747 <<
", origView.stride(1) = " << origView.d_view.stride(1) <<
".");
749 if (whichVectors.size () == 1) {
758 const std::pair<size_t, size_t> colRng (whichVectors[0],
759 whichVectors[0] + 1);
767 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
770 const Teuchos::ArrayView<const Scalar>& data,
772 const size_t numVecs) :
775 typedef LocalOrdinal LO;
776 typedef GlobalOrdinal GO;
777 const char tfecfFuncName[] =
"MultiVector(map,data,LDA,numVecs): ";
783 const size_t lclNumRows =
784 map.is_null () ? size_t (0) : map->getNodeNumElements ();
785 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
786 (LDA < lclNumRows, std::invalid_argument,
"LDA = " << LDA <<
" < "
787 "map->getNodeNumElements() = " << lclNumRows <<
".");
789 const size_t minNumEntries = LDA * (numVecs - 1) + lclNumRows;
790 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
791 (static_cast<size_t> (data.size ()) < minNumEntries,
792 std::invalid_argument,
"Input Teuchos::ArrayView does not have enough "
793 "entries, given the input Map and number of vectors in the MultiVector."
794 " data.size() = " << data.size () <<
" < (LDA*(numVecs-1)) + "
795 "map->getNodeNumElements () = " << minNumEntries <<
".");
798 this->
view_ = allocDualView<Scalar, LO, GO, Node> (lclNumRows, numVecs);
811 Kokkos::MemoryUnmanaged> X_in_orig (X_in_raw, LDA, numVecs);
812 const Kokkos::pair<size_t, size_t> rowRng (0, lclNumRows);
813 auto X_in = Kokkos::subview (X_in_orig, rowRng, Kokkos::ALL ());
818 const size_t outStride =
819 X_out.extent (1) == 0 ? size_t (1) : X_out.stride (1);
820 if (LDA == outStride) {
826 typedef decltype (Kokkos::subview (X_out, Kokkos::ALL (), 0))
828 typedef decltype (Kokkos::subview (X_in, Kokkos::ALL (), 0))
830 for (
size_t j = 0; j < numVecs; ++j) {
831 out_col_view_type X_out_j = Kokkos::subview (X_out, Kokkos::ALL (), j);
832 in_col_view_type X_in_j = Kokkos::subview (X_in, Kokkos::ALL (), j);
838 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
841 const Teuchos::ArrayView<
const Teuchos::ArrayView<const Scalar> >& ArrayOfPtrs,
842 const size_t numVecs) :
846 typedef LocalOrdinal LO;
847 typedef GlobalOrdinal GO;
848 const char tfecfFuncName[] =
"MultiVector(map,ArrayOfPtrs,numVecs): ";
851 const size_t lclNumRows =
852 map.is_null () ? size_t (0) : map->getNodeNumElements ();
853 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
854 (numVecs < 1 || numVecs != static_cast<size_t> (ArrayOfPtrs.size ()),
855 std::runtime_error,
"Either numVecs (= " << numVecs <<
") < 1, or "
856 "ArrayOfPtrs.size() (= " << ArrayOfPtrs.size () <<
") != numVecs.");
857 for (
size_t j = 0; j < numVecs; ++j) {
858 Teuchos::ArrayView<const Scalar> X_j_av = ArrayOfPtrs[j];
859 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
860 static_cast<size_t> (X_j_av.size ()) < lclNumRows,
861 std::invalid_argument,
"ArrayOfPtrs[" << j <<
"].size() = "
862 << X_j_av.size () <<
" < map->getNodeNumElements() = " << lclNumRows
866 view_ = allocDualView<Scalar, LO, GO, Node> (lclNumRows, numVecs);
872 using array_layout =
typename decltype (X_out)::array_layout;
873 using input_col_view_type =
typename Kokkos::View<
const IST*,
876 Kokkos::MemoryUnmanaged>;
878 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
879 for (
size_t j = 0; j < numVecs; ++j) {
880 Teuchos::ArrayView<const IST> X_j_av =
881 Teuchos::av_reinterpret_cast<
const IST> (ArrayOfPtrs[j]);
882 input_col_view_type X_j_in (X_j_av.getRawPtr (), lclNumRows);
883 auto X_j_out = Kokkos::subview (X_out, rowRng, j);
889 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
892 return whichVectors_.empty ();
895 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
900 if (this->getMap ().is_null ()) {
901 return static_cast<size_t> (0);
903 return this->getMap ()->getNodeNumElements ();
907 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
912 if (this->getMap ().is_null ()) {
913 return static_cast<size_t> (0);
915 return this->getMap ()->getGlobalNumElements ();
919 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
924 return isConstantStride () ? getDualViewStride (origView_) : size_t (0);
927 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
936 const this_type* src =
dynamic_cast<const this_type*
> (&sourceObj);
937 if (src ==
nullptr) {
947 return src->getNumVectors () == this->getNumVectors ();
951 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
955 return this->getNumVectors ();
958 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
963 const size_t numSameIDs,
964 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs,
965 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs)
967 using ::Tpetra::Details::Behavior;
969 using ::Tpetra::Details::ProfilingRegion;
971 using KokkosRefactor::Details::permute_array_multi_column;
972 using KokkosRefactor::Details::permute_array_multi_column_variable_stride;
973 using Kokkos::Compat::create_const_view;
975 const char tfecfFuncName[] =
"copyAndPermute: ";
976 ProfilingRegion regionCAP (
"Tpetra::MultiVector::copyAndPermute");
978 const bool verbose = Behavior::verbose ();
979 std::unique_ptr<std::string> prefix;
981 auto map = this->getMap ();
982 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
983 const int myRank = comm.is_null () ? -1 : comm->getRank ();
984 std::ostringstream os;
985 os <<
"Proc " << myRank <<
": MV::copyAndPermute: ";
986 prefix = std::unique_ptr<std::string> (
new std::string (os.str ()));
987 os <<
"Start" << endl;
988 std::cerr << os.str ();
991 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
992 (permuteToLIDs.extent (0) != permuteFromLIDs.extent (0),
993 std::logic_error,
"permuteToLIDs.extent(0) = "
994 << permuteToLIDs.extent (0) <<
" != permuteFromLIDs.extent(0) = "
995 << permuteFromLIDs.extent (0) <<
".");
998 MV& sourceMV =
const_cast<MV &
>(
dynamic_cast<const MV&
> (sourceObj));
999 const size_t numCols = this->getNumVectors ();
1003 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1004 (sourceMV.need_sync_device () && sourceMV.need_sync_host (),
1005 std::logic_error,
"Input MultiVector needs sync to both host "
1007 const bool copyOnHost = runKernelOnHost(sourceMV);
1009 std::ostringstream os;
1010 os << *prefix <<
"copyOnHost=" << (copyOnHost ?
"true" :
"false") << endl;
1011 std::cerr << os.str ();
1015 sourceMV.sync_host();
1017 this->modify_host ();
1020 sourceMV.sync_device();
1021 if (this->need_sync_device ()) {
1022 this->sync_device ();
1024 this->modify_device ();
1028 std::ostringstream os;
1029 os << *prefix <<
"Copy" << endl;
1030 std::cerr << os.str ();
1055 if (numSameIDs > 0) {
1056 const std::pair<size_t, size_t> rows (0, numSameIDs);
1058 auto tgt_h = this->getLocalViewHost ();
1059 auto src_h = create_const_view (sourceMV.getLocalViewHost ());
1061 for (
size_t j = 0; j < numCols; ++j) {
1062 const size_t tgtCol = isConstantStride () ? j : whichVectors_[j];
1063 const size_t srcCol =
1064 sourceMV.isConstantStride () ? j : sourceMV.whichVectors_[j];
1066 auto tgt_j = Kokkos::subview (tgt_h, rows, tgtCol);
1067 auto src_j = Kokkos::subview (src_h, rows, srcCol);
1072 auto tgt_d = this->getLocalViewDevice ();
1073 auto src_d = create_const_view (sourceMV.getLocalViewDevice ());
1075 for (
size_t j = 0; j < numCols; ++j) {
1076 const size_t tgtCol = isConstantStride () ? j : whichVectors_[j];
1077 const size_t srcCol =
1078 sourceMV.isConstantStride () ? j : sourceMV.whichVectors_[j];
1080 auto tgt_j = Kokkos::subview (tgt_d, rows, tgtCol);
1081 auto src_j = Kokkos::subview (src_d, rows, srcCol);
1097 if (permuteFromLIDs.extent (0) == 0 ||
1098 permuteToLIDs.extent (0) == 0) {
1100 std::ostringstream os;
1101 os << *prefix <<
"No permutations. Done!" << endl;
1102 std::cerr << os.str ();
1108 std::ostringstream os;
1109 os << *prefix <<
"Permute" << endl;
1110 std::cerr << os.str ();
1115 const bool nonConstStride =
1116 ! this->isConstantStride () || ! sourceMV.isConstantStride ();
1119 std::ostringstream os;
1120 os << *prefix <<
"nonConstStride="
1121 << (nonConstStride ?
"true" :
"false") << endl;
1122 std::cerr << os.str ();
1129 Kokkos::DualView<const size_t*, device_type> srcWhichVecs;
1130 Kokkos::DualView<const size_t*, device_type> tgtWhichVecs;
1131 if (nonConstStride) {
1132 if (this->whichVectors_.size () == 0) {
1133 Kokkos::DualView<size_t*, device_type> tmpTgt (
"tgtWhichVecs", numCols);
1134 tmpTgt.modify_host ();
1135 for (
size_t j = 0; j < numCols; ++j) {
1136 tmpTgt.h_view(j) = j;
1139 tmpTgt.sync_device ();
1141 tgtWhichVecs = tmpTgt;
1144 Teuchos::ArrayView<const size_t> tgtWhichVecsT = this->whichVectors_ ();
1146 getDualViewCopyFromArrayView<size_t, device_type> (tgtWhichVecsT,
1150 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1151 (static_cast<size_t> (tgtWhichVecs.extent (0)) !=
1152 this->getNumVectors (),
1153 std::logic_error,
"tgtWhichVecs.extent(0) = " <<
1154 tgtWhichVecs.extent (0) <<
" != this->getNumVectors() = " <<
1155 this->getNumVectors () <<
".");
1157 if (sourceMV.whichVectors_.size () == 0) {
1158 Kokkos::DualView<size_t*, device_type> tmpSrc (
"srcWhichVecs", numCols);
1159 tmpSrc.modify_host ();
1160 for (
size_t j = 0; j < numCols; ++j) {
1161 tmpSrc.h_view(j) = j;
1164 tmpSrc.sync_device ();
1166 srcWhichVecs = tmpSrc;
1169 Teuchos::ArrayView<const size_t> srcWhichVecsT =
1170 sourceMV.whichVectors_ ();
1172 getDualViewCopyFromArrayView<size_t, device_type> (srcWhichVecsT,
1176 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1177 (static_cast<size_t> (srcWhichVecs.extent (0)) !=
1178 sourceMV.getNumVectors (), std::logic_error,
1179 "srcWhichVecs.extent(0) = " << srcWhichVecs.extent (0)
1180 <<
" != sourceMV.getNumVectors() = " << sourceMV.getNumVectors ()
1186 std::ostringstream os;
1187 os << *prefix <<
"Get permute LIDs on host" << std::endl;
1188 std::cerr << os.str ();
1190 auto tgt_h = this->getLocalViewHost ();
1191 auto src_h = create_const_view (sourceMV.getLocalViewHost ());
1193 TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_host () );
1194 auto permuteToLIDs_h = create_const_view (permuteToLIDs.view_host ());
1195 TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_host () );
1196 auto permuteFromLIDs_h =
1197 create_const_view (permuteFromLIDs.view_host ());
1200 std::ostringstream os;
1201 os << *prefix <<
"Permute on host" << endl;
1202 std::cerr << os.str ();
1204 if (nonConstStride) {
1207 auto tgtWhichVecs_h =
1208 create_const_view (tgtWhichVecs.view_host ());
1209 auto srcWhichVecs_h =
1210 create_const_view (srcWhichVecs.view_host ());
1211 permute_array_multi_column_variable_stride (tgt_h, src_h,
1215 srcWhichVecs_h, numCols);
1218 permute_array_multi_column (tgt_h, src_h, permuteToLIDs_h,
1219 permuteFromLIDs_h, numCols);
1224 std::ostringstream os;
1225 os << *prefix <<
"Get permute LIDs on device" << endl;
1226 std::cerr << os.str ();
1228 auto tgt_d = this->getLocalViewDevice ();
1229 auto src_d = create_const_view (sourceMV.getLocalViewDevice ());
1231 TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_device () );
1232 auto permuteToLIDs_d = create_const_view (permuteToLIDs.view_device ());
1233 TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_device () );
1234 auto permuteFromLIDs_d =
1235 create_const_view (permuteFromLIDs.view_device ());
1238 std::ostringstream os;
1239 os << *prefix <<
"Permute on device" << endl;
1240 std::cerr << os.str ();
1242 if (nonConstStride) {
1245 auto tgtWhichVecs_d = create_const_view (tgtWhichVecs.view_device ());
1246 auto srcWhichVecs_d = create_const_view (srcWhichVecs.view_device ());
1247 permute_array_multi_column_variable_stride (tgt_d, src_d,
1251 srcWhichVecs_d, numCols);
1254 permute_array_multi_column (tgt_d, src_d, permuteToLIDs_d,
1255 permuteFromLIDs_d, numCols);
1260 std::ostringstream os;
1261 os << *prefix <<
"Done!" << endl;
1262 std::cerr << os.str ();
1266 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1268 MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
1270 (
const SrcDistObject& sourceObj,
1271 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
1272 Kokkos::DualView<impl_scalar_type*, buffer_device_type>& exports,
1273 Kokkos::DualView<size_t*, buffer_device_type> ,
1274 size_t& constantNumPackets,
1277 using ::Tpetra::Details::Behavior;
1278 using ::Tpetra::Details::ProfilingRegion;
1280 using Kokkos::Compat::create_const_view;
1281 using Kokkos::Compat::getKokkosViewDeepCopy;
1283 using MV = MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
1284 const char tfecfFuncName[] =
"packAndPrepare: ";
1285 ProfilingRegion regionPAP (
"Tpetra::MultiVector::packAndPrepare");
1293 const bool debugCheckIndices = Behavior::debug ();
1298 const bool printDebugOutput = Behavior::verbose ();
1300 std::unique_ptr<std::string> prefix;
1301 if (printDebugOutput) {
1302 auto map = this->getMap ();
1303 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
1304 const int myRank = comm.is_null () ? -1 : comm->getRank ();
1305 std::ostringstream os;
1306 os <<
"Proc " << myRank <<
": MV::packAndPrepare: ";
1307 prefix = std::unique_ptr<std::string> (
new std::string (os.str ()));
1308 os <<
"Start" << endl;
1309 std::cerr << os.str ();
1313 MV& sourceMV =
const_cast<MV&
>(
dynamic_cast<const MV&
> (sourceObj));
1315 const size_t numCols = sourceMV.getNumVectors ();
1320 constantNumPackets = numCols;
1324 if (exportLIDs.extent (0) == 0) {
1325 if (printDebugOutput) {
1326 std::ostringstream os;
1327 os << *prefix <<
"No exports on this proc, DONE" << std::endl;
1328 std::cerr << os.str ();
1348 const size_t numExportLIDs = exportLIDs.extent (0);
1349 const size_t newExportsSize = numCols * numExportLIDs;
1350 if (printDebugOutput) {
1351 std::ostringstream os;
1352 os << *prefix <<
"realloc: "
1353 <<
"numExportLIDs: " << numExportLIDs
1354 <<
", exports.extent(0): " << exports.extent (0)
1355 <<
", newExportsSize: " << newExportsSize << std::endl;
1356 std::cerr << os.str ();
1362 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1363 (sourceMV.need_sync_device () && sourceMV.need_sync_host (),
1364 std::logic_error,
"Input MultiVector needs sync to both host "
1366 const bool packOnHost = runKernelOnHost(sourceMV);
1367 auto src_dev = sourceMV.getLocalViewHost ();
1368 auto src_host = sourceMV.getLocalViewDevice ();
1369 if (printDebugOutput) {
1370 std::ostringstream os;
1371 os << *prefix <<
"packOnHost=" << (packOnHost ?
"true" :
"false") << endl;
1372 std::cerr << os.str ();
1384 exports.clear_sync_state ();
1385 exports.modify_host ();
1386 sourceMV.sync_host();
1394 exports.clear_sync_state ();
1395 exports.modify_device ();
1396 sourceMV.sync_device();
1412 if (sourceMV.isConstantStride ()) {
1413 using KokkosRefactor::Details::pack_array_single_column;
1414 if (printDebugOutput) {
1415 std::ostringstream os;
1416 os << *prefix <<
"Pack numCols=1 const stride" << endl;
1417 std::cerr << os.str ();
1420 pack_array_single_column (exports.view_host (),
1421 create_const_view (src_host),
1422 exportLIDs.view_host (),
1427 pack_array_single_column (exports.view_device (),
1428 create_const_view (src_dev),
1429 exportLIDs.view_device (),
1435 using KokkosRefactor::Details::pack_array_single_column;
1436 if (printDebugOutput) {
1437 std::ostringstream os;
1438 os << *prefix <<
"Pack numCols=1 nonconst stride" << endl;
1439 std::cerr << os.str ();
1442 pack_array_single_column (exports.view_host (),
1443 create_const_view (src_host),
1444 exportLIDs.view_host (),
1445 sourceMV.whichVectors_[0],
1449 pack_array_single_column (exports.view_device (),
1450 create_const_view (src_dev),
1451 exportLIDs.view_device (),
1452 sourceMV.whichVectors_[0],
1458 if (sourceMV.isConstantStride ()) {
1459 using KokkosRefactor::Details::pack_array_multi_column;
1460 if (printDebugOutput) {
1461 std::ostringstream os;
1462 os << *prefix <<
"Pack numCols=" << numCols <<
" const stride" << endl;
1463 std::cerr << os.str ();
1466 pack_array_multi_column (exports.view_host (),
1467 create_const_view (src_host),
1468 exportLIDs.view_host (),
1473 pack_array_multi_column (exports.view_device (),
1474 create_const_view (src_dev),
1475 exportLIDs.view_device (),
1481 using KokkosRefactor::Details::pack_array_multi_column_variable_stride;
1482 if (printDebugOutput) {
1483 std::ostringstream os;
1484 os << *prefix <<
"Pack numCols=" << numCols <<
" nonconst stride"
1486 std::cerr << os.str ();
1491 using IST = impl_scalar_type;
1492 using DV = Kokkos::DualView<IST*, device_type>;
1493 using HES =
typename DV::t_host::execution_space;
1494 using DES =
typename DV::t_dev::execution_space;
1495 Teuchos::ArrayView<const size_t> whichVecs = sourceMV.whichVectors_ ();
1497 pack_array_multi_column_variable_stride
1498 (exports.view_host (),
1499 create_const_view (src_host),
1500 exportLIDs.view_host (),
1501 getKokkosViewDeepCopy<HES> (whichVecs),
1506 pack_array_multi_column_variable_stride
1507 (exports.view_device (),
1508 create_const_view (src_dev),
1509 exportLIDs.view_device (),
1510 getKokkosViewDeepCopy<DES> (whichVecs),
1517 if (printDebugOutput) {
1518 std::ostringstream os;
1519 os << *prefix <<
"Done!" << endl;
1520 std::cerr << os.str ();
1525 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1527 MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
1529 (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& importLIDs,
1530 Kokkos::DualView<impl_scalar_type*, buffer_device_type> imports,
1531 Kokkos::DualView<size_t*, buffer_device_type> ,
1532 const size_t constantNumPackets,
1536 using ::Tpetra::Details::Behavior;
1537 using ::Tpetra::Details::ProfilingRegion;
1538 using KokkosRefactor::Details::unpack_array_multi_column;
1539 using KokkosRefactor::Details::unpack_array_multi_column_variable_stride;
1540 using Kokkos::Compat::getKokkosViewDeepCopy;
1542 using IST = impl_scalar_type;
1543 const char longFuncName[] =
"Tpetra::MultiVector::unpackAndCombine";
1544 const char tfecfFuncName[] =
"unpackAndCombine: ";
1545 ProfilingRegion regionUAC (longFuncName);
1553 const bool debugCheckIndices = Behavior::debug ();
1555 const bool printDebugOutput = Behavior::verbose ();
1556 std::unique_ptr<std::string> prefix;
1557 if (printDebugOutput) {
1558 auto map = this->getMap ();
1559 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
1560 const int myRank = comm.is_null () ? -1 : comm->getRank ();
1561 std::ostringstream os;
1562 os <<
"Proc " << myRank <<
": " << longFuncName <<
": ";
1563 prefix = std::unique_ptr<std::string> (
new std::string (os.str ()));
1564 os <<
"Start" << endl;
1565 std::cerr << os.str ();
1569 if (importLIDs.extent (0) == 0) {
1570 if (printDebugOutput) {
1571 std::ostringstream os;
1572 os << *prefix <<
"No imports. Done!" << endl;
1577 const size_t numVecs = getNumVectors ();
1578 if (debugCheckIndices) {
1579 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1580 (static_cast<size_t> (imports.extent (0)) !=
1581 numVecs * importLIDs.extent (0),
1583 "imports.extent(0) = " << imports.extent (0)
1584 <<
" != getNumVectors() * importLIDs.extent(0) = " << numVecs
1585 <<
" * " << importLIDs.extent (0) <<
" = "
1586 << numVecs * importLIDs.extent (0) <<
".");
1588 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1589 (constantNumPackets == static_cast<size_t> (0), std::runtime_error,
1590 "constantNumPackets input argument must be nonzero.");
1592 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1593 (static_cast<size_t> (numVecs) !=
1594 static_cast<size_t> (constantNumPackets),
1595 std::runtime_error,
"constantNumPackets must equal numVecs.");
1601 const bool unpackOnHost = runKernelOnHost(imports);
1603 if (printDebugOutput) {
1604 std::ostringstream os;
1605 os << *prefix <<
"unpackOnHost=" << (unpackOnHost ?
"true" :
"false")
1607 std::cerr << os.str ();
1613 imports.sync_host();
1615 this->modify_host ();
1618 imports.sync_device();
1619 this->sync_device ();
1620 this->modify_device ();
1622 auto X_d = this->getLocalViewDevice ();
1623 auto X_h = this->getLocalViewHost ();
1624 auto imports_d = imports.view_device ();
1625 auto imports_h = imports.view_host ();
1626 auto importLIDs_d = importLIDs.view_device ();
1627 auto importLIDs_h = importLIDs.view_host ();
1629 Kokkos::DualView<size_t*, device_type> whichVecs;
1630 if (! isConstantStride ()) {
1631 Kokkos::View<
const size_t*, Kokkos::HostSpace,
1632 Kokkos::MemoryUnmanaged> whichVecsIn (whichVectors_.getRawPtr (),
1634 whichVecs = Kokkos::DualView<size_t*, device_type> (
"whichVecs", numVecs);
1636 whichVecs.modify_host ();
1640 whichVecs.modify_device ();
1644 auto whichVecs_d = whichVecs.view_device ();
1645 auto whichVecs_h = whichVecs.view_host ();
1655 if (numVecs > 0 && importLIDs.extent (0) > 0) {
1656 using dev_exec_space =
typename dual_view_type::t_dev::execution_space;
1657 using host_exec_space =
typename dual_view_type::t_host::execution_space;
1660 const bool use_atomic_updates = unpackOnHost ?
1661 host_exec_space::concurrency () != 1 :
1662 dev_exec_space::concurrency () != 1;
1664 if (printDebugOutput) {
1665 std::ostringstream os;
1667 std::cerr << os.str ();
1674 using op_type = KokkosRefactor::Details::InsertOp<IST>;
1675 if (isConstantStride ()) {
1677 unpack_array_multi_column (host_exec_space (),
1678 X_h, imports_h, importLIDs_h,
1679 op_type (), numVecs,
1685 unpack_array_multi_column (dev_exec_space (),
1686 X_d, imports_d, importLIDs_d,
1687 op_type (), numVecs,
1694 unpack_array_multi_column_variable_stride (host_exec_space (),
1704 unpack_array_multi_column_variable_stride (dev_exec_space (),
1715 else if (CM ==
ADD) {
1716 using op_type = KokkosRefactor::Details::AddOp<IST>;
1717 if (isConstantStride ()) {
1719 unpack_array_multi_column (host_exec_space (),
1720 X_h, imports_h, importLIDs_h,
1721 op_type (), numVecs,
1726 unpack_array_multi_column (dev_exec_space (),
1727 X_d, imports_d, importLIDs_d,
1728 op_type (), numVecs,
1735 unpack_array_multi_column_variable_stride (host_exec_space (),
1745 unpack_array_multi_column_variable_stride (dev_exec_space (),
1757 using op_type = KokkosRefactor::Details::AbsMaxOp<IST>;
1758 if (isConstantStride ()) {
1760 unpack_array_multi_column (host_exec_space (),
1761 X_h, imports_h, importLIDs_h,
1762 op_type (), numVecs,
1767 unpack_array_multi_column (dev_exec_space (),
1768 X_d, imports_d, importLIDs_d,
1769 op_type (), numVecs,
1776 unpack_array_multi_column_variable_stride (host_exec_space (),
1786 unpack_array_multi_column_variable_stride (dev_exec_space (),
1798 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1799 (
true, std::logic_error,
"Invalid CombineMode");
1803 if (printDebugOutput) {
1804 std::ostringstream os;
1805 os << *prefix <<
"Nothing to unpack" << endl;
1806 std::cerr << os.str ();
1810 if (printDebugOutput) {
1811 std::ostringstream os;
1812 os << *prefix <<
"Done!" << endl;
1813 std::cerr << os.str ();
1817 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1822 if (isConstantStride ()) {
1823 return static_cast<size_t> (view_.extent (1));
1825 return static_cast<size_t> (whichVectors_.size ());
1833 gblDotImpl (
const RV& dotsOut,
1834 const Teuchos::RCP<
const Teuchos::Comm<int> >& comm,
1835 const bool distributed)
1837 using Teuchos::REDUCE_MAX;
1838 using Teuchos::REDUCE_SUM;
1839 using Teuchos::reduceAll;
1840 typedef typename RV::non_const_value_type dot_type;
1842 const size_t numVecs = dotsOut.extent (0);
1857 if (distributed && ! comm.is_null ()) {
1860 const int nv =
static_cast<int> (numVecs);
1861 const bool commIsInterComm = ::Tpetra::Details::isInterComm (*comm);
1863 if (commIsInterComm) {
1867 typename RV::non_const_type lclDots (Kokkos::ViewAllocateWithoutInitializing (
"tmp"), numVecs);
1869 const dot_type*
const lclSum = lclDots.data ();
1870 dot_type*
const gblSum = dotsOut.data ();
1871 reduceAll<int, dot_type> (*comm, REDUCE_SUM, nv, lclSum, gblSum);
1874 dot_type*
const inout = dotsOut.data ();
1875 reduceAll<int, dot_type> (*comm, REDUCE_SUM, nv, inout, inout);
1881 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1885 const Kokkos::View<dot_type*, Kokkos::HostSpace>& dots)
const
1887 using ::Tpetra::Details::Behavior;
1888 using Kokkos::subview;
1889 using Teuchos::Comm;
1890 using Teuchos::null;
1893 typedef Kokkos::View<dot_type*, Kokkos::HostSpace> RV;
1895 typedef typename dual_view_type::t_dev XMV;
1896 const char tfecfFuncName[] =
"Tpetra::MultiVector::dot: ";
1900 const size_t numVecs = this->getNumVectors ();
1904 const size_t lclNumRows = this->getLocalLength ();
1905 const size_t numDots =
static_cast<size_t> (dots.extent (0));
1906 const bool debug = Behavior::debug ();
1909 const bool compat = this->getMap ()->isCompatible (* (A.
getMap ()));
1910 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1911 (! compat, std::invalid_argument,
"'*this' MultiVector is not "
1912 "compatible with the input MultiVector A. We only test for this "
1923 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1925 "MultiVectors do not have the same local length. "
1926 "this->getLocalLength() = " << lclNumRows <<
" != "
1928 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1930 "MultiVectors must have the same number of columns (vectors). "
1931 "this->getNumVectors() = " << numVecs <<
" != "
1933 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1934 numDots != numVecs, std::runtime_error,
1935 "The output array 'dots' must have the same number of entries as the "
1936 "number of columns (vectors) in *this and A. dots.extent(0) = " <<
1937 numDots <<
" != this->getNumVectors() = " << numVecs <<
".");
1939 const std::pair<size_t, size_t> colRng (0, numVecs);
1940 RV dotsOut = subview (dots, colRng);
1941 RCP<const Comm<int> > comm = this->getMap ().is_null () ? null :
1942 this->getMap ()->getComm ();
1945 if (this->need_sync_device ()) {
1946 const_cast<MV&
>(*this).sync_device ();
1949 const_cast<MV&
>(A).sync_device ();
1952 auto thisView = this->getLocalViewDevice ();
1955 ::Tpetra::Details::lclDot<RV, XMV> (dotsOut, thisView, A_view, lclNumRows, numVecs,
1956 this->whichVectors_.getRawPtr (),
1959 gblDotImpl (dotsOut, comm, this->isDistributed ());
1963 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1968 using ::Tpetra::Details::ProfilingRegion;
1970 using dot_type =
typename MV::dot_type;
1971 ProfilingRegion region (
"Tpetra::multiVectorSingleColumnDot");
1974 Teuchos::RCP<const Teuchos::Comm<int> > comm =
1975 map.is_null () ? Teuchos::null : map->getComm ();
1976 if (comm.is_null ()) {
1977 return Kokkos::ArithTraits<dot_type>::zero ();
1980 using LO = LocalOrdinal;
1984 const LO lclNumRows =
static_cast<LO
> (std::min (x.
getLocalLength (),
1986 const Kokkos::pair<LO, LO> rowRng (0, lclNumRows);
1987 dot_type lclDot = Kokkos::ArithTraits<dot_type>::zero ();
1988 dot_type gblDot = Kokkos::ArithTraits<dot_type>::zero ();
1995 const_cast<MV&
>(y).sync_device ();
2000 auto x_1d = Kokkos::subview (x_2d, rowRng, 0);
2002 auto y_1d = Kokkos::subview (y_2d, rowRng, 0);
2003 lclDot = KokkosBlas::dot (x_1d, y_1d);
2006 using Teuchos::outArg;
2007 using Teuchos::REDUCE_SUM;
2008 using Teuchos::reduceAll;
2009 reduceAll<int, dot_type> (*comm, REDUCE_SUM, lclDot, outArg (gblDot));
2021 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2025 const Teuchos::ArrayView<dot_type>& dots)
const
2028 const char tfecfFuncName[] =
"dot: ";
2031 const size_t numVecs = this->getNumVectors ();
2032 const size_t lclNumRows = this->getLocalLength ();
2033 const size_t numDots =
static_cast<size_t> (dots.size ());
2043 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2045 "MultiVectors do not have the same local length. "
2046 "this->getLocalLength() = " << lclNumRows <<
" != "
2048 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2050 "MultiVectors must have the same number of columns (vectors). "
2051 "this->getNumVectors() = " << numVecs <<
" != "
2053 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2054 (numDots != numVecs, std::runtime_error,
2055 "The output array 'dots' must have the same number of entries as the "
2056 "number of columns (vectors) in *this and A. dots.extent(0) = " <<
2057 numDots <<
" != this->getNumVectors() = " << numVecs <<
".");
2060 const dot_type gblDot = multiVectorSingleColumnDot (const_cast<MV&> (*
this), A);
2064 this->dot (A, Kokkos::View<dot_type*, Kokkos::HostSpace>(dots.getRawPtr (), numDots));
2068 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2071 norm2 (
const Teuchos::ArrayView<mag_type>& norms)
const
2074 using ::Tpetra::Details::NORM_TWO;
2075 using ::Tpetra::Details::ProfilingRegion;
2076 ProfilingRegion region (
"Tpetra::MV::norm2 (host output)");
2079 MV& X =
const_cast<MV&
> (*this);
2080 multiVectorNormImpl (norms.getRawPtr (), X, NORM_TWO);
2083 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2086 norm2 (
const Kokkos::View<mag_type*, Kokkos::HostSpace>& norms)
const
2088 Teuchos::ArrayView<mag_type> norms_av (norms.data (), norms.extent (0));
2089 this->norm2 (norms_av);
2093 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2096 norm1 (
const Teuchos::ArrayView<mag_type>& norms)
const
2099 using ::Tpetra::Details::NORM_ONE;
2100 using ::Tpetra::Details::ProfilingRegion;
2101 ProfilingRegion region (
"Tpetra::MV::norm1 (host output)");
2104 MV& X =
const_cast<MV&
> (*this);
2105 multiVectorNormImpl (norms.data (), X, NORM_ONE);
2108 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2111 norm1 (
const Kokkos::View<mag_type*, Kokkos::HostSpace>& norms)
const
2113 Teuchos::ArrayView<mag_type> norms_av (norms.data (), norms.extent (0));
2114 this->norm1 (norms_av);
2117 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2120 normInf (
const Teuchos::ArrayView<mag_type>& norms)
const
2123 using ::Tpetra::Details::NORM_INF;
2124 using ::Tpetra::Details::ProfilingRegion;
2125 ProfilingRegion region (
"Tpetra::MV::normInf (host output)");
2128 MV& X =
const_cast<MV&
> (*this);
2129 multiVectorNormImpl (norms.getRawPtr (), X, NORM_INF);
2132 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2135 normInf (
const Kokkos::View<mag_type*, Kokkos::HostSpace>& norms)
const
2137 Teuchos::ArrayView<mag_type> norms_av (norms.data (), norms.extent (0));
2138 this->normInf (norms_av);
2141 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2144 meanValue (
const Teuchos::ArrayView<impl_scalar_type>& means)
const
2148 using Kokkos::subview;
2149 using Teuchos::Comm;
2151 using Teuchos::reduceAll;
2152 using Teuchos::REDUCE_SUM;
2153 typedef Kokkos::Details::ArithTraits<impl_scalar_type> ATS;
2155 const size_t lclNumRows = this->getLocalLength ();
2156 const size_t numVecs = this->getNumVectors ();
2157 const size_t numMeans =
static_cast<size_t> (means.size ());
2159 TEUCHOS_TEST_FOR_EXCEPTION(
2160 numMeans != numVecs, std::runtime_error,
2161 "Tpetra::MultiVector::meanValue: means.size() = " << numMeans
2162 <<
" != this->getNumVectors() = " << numVecs <<
".");
2164 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2165 const std::pair<size_t, size_t> colRng (0, numVecs);
2170 typedef Kokkos::View<impl_scalar_type*, device_type> local_view_type;
2172 typename local_view_type::HostMirror::array_layout,
2174 Kokkos::MemoryTraits<Kokkos::Unmanaged> > host_local_view_type;
2175 host_local_view_type meansOut (means.getRawPtr (), numMeans);
2177 RCP<const Comm<int> > comm = this->getMap ().is_null () ? Teuchos::null :
2178 this->getMap ()->getComm ();
2181 const bool useHostVersion = this->need_sync_device ();
2182 if (useHostVersion) {
2184 auto X_lcl = subview (getLocalViewHost (),
2185 rowRng, Kokkos::ALL ());
2187 Kokkos::View<impl_scalar_type*, Kokkos::HostSpace> lclSums (
"MV::meanValue tmp", numVecs);
2188 if (isConstantStride ()) {
2189 KokkosBlas::sum (lclSums, X_lcl);
2192 for (
size_t j = 0; j < numVecs; ++j) {
2193 const size_t col = whichVectors_[j];
2194 KokkosBlas::sum (subview (lclSums, j), subview (X_lcl, ALL (), col));
2201 if (! comm.is_null () && this->isDistributed ()) {
2202 reduceAll (*comm, REDUCE_SUM, static_cast<int> (numVecs),
2203 lclSums.data (), meansOut.data ());
2211 auto X_lcl = subview (this->getLocalViewDevice (),
2212 rowRng, Kokkos::ALL ());
2215 Kokkos::View<impl_scalar_type*, Kokkos::HostSpace> lclSums (
"MV::meanValue tmp", numVecs);
2216 if (isConstantStride ()) {
2217 KokkosBlas::sum (lclSums, X_lcl);
2220 for (
size_t j = 0; j < numVecs; ++j) {
2221 const size_t col = whichVectors_[j];
2222 KokkosBlas::sum (subview (lclSums, j), subview (X_lcl, ALL (), col));
2230 if (! comm.is_null () && this->isDistributed ()) {
2231 reduceAll (*comm, REDUCE_SUM, static_cast<int> (numVecs),
2232 lclSums.data (), meansOut.data ());
2242 const impl_scalar_type OneOverN =
2243 ATS::one () /
static_cast<mag_type> (this->getGlobalLength ());
2244 for (
size_t k = 0; k < numMeans; ++k) {
2245 meansOut(k) = meansOut(k) * OneOverN;
2250 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2256 typedef Kokkos::Details::ArithTraits<IST> ATS;
2257 typedef Kokkos::Random_XorShift64_Pool<typename device_type::execution_space> pool_type;
2258 typedef typename pool_type::generator_type generator_type;
2260 const IST max = Kokkos::rand<generator_type, IST>::max ();
2261 const IST min = ATS::is_signed ? IST (-max) : ATS::zero ();
2263 this->randomize (static_cast<Scalar> (min), static_cast<Scalar> (max));
2267 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2273 typedef Kokkos::Random_XorShift64_Pool<typename device_type::execution_space> pool_type;
2284 const uint64_t myRank =
2285 static_cast<uint64_t
> (this->getMap ()->getComm ()->getRank ());
2286 uint64_t seed64 =
static_cast<uint64_t
> (std::rand ()) + myRank + 17311uLL;
2287 unsigned int seed =
static_cast<unsigned int> (seed64&0xffffffff);
2289 pool_type rand_pool (seed);
2290 const IST max =
static_cast<IST
> (maxVal);
2291 const IST min =
static_cast<IST
> (minVal);
2296 this->view_.clear_sync_state();
2298 this->modify_device ();
2299 auto thisView = this->getLocalViewDevice ();
2301 if (isConstantStride ()) {
2302 Kokkos::fill_random (thisView, rand_pool, min, max);
2305 const size_t numVecs = getNumVectors ();
2306 for (
size_t k = 0; k < numVecs; ++k) {
2307 const size_t col = whichVectors_[k];
2308 auto X_k = Kokkos::subview (thisView, Kokkos::ALL (), col);
2309 Kokkos::fill_random (X_k, rand_pool, min, max);
2314 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2319 using ::Tpetra::Details::ProfilingRegion;
2320 using ::Tpetra::Details::Blas::fill;
2321 using DES =
typename dual_view_type::t_dev::execution_space;
2322 using HES =
typename dual_view_type::t_host::execution_space;
2323 using LO = LocalOrdinal;
2324 ProfilingRegion region (
"Tpetra::MultiVector::putScalar");
2329 const LO lclNumRows =
static_cast<LO
> (this->getLocalLength ());
2330 const LO numVecs =
static_cast<LO
> (this->getNumVectors ());
2336 const bool runOnHost = runKernelOnHost(*
this);
2338 this->clear_sync_state();
2340 this->modify_device ();
2341 auto X = this->getLocalViewDevice ();
2342 if (this->isConstantStride ()) {
2343 fill (DES (), X, theAlpha, lclNumRows, numVecs);
2346 fill (DES (), X, theAlpha, lclNumRows, numVecs,
2347 this->whichVectors_.getRawPtr ());
2351 this->modify_host ();
2352 auto X = this->getLocalViewHost ();
2353 if (this->isConstantStride ()) {
2354 fill (HES (), X, theAlpha, lclNumRows, numVecs);
2357 fill (HES (), X, theAlpha, lclNumRows, numVecs,
2358 this->whichVectors_.getRawPtr ());
2364 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2369 using Teuchos::ArrayRCP;
2370 using Teuchos::Comm;
2373 using LO = LocalOrdinal;
2374 using GO = GlobalOrdinal;
2380 TEUCHOS_TEST_FOR_EXCEPTION(
2381 ! this->isConstantStride (), std::logic_error,
2382 "Tpetra::MultiVector::replaceMap: This method does not currently work "
2383 "if the MultiVector is a column view of another MultiVector (that is, if "
2384 "isConstantStride() == false).");
2414 #ifdef HAVE_TEUCHOS_DEBUG
2428 #endif // HAVE_TEUCHOS_DEBUG
2430 if (this->getMap ().is_null ()) {
2435 TEUCHOS_TEST_FOR_EXCEPTION(
2436 newMap.is_null (), std::invalid_argument,
2437 "Tpetra::MultiVector::replaceMap: both current and new Maps are null. "
2438 "This probably means that the input Map is incorrect.");
2442 const size_t newNumRows = newMap->getNodeNumElements ();
2443 const size_t origNumRows = view_.extent (0);
2444 const size_t numCols = this->getNumVectors ();
2446 if (origNumRows != newNumRows || view_.extent (1) != numCols) {
2447 view_ = allocDualView<ST, LO, GO, Node> (newNumRows, numCols);
2450 else if (newMap.is_null ()) {
2453 const size_t newNumRows =
static_cast<size_t> (0);
2454 const size_t numCols = this->getNumVectors ();
2455 view_ = allocDualView<ST, LO, GO, Node> (newNumRows, numCols);
2458 this->map_ = newMap;
2461 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2469 const IST theAlpha =
static_cast<IST
> (alpha);
2470 if (theAlpha == Kokkos::ArithTraits<IST>::one ()) {
2473 const size_t lclNumRows = getLocalLength ();
2474 const size_t numVecs = getNumVectors ();
2475 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2476 const std::pair<size_t, size_t> colRng (0, numVecs);
2484 const bool useHostVersion = need_sync_device ();
2485 if (useHostVersion) {
2486 auto Y_lcl = Kokkos::subview (getLocalViewHost (), rowRng, ALL ());
2487 if (isConstantStride ()) {
2488 KokkosBlas::scal (Y_lcl, theAlpha, Y_lcl);
2491 for (
size_t k = 0; k < numVecs; ++k) {
2492 const size_t Y_col = isConstantStride () ? k : whichVectors_[k];
2493 auto Y_k = Kokkos::subview (Y_lcl, ALL (), Y_col);
2494 KokkosBlas::scal (Y_k, theAlpha, Y_k);
2499 auto Y_lcl = Kokkos::subview (getLocalViewDevice (), rowRng, ALL ());
2500 if (isConstantStride ()) {
2501 KokkosBlas::scal (Y_lcl, theAlpha, Y_lcl);
2504 for (
size_t k = 0; k < numVecs; ++k) {
2505 const size_t Y_col = isConstantStride () ? k : whichVectors_[k];
2506 auto Y_k = Kokkos::subview (Y_lcl, ALL (), Y_col);
2507 KokkosBlas::scal (Y_k, theAlpha, Y_k);
2514 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2517 scale (
const Teuchos::ArrayView<const Scalar>& alphas)
2519 const size_t numVecs = this->getNumVectors ();
2520 const size_t numAlphas =
static_cast<size_t> (alphas.size ());
2521 TEUCHOS_TEST_FOR_EXCEPTION(
2522 numAlphas != numVecs, std::invalid_argument,
"Tpetra::MultiVector::"
2523 "scale: alphas.size() = " << numAlphas <<
" != this->getNumVectors() = "
2527 using k_alphas_type = Kokkos::DualView<impl_scalar_type*, device_type>;
2528 k_alphas_type k_alphas (
"alphas::tmp", numAlphas);
2529 k_alphas.modify_host ();
2530 for (
size_t i = 0; i < numAlphas; ++i) {
2533 k_alphas.sync_device ();
2535 this->scale (k_alphas.view_device ());
2538 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2541 scale (
const Kokkos::View<const impl_scalar_type*, device_type>& alphas)
2544 using Kokkos::subview;
2546 const size_t lclNumRows = this->getLocalLength ();
2547 const size_t numVecs = this->getNumVectors ();
2548 TEUCHOS_TEST_FOR_EXCEPTION(
2549 static_cast<size_t> (alphas.extent (0)) != numVecs,
2550 std::invalid_argument,
"Tpetra::MultiVector::scale(alphas): "
2551 "alphas.extent(0) = " << alphas.extent (0)
2552 <<
" != this->getNumVectors () = " << numVecs <<
".");
2553 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2554 const std::pair<size_t, size_t> colRng (0, numVecs);
2564 const bool useHostVersion = this->need_sync_device ();
2565 if (useHostVersion) {
2568 auto alphas_h = Kokkos::create_mirror_view (alphas);
2571 auto Y_lcl = subview (this->getLocalViewHost (), rowRng, ALL ());
2572 if (isConstantStride ()) {
2573 KokkosBlas::scal (Y_lcl, alphas_h, Y_lcl);
2576 for (
size_t k = 0; k < numVecs; ++k) {
2577 const size_t Y_col = this->isConstantStride () ? k :
2578 this->whichVectors_[k];
2579 auto Y_k = subview (Y_lcl, ALL (), Y_col);
2582 KokkosBlas::scal (Y_k, alphas_h(k), Y_k);
2587 auto Y_lcl = subview (this->getLocalViewDevice (), rowRng, ALL ());
2588 if (isConstantStride ()) {
2589 KokkosBlas::scal (Y_lcl, alphas, Y_lcl);
2596 auto alphas_h = Kokkos::create_mirror_view (alphas);
2599 for (
size_t k = 0; k < numVecs; ++k) {
2600 const size_t Y_col = this->isConstantStride () ? k :
2601 this->whichVectors_[k];
2602 auto Y_k = subview (Y_lcl, ALL (), Y_col);
2603 KokkosBlas::scal (Y_k, alphas_h(k), Y_k);
2609 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2616 using Kokkos::subview;
2618 const char tfecfFuncName[] =
"scale: ";
2620 const size_t lclNumRows = getLocalLength ();
2621 const size_t numVecs = getNumVectors ();
2623 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2625 "this->getLocalLength() = " << lclNumRows <<
" != A.getLocalLength() = "
2627 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2629 "this->getNumVectors() = " << numVecs <<
" != A.getNumVectors() = "
2633 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2634 const std::pair<size_t, size_t> colRng (0, numVecs);
2637 if (this->need_sync_device ()) {
2638 this->sync_device ();
2641 const_cast<MV&
>(A).sync_device ();
2644 this->modify_device ();
2645 auto Y_lcl_orig = this->getLocalViewDevice ();
2647 auto Y_lcl = subview (Y_lcl_orig, rowRng, ALL ());
2648 auto X_lcl = subview (X_lcl_orig, rowRng, ALL ());
2651 KokkosBlas::scal (Y_lcl, theAlpha, X_lcl);
2655 for (
size_t k = 0; k < numVecs; ++k) {
2656 const size_t Y_col = this->isConstantStride () ? k : this->whichVectors_[k];
2658 auto Y_k = subview (Y_lcl, ALL (), Y_col);
2659 auto X_k = subview (X_lcl, ALL (), X_col);
2661 KokkosBlas::scal (Y_k, theAlpha, X_k);
2668 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2674 const char tfecfFuncName[] =
"reciprocal: ";
2676 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2678 "MultiVectors do not have the same local length. "
2679 "this->getLocalLength() = " << getLocalLength ()
2681 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2682 A.
getNumVectors () != this->getNumVectors (), std::runtime_error,
2683 ": MultiVectors do not have the same number of columns (vectors). "
2684 "this->getNumVectors() = " << getNumVectors ()
2685 <<
" != A.getNumVectors() = " << A.
getNumVectors () <<
".");
2687 const size_t numVecs = getNumVectors ();
2690 if (this->need_sync_device ()) {
2691 this->sync_device ();
2694 const_cast<MV&
>(A).sync_device ();
2696 this->modify_device ();
2698 auto this_view_dev = this->getLocalViewDevice ();
2702 KokkosBlas::reciprocal (this_view_dev, A_view_dev);
2706 using Kokkos::subview;
2707 for (
size_t k = 0; k < numVecs; ++k) {
2708 const size_t this_col = isConstantStride () ? k : whichVectors_[k];
2709 auto vector_k = subview (this_view_dev, ALL (), this_col);
2710 const size_t A_col = isConstantStride () ? k : A.
whichVectors_[k];
2711 auto vector_Ak = subview (A_view_dev, ALL (), A_col);
2712 KokkosBlas::reciprocal (vector_k, vector_Ak);
2717 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2723 const char tfecfFuncName[] =
"abs";
2725 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2727 ": MultiVectors do not have the same local length. "
2728 "this->getLocalLength() = " << getLocalLength ()
2730 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2731 A.
getNumVectors () != this->getNumVectors (), std::runtime_error,
2732 ": MultiVectors do not have the same number of columns (vectors). "
2733 "this->getNumVectors() = " << getNumVectors ()
2734 <<
" != A.getNumVectors() = " << A.
getNumVectors () <<
".");
2735 const size_t numVecs = getNumVectors ();
2738 if (this->need_sync_device ()) {
2739 this->sync_device ();
2742 const_cast<MV&
>(A).sync_device ();
2744 this->modify_device ();
2746 auto this_view_dev = this->getLocalViewDevice ();
2750 KokkosBlas::abs (this_view_dev, A_view_dev);
2754 using Kokkos::subview;
2756 for (
size_t k=0; k < numVecs; ++k) {
2757 const size_t this_col = isConstantStride () ? k : whichVectors_[k];
2758 auto vector_k = subview (this_view_dev, ALL (), this_col);
2759 const size_t A_col = isConstantStride () ? k : A.
whichVectors_[k];
2760 auto vector_Ak = subview (A_view_dev, ALL (), A_col);
2761 KokkosBlas::abs (vector_k, vector_Ak);
2766 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2773 const char tfecfFuncName[] =
"update: ";
2774 using Kokkos::subview;
2780 const size_t lclNumRows = getLocalLength ();
2781 const size_t numVecs = getNumVectors ();
2783 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2785 "this->getLocalLength() = " << lclNumRows <<
" != A.getLocalLength() = "
2787 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2789 "this->getNumVectors() = " << numVecs <<
" != A.getNumVectors() = "
2793 if (this->need_sync_device ()) {
2794 this->sync_device ();
2797 const_cast<MV&
>(A).sync_device ();
2802 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2803 const std::pair<size_t, size_t> colRng (0, numVecs);
2805 auto Y_lcl_orig = this->getLocalViewDevice ();
2806 auto Y_lcl = subview (Y_lcl_orig, rowRng, Kokkos::ALL ());
2808 auto X_lcl = subview (X_lcl_orig, rowRng, Kokkos::ALL ());
2811 this->modify_device ();
2813 KokkosBlas::axpby (theAlpha, X_lcl, theBeta, Y_lcl);
2817 for (
size_t k = 0; k < numVecs; ++k) {
2818 const size_t Y_col = this->isConstantStride () ? k : this->whichVectors_[k];
2820 auto Y_k = subview (Y_lcl, ALL (), Y_col);
2821 auto X_k = subview (X_lcl, ALL (), X_col);
2823 KokkosBlas::axpby (theAlpha, X_k, theBeta, Y_k);
2828 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2835 const Scalar& gamma)
2838 using Kokkos::subview;
2841 const char tfecfFuncName[] =
"update(alpha,A,beta,B,gamma): ";
2845 const size_t lclNumRows = this->getLocalLength ();
2846 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2848 "The input MultiVector A has " << A.
getLocalLength () <<
" local "
2849 "row(s), but this MultiVector has " << lclNumRows <<
" local "
2851 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2853 "The input MultiVector B has " << B.
getLocalLength () <<
" local "
2854 "row(s), but this MultiVector has " << lclNumRows <<
" local "
2856 const size_t numVecs = getNumVectors ();
2857 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2859 "The input MultiVector A has " << A.
getNumVectors () <<
" column(s), "
2860 "but this MultiVector has " << numVecs <<
" column(s).");
2861 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2863 "The input MultiVector B has " << B.
getNumVectors () <<
" column(s), "
2864 "but this MultiVector has " << numVecs <<
" column(s).");
2871 if (this->need_sync_device ()) this->sync_device ();
2876 this->modify_device ();
2878 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2879 const std::pair<size_t, size_t> colRng (0, numVecs);
2884 auto C_lcl = subview (this->getLocalViewDevice (), rowRng, ALL ());
2889 KokkosBlas::update (theAlpha, A_lcl, theBeta, B_lcl, theGamma, C_lcl);
2894 for (
size_t k = 0; k < numVecs; ++k) {
2895 const size_t this_col = isConstantStride () ? k : whichVectors_[k];
2898 KokkosBlas::update (theAlpha, subview (A_lcl, rowRng, A_col),
2899 theBeta, subview (B_lcl, rowRng, B_col),
2900 theGamma, subview (C_lcl, rowRng, this_col));
2905 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2906 Teuchos::ArrayRCP<const Scalar>
2913 const char tfecfFuncName[] =
"getData: ";
2921 auto hostView = getLocalViewHost ();
2922 const size_t col = isConstantStride () ? j : whichVectors_[j];
2923 auto hostView_j = Kokkos::subview (hostView, ALL (), col);
2924 Teuchos::ArrayRCP<const IST> dataAsArcp =
2925 Kokkos::Compat::persistingView (hostView_j, 0, getLocalLength ());
2927 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2928 (static_cast<size_t> (hostView_j.extent (0)) <
2929 static_cast<size_t> (dataAsArcp.size ()), std::logic_error,
2930 "hostView_j.extent(0) = " << hostView_j.extent (0)
2931 <<
" < dataAsArcp.size() = " << dataAsArcp.size () <<
". "
2932 "Please report this bug to the Tpetra developers.");
2934 return Teuchos::arcp_reinterpret_cast<
const Scalar> (dataAsArcp);
2937 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2938 Teuchos::ArrayRCP<Scalar>
2943 using Kokkos::subview;
2946 const char tfecfFuncName[] =
"getDataNonConst: ";
2952 const_cast<MV*
> (
this)->sync_host ();
2957 auto hostView = getLocalViewHost ();
2958 const size_t col = isConstantStride () ? j : whichVectors_[j];
2959 auto hostView_j = subview (hostView, ALL (), col);
2960 Teuchos::ArrayRCP<IST> dataAsArcp =
2961 Kokkos::Compat::persistingView (hostView_j, 0, getLocalLength ());
2963 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2964 (static_cast<size_t> (hostView_j.extent (0)) <
2965 static_cast<size_t> (dataAsArcp.size ()), std::logic_error,
2966 "hostView_j.extent(0) = " << hostView_j.extent (0)
2967 <<
" < dataAsArcp.size() = " << dataAsArcp.size () <<
". "
2968 "Please report this bug to the Tpetra developers.");
2970 return Teuchos::arcp_reinterpret_cast<Scalar> (dataAsArcp);
2973 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2974 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
2976 subCopy (
const Teuchos::ArrayView<const size_t>& cols)
const
2983 bool contiguous =
true;
2984 const size_t numCopyVecs =
static_cast<size_t> (cols.size ());
2985 for (
size_t j = 1; j < numCopyVecs; ++j) {
2986 if (cols[j] != cols[j-1] + static_cast<size_t> (1)) {
2991 if (contiguous && numCopyVecs > 0) {
2992 return this->subCopy (Teuchos::Range1D (cols[0], cols[numCopyVecs-1]));
2995 RCP<const MV> X_sub = this->subView (cols);
2996 RCP<MV> Y (
new MV (this->getMap (), numCopyVecs,
false));
3002 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3003 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3010 RCP<const MV> X_sub = this->subView (colRng);
3011 RCP<MV> Y (
new MV (this->getMap (), static_cast<size_t> (colRng.size ()),
false));
3016 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3020 return origView_.extent (0);
3023 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3027 return origView_.extent (1);
3030 template <
class Scalar,
class LO,
class GO,
class Node>
3033 const Teuchos::RCP<const map_type>& subMap,
3038 using Kokkos::subview;
3039 using Teuchos::outArg;
3042 using Teuchos::reduceAll;
3043 using Teuchos::REDUCE_MIN;
3046 const char prefix[] =
"Tpetra::MultiVector constructor (offsetView): ";
3047 const char suffix[] =
"Please report this bug to the Tpetra developers.";
3050 std::unique_ptr<std::ostringstream> errStrm;
3057 const auto comm = subMap->getComm ();
3058 TEUCHOS_ASSERT( ! comm.is_null () );
3059 const int myRank = comm->getRank ();
3061 const LO lclNumRowsBefore =
static_cast<LO
> (X.
getLocalLength ());
3063 const LO newNumRows =
static_cast<LO
> (subMap->getNodeNumElements ());
3065 std::ostringstream os;
3066 os <<
"Proc " << myRank <<
": " << prefix
3067 <<
"X: {lclNumRows: " << lclNumRowsBefore
3069 <<
", numCols: " << numCols <<
"}, "
3070 <<
"subMap: {lclNumRows: " << newNumRows <<
"}" << endl;
3071 std::cerr << os.str ();
3076 const bool tooManyElts =
3079 errStrm = std::unique_ptr<std::ostringstream> (
new std::ostringstream);
3080 *errStrm <<
" Proc " << myRank <<
": subMap->getNodeNumElements() (="
3081 << newNumRows <<
") + rowOffset (=" << rowOffset
3085 TEUCHOS_TEST_FOR_EXCEPTION
3086 (! debug && tooManyElts, std::invalid_argument,
3087 prefix << errStrm->str () << suffix);
3091 reduceAll<int, int> (*comm, REDUCE_MIN, lclGood, outArg (gblGood));
3093 std::ostringstream gblErrStrm;
3094 const std::string myErrStr =
3095 errStrm.get () !=
nullptr ? errStrm->str () : std::string (
"");
3096 ::Tpetra::Details::gathervPrint (gblErrStrm, myErrStr, *comm);
3097 TEUCHOS_TEST_FOR_EXCEPTION
3098 (
true, std::invalid_argument, gblErrStrm.str ());
3102 using range_type = std::pair<LO, LO>;
3103 const range_type origRowRng
3104 (rowOffset, static_cast<LO> (X.
origView_.extent (0)));
3105 const range_type rowRng
3106 (rowOffset, rowOffset + newNumRows);
3108 dual_view_type newOrigView = subview (X.
origView_, origRowRng, ALL ());
3119 dual_view_type newView =
3120 subview (rowOffset == 0 ? X.
origView_ : X.
view_, rowRng, ALL ());
3127 if (newOrigView.extent (0) == 0 &&
3128 newOrigView.extent (1) != X.
origView_.extent (1)) {
3130 allocDualView<Scalar, LO, GO, Node> (0, X.
getNumVectors ());
3132 if (newView.extent (0) == 0 &&
3133 newView.extent (1) != X.
view_.extent (1)) {
3135 allocDualView<Scalar, LO, GO, Node> (0, X.
getNumVectors ());
3139 MV (subMap, newView, newOrigView) :
3143 const LO lclNumRowsRet =
static_cast<LO
> (subViewMV.getLocalLength ());
3144 const LO numColsRet =
static_cast<LO
> (subViewMV.getNumVectors ());
3145 if (newNumRows != lclNumRowsRet || numCols != numColsRet) {
3147 if (errStrm.get () ==
nullptr) {
3148 errStrm = std::unique_ptr<std::ostringstream> (
new std::ostringstream);
3150 *errStrm <<
" Proc " << myRank <<
3151 ": subMap.getNodeNumElements(): " << newNumRows <<
3152 ", subViewMV.getLocalLength(): " << lclNumRowsRet <<
3153 ", X.getNumVectors(): " << numCols <<
3154 ", subViewMV.getNumVectors(): " << numColsRet << endl;
3156 reduceAll<int, int> (*comm, REDUCE_MIN, lclGood, outArg (gblGood));
3158 std::ostringstream gblErrStrm;
3160 gblErrStrm << prefix <<
"Returned MultiVector has the wrong local "
3161 "dimensions on one or more processes:" << endl;
3163 const std::string myErrStr =
3164 errStrm.get () !=
nullptr ? errStrm->str () : std::string (
"");
3165 ::Tpetra::Details::gathervPrint (gblErrStrm, myErrStr, *comm);
3166 gblErrStrm << suffix << endl;
3167 TEUCHOS_TEST_FOR_EXCEPTION
3168 (
true, std::invalid_argument, gblErrStrm.str ());
3173 std::ostringstream os;
3174 os <<
"Proc " << myRank <<
": " << prefix <<
"Call op=" << endl;
3175 std::cerr << os.str ();
3181 std::ostringstream os;
3182 os <<
"Proc " << myRank <<
": " << prefix <<
"Done!" << endl;
3183 std::cerr << os.str ();
3187 template <
class Scalar,
class LO,
class GO,
class Node>
3191 const size_t rowOffset) :
3196 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3197 Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3200 const size_t offset)
const
3203 return Teuchos::rcp (
new MV (*
this, *subMap, offset));
3206 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3207 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3210 const size_t offset)
3213 return Teuchos::rcp (
new MV (*
this, *subMap, offset));
3216 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3217 Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3219 subView (
const Teuchos::ArrayView<const size_t>& cols)
const
3221 using Teuchos::Array;
3225 const size_t numViewCols =
static_cast<size_t> (cols.size ());
3226 TEUCHOS_TEST_FOR_EXCEPTION(
3227 numViewCols < 1, std::runtime_error,
"Tpetra::MultiVector::subView"
3228 "(const Teuchos::ArrayView<const size_t>&): The input array cols must "
3229 "contain at least one entry, but cols.size() = " << cols.size ()
3234 bool contiguous =
true;
3235 for (
size_t j = 1; j < numViewCols; ++j) {
3236 if (cols[j] != cols[j-1] + static_cast<size_t> (1)) {
3242 if (numViewCols == 0) {
3244 return rcp (
new MV (this->getMap (), numViewCols));
3247 return this->subView (Teuchos::Range1D (cols[0], cols[numViewCols-1]));
3251 if (isConstantStride ()) {
3252 return rcp (
new MV (this->getMap (), view_, origView_, cols));
3255 Array<size_t> newcols (cols.size ());
3256 for (
size_t j = 0; j < numViewCols; ++j) {
3257 newcols[j] = whichVectors_[cols[j]];
3259 return rcp (
new MV (this->getMap (), view_, origView_, newcols ()));
3264 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3265 Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3269 using ::Tpetra::Details::Behavior;
3271 using Kokkos::subview;
3272 using Teuchos::Array;
3276 const char tfecfFuncName[] =
"subView(Range1D): ";
3278 const size_t lclNumRows = this->getLocalLength ();
3279 const size_t numVecs = this->getNumVectors ();
3283 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3284 static_cast<size_t> (colRng.size ()) > numVecs, std::runtime_error,
3285 "colRng.size() = " << colRng.size () <<
" > this->getNumVectors() = "
3287 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3288 numVecs != 0 && colRng.size () != 0 &&
3289 (colRng.lbound () <
static_cast<Teuchos::Ordinal
> (0) ||
3290 static_cast<size_t> (colRng.ubound ()) >= numVecs),
3291 std::invalid_argument,
"Nonempty input range [" << colRng.lbound () <<
3292 "," << colRng.ubound () <<
"] exceeds the valid range of column indices "
3293 "[0, " << numVecs <<
"].");
3295 RCP<const MV> X_ret;
3305 const std::pair<size_t, size_t> rows (0, lclNumRows);
3306 if (colRng.size () == 0) {
3307 const std::pair<size_t, size_t> cols (0, 0);
3308 dual_view_type X_sub = takeSubview (this->view_, ALL (), cols);
3309 X_ret = rcp (
new MV (this->getMap (), X_sub, origView_));
3313 if (isConstantStride ()) {
3314 const std::pair<size_t, size_t> cols (colRng.lbound (),
3315 colRng.ubound () + 1);
3316 dual_view_type X_sub = takeSubview (this->view_, ALL (), cols);
3317 X_ret = rcp (
new MV (this->getMap (), X_sub, origView_));
3320 if (static_cast<size_t> (colRng.size ()) == static_cast<size_t> (1)) {
3323 const std::pair<size_t, size_t> col (whichVectors_[0] + colRng.lbound (),
3324 whichVectors_[0] + colRng.ubound () + 1);
3325 dual_view_type X_sub = takeSubview (view_, ALL (), col);
3326 X_ret = rcp (
new MV (this->getMap (), X_sub, origView_));
3329 Array<size_t> which (whichVectors_.begin () + colRng.lbound (),
3330 whichVectors_.begin () + colRng.ubound () + 1);
3331 X_ret = rcp (
new MV (this->getMap (), view_, origView_, which));
3336 const bool debug = Behavior::debug ();
3338 using Teuchos::Comm;
3339 using Teuchos::outArg;
3340 using Teuchos::REDUCE_MIN;
3341 using Teuchos::reduceAll;
3343 RCP<const Comm<int> > comm = this->getMap ().is_null () ?
3344 Teuchos::null : this->getMap ()->getComm ();
3345 if (! comm.is_null ()) {
3349 if (X_ret.is_null ()) {
3352 reduceAll<int, int> (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess));
3353 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3354 (lclSuccess != 1, std::logic_error,
"X_ret (the subview of this "
3355 "MultiVector; the return value of this method) is null on some MPI "
3356 "process in this MultiVector's communicator. This should never "
3357 "happen. Please report this bug to the Tpetra developers.");
3358 if (! X_ret.is_null () &&
3359 X_ret->getNumVectors () !=
static_cast<size_t> (colRng.size ())) {
3362 reduceAll<int, int> (*comm, REDUCE_MIN, lclSuccess,
3363 outArg (gblSuccess));
3364 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3365 (lclSuccess != 1, std::logic_error,
"X_ret->getNumVectors() != "
3366 "colRng.size(), on at least one MPI process in this MultiVector's "
3367 "communicator. This should never happen. "
3368 "Please report this bug to the Tpetra developers.");
3375 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3376 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3381 return Teuchos::rcp_const_cast<MV> (this->subView (cols));
3385 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3386 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3391 return Teuchos::rcp_const_cast<MV> (this->subView (colRng));
3395 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3401 using Kokkos::subview;
3402 typedef std::pair<size_t, size_t> range_type;
3403 const char tfecfFuncName[] =
"MultiVector(const MultiVector&, const size_t): ";
3406 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3407 (j >= numCols, std::invalid_argument,
"Input index j (== " << j
3408 <<
") exceeds valid column index range [0, " << numCols <<
" - 1].");
3410 static_cast<size_t> (j) :
3412 this->
view_ = takeSubview (X.
view_, Kokkos::ALL (), range_type (jj, jj+1));
3424 const size_t newSize = X.
imports_.extent (0) / numCols;
3425 const size_t offset = jj*newSize;
3427 newImports.d_view = subview (X.
imports_.d_view,
3428 range_type (offset, offset+newSize));
3429 newImports.h_view = subview (X.
imports_.h_view,
3430 range_type (offset, offset+newSize));
3434 const size_t newSize = X.
exports_.extent (0) / numCols;
3435 const size_t offset = jj*newSize;
3437 newExports.d_view = subview (X.
exports_.d_view,
3438 range_type (offset, offset+newSize));
3439 newExports.h_view = subview (X.
exports_.h_view,
3440 range_type (offset, offset+newSize));
3451 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3452 Teuchos::RCP<const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3457 return Teuchos::rcp (
new V (*
this, j));
3461 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3462 Teuchos::RCP<Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3467 return Teuchos::rcp (
new V (*
this, j));
3471 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3474 get1dCopy (
const Teuchos::ArrayView<Scalar>& A,
const size_t LDA)
const
3476 using dev_view_type =
typename dual_view_type::t_dev;
3477 using host_view_type =
typename dual_view_type::t_host;
3479 using input_view_type = Kokkos::View<IST**, Kokkos::LayoutLeft,
3481 Kokkos::MemoryUnmanaged>;
3482 const char tfecfFuncName[] =
"get1dCopy: ";
3484 const size_t numRows = this->getLocalLength ();
3485 const size_t numCols = this->getNumVectors ();
3487 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3488 (LDA < numRows, std::runtime_error,
3489 "LDA = " << LDA <<
" < numRows = " << numRows <<
".");
3490 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3491 (numRows >
size_t (0) && numCols >
size_t (0) &&
3492 size_t (A.size ()) < LDA * (numCols - 1) + numRows,
3494 "A.size() = " << A.size () <<
", but its size must be at least "
3495 << (LDA * (numCols - 1) + numRows) <<
" to hold all the entries.");
3497 const std::pair<size_t, size_t> rowRange (0, numRows);
3498 const std::pair<size_t, size_t> colRange (0, numCols);
3500 input_view_type A_view_orig (reinterpret_cast<IST*> (A.getRawPtr ()),
3502 auto A_view = Kokkos::subview (A_view_orig, rowRange, colRange);
3509 const bool useHostVersion = this->need_sync_device ();
3511 dev_view_type srcView_dev;
3512 host_view_type srcView_host;
3513 if (useHostVersion) {
3514 srcView_host = this->getLocalViewHost ();
3517 srcView_dev = this->getLocalViewDevice ();
3520 if (this->isConstantStride ()) {
3521 if (useHostVersion) {
3529 for (
size_t j = 0; j < numCols; ++j) {
3530 const size_t srcCol = this->whichVectors_[j];
3531 auto dstColView = Kokkos::subview (A_view, rowRange, j);
3533 if (useHostVersion) {
3534 auto srcColView_host = Kokkos::subview (srcView_host, rowRange, srcCol);
3538 auto srcColView_dev = Kokkos::subview (srcView_dev, rowRange, srcCol);
3546 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3549 get2dCopy (
const Teuchos::ArrayView<
const Teuchos::ArrayView<Scalar> >& ArrayOfPtrs)
const
3552 const char tfecfFuncName[] =
"get2dCopy: ";
3553 const size_t numRows = this->getLocalLength ();
3554 const size_t numCols = this->getNumVectors ();
3556 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3557 static_cast<size_t> (ArrayOfPtrs.size ()) != numCols,
3558 std::runtime_error,
"Input array of pointers must contain as many "
3559 "entries (arrays) as the MultiVector has columns. ArrayOfPtrs.size() = "
3560 << ArrayOfPtrs.size () <<
" != getNumVectors() = " << numCols <<
".");
3562 if (numRows != 0 && numCols != 0) {
3564 for (
size_t j = 0; j < numCols; ++j) {
3565 const size_t dstLen =
static_cast<size_t> (ArrayOfPtrs[j].size ());
3566 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3567 dstLen < numRows, std::invalid_argument,
"Array j = " << j <<
" of "
3568 "the input array of arrays is not long enough to fit all entries in "
3569 "that column of the MultiVector. ArrayOfPtrs[j].size() = " << dstLen
3570 <<
" < getLocalLength() = " << numRows <<
".");
3574 for (
size_t j = 0; j < numCols; ++j) {
3575 Teuchos::RCP<const V> X_j = this->getVector (j);
3576 const size_t LDA =
static_cast<size_t> (ArrayOfPtrs[j].size ());
3577 X_j->get1dCopy (ArrayOfPtrs[j], LDA);
3583 template <
class SC,
class LO,
class GO,
class NT>
3586 const bool markModified)
3603 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3604 Teuchos::ArrayRCP<const Scalar>
3608 if (getLocalLength () == 0 || getNumVectors () == 0) {
3609 return Teuchos::null;
3611 TEUCHOS_TEST_FOR_EXCEPTION(
3612 ! isConstantStride (), std::runtime_error,
"Tpetra::MultiVector::"
3613 "get1dView: This MultiVector does not have constant stride, so it is "
3614 "not possible to view its data as a single array. You may check "
3615 "whether a MultiVector has constant stride by calling "
3616 "isConstantStride().");
3620 constexpr
bool markModified =
false;
3622 auto X_lcl = syncMVToHostIfNeededAndGetHostView (const_cast<MV&> (*
this),
3624 Teuchos::ArrayRCP<const impl_scalar_type> dataAsArcp =
3625 Kokkos::Compat::persistingView (X_lcl);
3626 return Teuchos::arcp_reinterpret_cast<
const Scalar> (dataAsArcp);
3630 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3631 Teuchos::ArrayRCP<Scalar>
3635 if (this->getLocalLength () == 0 || this->getNumVectors () == 0) {
3636 return Teuchos::null;
3639 TEUCHOS_TEST_FOR_EXCEPTION
3640 (! isConstantStride (), std::runtime_error,
"Tpetra::MultiVector::"
3641 "get1dViewNonConst: This MultiVector does not have constant stride, "
3642 "so it is not possible to view its data as a single array. You may "
3643 "check whether a MultiVector has constant stride by calling "
3644 "isConstantStride().");
3645 constexpr
bool markModified =
true;
3646 auto X_lcl = syncMVToHostIfNeededAndGetHostView (*
this, markModified);
3647 Teuchos::ArrayRCP<impl_scalar_type> dataAsArcp =
3648 Kokkos::Compat::persistingView (X_lcl);
3649 return Teuchos::arcp_reinterpret_cast<Scalar> (dataAsArcp);
3653 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3654 Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> >
3658 constexpr
bool markModified =
true;
3659 auto X_lcl = syncMVToHostIfNeededAndGetHostView (*
this, markModified);
3665 const size_t myNumRows = this->getLocalLength ();
3666 const size_t numCols = this->getNumVectors ();
3667 const Kokkos::pair<size_t, size_t> rowRange (0, myNumRows);
3669 Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> > views (numCols);
3670 for (
size_t j = 0; j < numCols; ++j) {
3671 const size_t col = this->isConstantStride () ? j : this->whichVectors_[j];
3672 auto X_lcl_j = Kokkos::subview (X_lcl, rowRange, col);
3673 Teuchos::ArrayRCP<impl_scalar_type> X_lcl_j_arcp =
3674 Kokkos::Compat::persistingView (X_lcl_j);
3675 views[j] = Teuchos::arcp_reinterpret_cast<Scalar> (X_lcl_j_arcp);
3680 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3681 Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> >
3688 constexpr
bool markModified =
false;
3690 auto X_lcl = syncMVToHostIfNeededAndGetHostView (const_cast<MV&> (*
this),
3696 const size_t myNumRows = this->getLocalLength ();
3697 const size_t numCols = this->getNumVectors ();
3698 const Kokkos::pair<size_t, size_t> rowRange (0, myNumRows);
3700 Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> > views (numCols);
3701 for (
size_t j = 0; j < numCols; ++j) {
3702 const size_t col = this->isConstantStride () ? j : this->whichVectors_[j];
3703 auto X_lcl_j = Kokkos::subview (X_lcl, rowRange, col);
3704 Teuchos::ArrayRCP<const impl_scalar_type> X_lcl_j_arcp =
3705 Kokkos::Compat::persistingView (X_lcl_j);
3706 views[j] = Teuchos::arcp_reinterpret_cast<
const Scalar> (X_lcl_j_arcp);
3711 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3715 Teuchos::ETransp transB,
3716 const Scalar& alpha,
3721 using ::Tpetra::Details::ProfilingRegion;
3722 using Teuchos::CONJ_TRANS;
3723 using Teuchos::NO_TRANS;
3724 using Teuchos::TRANS;
3727 using Teuchos::rcpFromRef;
3729 using ATS = Kokkos::ArithTraits<impl_scalar_type>;
3731 using STS = Teuchos::ScalarTraits<Scalar>;
3733 const char tfecfFuncName[] =
"multiply: ";
3734 ProfilingRegion region (
"Tpetra::MV::multiply");
3766 const bool C_is_local = ! this->isDistributed ();
3776 auto myMap = this->getMap ();
3777 if (! myMap.is_null () && ! myMap->getComm ().is_null ()) {
3778 using Teuchos::REDUCE_MIN;
3779 using Teuchos::reduceAll;
3780 using Teuchos::outArg;
3782 auto comm = myMap->getComm ();
3783 const size_t A_nrows =
3785 const size_t A_ncols =
3787 const size_t B_nrows =
3789 const size_t B_ncols =
3792 const bool lclBad = this->getLocalLength () != A_nrows ||
3793 this->getNumVectors () != B_ncols || A_ncols != B_nrows;
3795 const int myRank = comm->getRank ();
3796 std::ostringstream errStrm;
3797 if (this->getLocalLength () != A_nrows) {
3798 errStrm <<
"Proc " << myRank <<
": this->getLocalLength()="
3799 << this->getLocalLength () <<
" != A_nrows=" << A_nrows
3800 <<
"." << std::endl;
3802 if (this->getNumVectors () != B_ncols) {
3803 errStrm <<
"Proc " << myRank <<
": this->getNumVectors()="
3804 << this->getNumVectors () <<
" != B_ncols=" << B_ncols
3805 <<
"." << std::endl;
3807 if (A_ncols != B_nrows) {
3808 errStrm <<
"Proc " << myRank <<
": A_ncols="
3809 << A_ncols <<
" != B_nrows=" << B_nrows
3810 <<
"." << std::endl;
3813 const int lclGood = lclBad ? 0 : 1;
3815 reduceAll<int, int> (*comm, REDUCE_MIN, lclGood, outArg (gblGood));
3817 std::ostringstream os;
3818 ::Tpetra::Details::gathervPrint (os, errStrm.str (), *comm);
3820 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3821 (
true, std::runtime_error,
"Inconsistent local dimensions on at "
3822 "least one process in this object's communicator." << std::endl
3824 <<
"C(" << (C_is_local ?
"local" :
"distr") <<
") = "
3826 << (transA == Teuchos::TRANS ?
"^T" :
3827 (transA == Teuchos::CONJ_TRANS ?
"^H" :
""))
3828 <<
"(" << (A_is_local ?
"local" :
"distr") <<
") * "
3830 << (transA == Teuchos::TRANS ?
"^T" :
3831 (transA == Teuchos::CONJ_TRANS ?
"^H" :
""))
3832 <<
"(" << (B_is_local ?
"local" :
"distr") <<
")." << std::endl
3833 <<
"Global dimensions: C(" << this->getGlobalLength () <<
", "
3843 const bool Case1 = C_is_local && A_is_local && B_is_local;
3845 const bool Case2 = C_is_local && ! A_is_local && ! B_is_local &&
3846 transA != NO_TRANS &&
3849 const bool Case3 = ! C_is_local && ! A_is_local && B_is_local &&
3853 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3854 (! Case1 && ! Case2 && ! Case3, std::runtime_error,
3855 "Multiplication of op(A) and op(B) into *this is not a "
3856 "supported use case.");
3858 if (beta != STS::zero () && Case2) {
3864 const int myRank = this->getMap ()->getComm ()->getRank ();
3866 beta_local = ATS::zero ();
3875 if (! isConstantStride ()) {
3876 C_tmp = rcp (
new MV (*
this, Teuchos::Copy));
3878 C_tmp = rcp (
this,
false);
3881 RCP<const MV> A_tmp;
3883 A_tmp = rcp (
new MV (A, Teuchos::Copy));
3885 A_tmp = rcpFromRef (A);
3888 RCP<const MV> B_tmp;
3890 B_tmp = rcp (
new MV (B, Teuchos::Copy));
3892 B_tmp = rcpFromRef (B);
3895 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3896 (! C_tmp->isConstantStride () || ! B_tmp->isConstantStride () ||
3897 ! A_tmp->isConstantStride (), std::logic_error,
3898 "Failed to make temporary constant-stride copies of MultiVectors.");
3901 if (A_tmp->need_sync_device ()) {
3902 const_cast<MV&
> (*A_tmp).sync_device ();
3904 const LO A_lclNumRows = A_tmp->getLocalLength ();
3905 const LO A_numVecs = A_tmp->getNumVectors ();
3906 auto A_lcl = A_tmp->getLocalViewDevice ();
3907 auto A_sub = Kokkos::subview (A_lcl,
3908 std::make_pair (LO (0), A_lclNumRows),
3909 std::make_pair (LO (0), A_numVecs));
3911 if (B_tmp->need_sync_device ()) {
3912 const_cast<MV&
> (*B_tmp).sync_device ();
3914 const LO B_lclNumRows = B_tmp->getLocalLength ();
3915 const LO B_numVecs = B_tmp->getNumVectors ();
3916 auto B_lcl = B_tmp->getLocalViewDevice ();
3917 auto B_sub = Kokkos::subview (B_lcl,
3918 std::make_pair (LO (0), B_lclNumRows),
3919 std::make_pair (LO (0), B_numVecs));
3921 if (C_tmp->need_sync_device ()) {
3922 const_cast<MV&
> (*C_tmp).sync_device ();
3924 const LO C_lclNumRows = C_tmp->getLocalLength ();
3925 const LO C_numVecs = C_tmp->getNumVectors ();
3926 auto C_lcl = C_tmp->getLocalViewDevice ();
3927 auto C_sub = Kokkos::subview (C_lcl,
3928 std::make_pair (LO (0), C_lclNumRows),
3929 std::make_pair (LO (0), C_numVecs));
3930 const char ctransA = (transA == Teuchos::NO_TRANS ?
'N' :
3931 (transA == Teuchos::TRANS ?
'T' :
'C'));
3932 const char ctransB = (transB == Teuchos::NO_TRANS ?
'N' :
3933 (transB == Teuchos::TRANS ?
'T' :
'C'));
3936 ProfilingRegion regionGemm (
"Tpetra::MV::multiply-call-gemm");
3938 this->modify_device ();
3940 KokkosBlas::gemm (&ctransA, &ctransB, alpha_IST, A_sub, B_sub,
3944 if (! isConstantStride ()) {
3949 A_tmp = Teuchos::null;
3950 B_tmp = Teuchos::null;
3959 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3968 using Kokkos::subview;
3971 const char tfecfFuncName[] =
"elementWiseMultiply: ";
3973 const size_t lclNumRows = this->getLocalLength ();
3974 const size_t numVecs = this->getNumVectors ();
3976 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3978 std::runtime_error,
"MultiVectors do not have the same local length.");
3979 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3980 numVecs != B.
getNumVectors (), std::runtime_error,
"this->getNumVectors"
3981 "() = " << numVecs <<
" != B.getNumVectors() = " << B.
getNumVectors ()
3985 if (this->need_sync_device ()) {
3986 this->sync_device ();
3989 const_cast<V&
>(A).sync_device ();
3992 const_cast<MV&
>(B).sync_device ();
3994 this->modify_device ();
3996 auto this_view = this->getLocalViewDevice ();
4006 KokkosBlas::mult (scalarThis,
4009 subview (A_view, ALL (), 0),
4013 for (
size_t j = 0; j < numVecs; ++j) {
4014 const size_t C_col = isConstantStride () ? j : whichVectors_[j];
4016 KokkosBlas::mult (scalarThis,
4017 subview (this_view, ALL (), C_col),
4019 subview (A_view, ALL (), 0),
4020 subview (B_view, ALL (), B_col));
4025 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4031 using ::Tpetra::Details::ProfilingRegion;
4032 ProfilingRegion region (
"Tpetra::MV::reduce");
4034 const auto map = this->getMap ();
4035 if (map.get () ==
nullptr) {
4038 const auto comm = map->getComm ();
4039 if (comm.get () ==
nullptr) {
4045 const bool changed_on_device = this->need_sync_host ();
4046 if (changed_on_device) {
4050 this->modify_device ();
4051 auto X_lcl = this->getLocalViewDevice ();
4055 this->modify_host ();
4056 auto X_lcl = this->getLocalViewHost ();
4061 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4068 #ifdef HAVE_TPETRA_DEBUG
4069 const LocalOrdinal minLocalIndex = this->getMap()->getMinLocalIndex();
4070 const LocalOrdinal maxLocalIndex = this->getMap()->getMaxLocalIndex();
4071 TEUCHOS_TEST_FOR_EXCEPTION(
4072 lclRow < minLocalIndex || lclRow > maxLocalIndex,
4074 "Tpetra::MultiVector::replaceLocalValue: row index " << lclRow
4075 <<
" is invalid. The range of valid row indices on this process "
4076 << this->getMap()->getComm()->getRank() <<
" is [" << minLocalIndex
4077 <<
", " << maxLocalIndex <<
"].");
4078 TEUCHOS_TEST_FOR_EXCEPTION(
4079 vectorIndexOutOfRange(col),
4081 "Tpetra::MultiVector::replaceLocalValue: vector index " << col
4082 <<
" of the multivector is invalid.");
4084 const size_t colInd = isConstantStride () ? col : whichVectors_[col];
4085 view_.h_view (lclRow, colInd) = ScalarValue;
4089 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4095 const bool atomic)
const
4097 #ifdef HAVE_TPETRA_DEBUG
4098 const LocalOrdinal minLocalIndex = this->getMap()->getMinLocalIndex();
4099 const LocalOrdinal maxLocalIndex = this->getMap()->getMaxLocalIndex();
4100 TEUCHOS_TEST_FOR_EXCEPTION(
4101 lclRow < minLocalIndex || lclRow > maxLocalIndex,
4103 "Tpetra::MultiVector::sumIntoLocalValue: row index " << lclRow
4104 <<
" is invalid. The range of valid row indices on this process "
4105 << this->getMap()->getComm()->getRank() <<
" is [" << minLocalIndex
4106 <<
", " << maxLocalIndex <<
"].");
4107 TEUCHOS_TEST_FOR_EXCEPTION(
4108 vectorIndexOutOfRange(col),
4110 "Tpetra::MultiVector::sumIntoLocalValue: vector index " << col
4111 <<
" of the multivector is invalid.");
4113 const size_t colInd = isConstantStride () ? col : whichVectors_[col];
4115 Kokkos::atomic_add (& (view_.h_view(lclRow, colInd)), value);
4118 view_.h_view (lclRow, colInd) += value;
4123 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4132 const LocalOrdinal lclRow = this->map_->getLocalElement (gblRow);
4133 #ifdef HAVE_TPETRA_DEBUG
4134 const char tfecfFuncName[] =
"replaceGlobalValue: ";
4135 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4136 (lclRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid (),
4138 "Global row index " << gblRow <<
" is not present on this process "
4139 << this->getMap ()->getComm ()->getRank () <<
".");
4140 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4141 (this->vectorIndexOutOfRange (col), std::runtime_error,
4142 "Vector index " << col <<
" of the MultiVector is invalid.");
4143 #endif // HAVE_TPETRA_DEBUG
4144 this->replaceLocalValue (lclRow, col, ScalarValue);
4147 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4153 const bool atomic)
const
4157 const LocalOrdinal lclRow = this->map_->getLocalElement (globalRow);
4158 #ifdef HAVE_TEUCHOS_DEBUG
4159 TEUCHOS_TEST_FOR_EXCEPTION(
4160 lclRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid (),
4162 "Tpetra::MultiVector::sumIntoGlobalValue: Global row index " << globalRow
4163 <<
" is not present on this process "
4164 << this->getMap ()->getComm ()->getRank () <<
".");
4165 TEUCHOS_TEST_FOR_EXCEPTION(
4166 vectorIndexOutOfRange(col),
4168 "Tpetra::MultiVector::sumIntoGlobalValue: Vector index " << col
4169 <<
" of the multivector is invalid.");
4171 this->sumIntoLocalValue (lclRow, col, value, atomic);
4174 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4176 Teuchos::ArrayRCP<T>
4182 typename dual_view_type::array_layout,
4184 const size_t col = isConstantStride () ? j : whichVectors_[j];
4185 col_dual_view_type X_col =
4186 Kokkos::subview (view_, Kokkos::ALL (), col);
4187 return Kokkos::Compat::persistingView (X_col.d_view);
4191 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4195 view_.clear_sync_state ();
4198 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4216 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4220 view_.sync_device ();
4223 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4227 return view_.need_sync_host ();
4230 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4234 return view_.need_sync_device ();
4237 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4241 view_.modify_device ();
4244 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4248 view_.modify_host ();
4251 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4255 return view_.view_device ();
4258 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4262 return view_.view_host ();
4265 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4270 using Teuchos::TypeNameTraits;
4272 std::ostringstream out;
4273 out <<
"\"" << className <<
"\": {";
4274 out <<
"Template parameters: {Scalar: " << TypeNameTraits<Scalar>::name ()
4275 <<
", LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name ()
4276 <<
", GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name ()
4277 <<
", Node" << Node::name ()
4279 if (this->getObjectLabel () !=
"") {
4280 out <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
4282 out <<
", numRows: " << this->getGlobalLength ();
4283 if (className !=
"Tpetra::Vector") {
4284 out <<
", numCols: " << this->getNumVectors ()
4285 <<
", isConstantStride: " << this->isConstantStride ();
4287 if (this->isConstantStride ()) {
4288 out <<
", columnStride: " << this->getStride ();
4295 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4300 return this->descriptionImpl (
"Tpetra::MultiVector");
4303 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4308 typedef LocalOrdinal LO;
4311 if (vl <= Teuchos::VERB_LOW) {
4312 return std::string ();
4314 auto map = this->getMap ();
4315 if (map.is_null ()) {
4316 return std::string ();
4318 auto outStringP = Teuchos::rcp (
new std::ostringstream ());
4319 auto outp = Teuchos::getFancyOStream (outStringP);
4320 Teuchos::FancyOStream& out = *outp;
4321 auto comm = map->getComm ();
4322 const int myRank = comm->getRank ();
4323 const int numProcs = comm->getSize ();
4325 out <<
"Process " << myRank <<
" of " << numProcs <<
":" << endl;
4326 Teuchos::OSTab tab1 (out);
4329 const LO lclNumRows =
static_cast<LO
> (this->getLocalLength ());
4330 out <<
"Local number of rows: " << lclNumRows << endl;
4332 if (vl > Teuchos::VERB_MEDIUM) {
4335 if (this->getNumVectors () !=
static_cast<size_t> (1)) {
4336 out <<
"isConstantStride: " << this->isConstantStride () << endl;
4338 if (this->isConstantStride ()) {
4339 out <<
"Column stride: " << this->getStride () << endl;
4342 if (vl > Teuchos::VERB_HIGH && lclNumRows > 0) {
4346 typename dual_view_type::t_host X_host;
4347 if (need_sync_host ()) {
4353 auto X_dev = getLocalViewDevice ();
4354 auto X_host_copy = Kokkos::create_mirror_view (X_dev);
4356 X_host = X_host_copy;
4362 X_host = getLocalViewHost ();
4366 out <<
"Values: " << endl
4368 const LO numCols =
static_cast<LO
> (this->getNumVectors ());
4370 for (LO i = 0; i < lclNumRows; ++i) {
4372 if (i + 1 < lclNumRows) {
4378 for (LO i = 0; i < lclNumRows; ++i) {
4379 for (LO j = 0; j < numCols; ++j) {
4381 if (j + 1 < numCols) {
4385 if (i + 1 < lclNumRows) {
4395 return outStringP->str ();
4398 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4402 const std::string& className,
4403 const Teuchos::EVerbosityLevel verbLevel)
const
4405 using Teuchos::TypeNameTraits;
4406 using Teuchos::VERB_DEFAULT;
4407 using Teuchos::VERB_NONE;
4408 using Teuchos::VERB_LOW;
4410 const Teuchos::EVerbosityLevel vl =
4411 (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
4413 if (vl == VERB_NONE) {
4420 auto map = this->getMap ();
4421 if (map.is_null ()) {
4424 auto comm = map->getComm ();
4425 if (comm.is_null ()) {
4429 const int myRank = comm->getRank ();
4438 Teuchos::RCP<Teuchos::OSTab> tab0, tab1;
4442 tab0 = Teuchos::rcp (
new Teuchos::OSTab (out));
4443 out <<
"\"" << className <<
"\":" << endl;
4444 tab1 = Teuchos::rcp (
new Teuchos::OSTab (out));
4446 out <<
"Template parameters:" << endl;
4447 Teuchos::OSTab tab2 (out);
4448 out <<
"Scalar: " << TypeNameTraits<Scalar>::name () << endl
4449 <<
"LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name () << endl
4450 <<
"GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name () << endl
4451 <<
"Node: " << Node::name () << endl;
4453 if (this->getObjectLabel () !=
"") {
4454 out <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
4456 out <<
"Global number of rows: " << this->getGlobalLength () << endl;
4457 if (className !=
"Tpetra::Vector") {
4458 out <<
"Number of columns: " << this->getNumVectors () << endl;
4465 if (vl > VERB_LOW) {
4466 const std::string lclStr = this->localDescribeToString (vl);
4467 ::Tpetra::Details::gathervPrint (out, lclStr, *comm);
4471 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4475 const Teuchos::EVerbosityLevel verbLevel)
const
4477 this->describeImpl (out,
"Tpetra::MultiVector", verbLevel);
4480 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4485 replaceMap (newMap);
4488 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4493 using ::Tpetra::Details::localDeepCopy;
4494 const char prefix[] =
"Tpetra::MultiVector::assign: ";
4496 TEUCHOS_TEST_FOR_EXCEPTION
4498 this->getNumVectors () != src.
getNumVectors (), std::invalid_argument,
4499 prefix <<
"Global dimensions of the two Tpetra::MultiVector "
4500 "objects do not match. src has dimensions [" << src.
getGlobalLength ()
4501 <<
"," << src.
getNumVectors () <<
"], and *this has dimensions ["
4502 << this->getGlobalLength () <<
"," << this->getNumVectors () <<
"].");
4504 TEUCHOS_TEST_FOR_EXCEPTION
4505 (this->getLocalLength () != src.
getLocalLength (), std::invalid_argument,
4506 prefix <<
"The local row counts of the two Tpetra::MultiVector "
4507 "objects do not match. src has " << src.
getLocalLength () <<
" row(s) "
4508 <<
" and *this has " << this->getLocalLength () <<
" row(s).");
4514 this->clear_sync_state();
4515 this->modify_device ();
4520 if (src_last_updated_on_host) {
4521 localDeepCopy (this->getLocalViewDevice (),
4523 this->isConstantStride (),
4525 this->whichVectors_ (),
4529 localDeepCopy (this->getLocalViewDevice (),
4531 this->isConstantStride (),
4533 this->whichVectors_ (),
4538 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4543 using ::Tpetra::Details::PackTraits;
4546 const size_t l1 = this->getLocalLength();
4548 if ((l1!=l2) || (this->getNumVectors() != vec.
getNumVectors())) {
4555 auto v1 = this->getLocalViewHost ();
4557 if (PackTraits<ST>::packValueCount (v1(0,0)) !=
4558 PackTraits<ST>::packValueCount (v2(0,0))) {
4565 template <
class ST,
class LO,
class GO,
class NT>
4568 std::swap(mv.
map_, this->map_);
4569 std::swap(mv.
view_, this->view_);
4570 std::swap(mv.
origView_, this->origView_);
4574 #ifdef HAVE_TPETRACORE_TEUCHOSNUMERICS
4575 template <
class ST,
class LO,
class GO,
class NT>
4578 const Teuchos::SerialDenseMatrix<int, ST>& src)
4580 using ::Tpetra::Details::localDeepCopy;
4582 using IST =
typename MV::impl_scalar_type;
4583 using input_view_type =
4584 Kokkos::View<
const IST**, Kokkos::LayoutLeft,
4585 Kokkos::HostSpace, Kokkos::MemoryUnmanaged>;
4586 using pair_type = std::pair<LO, LO>;
4588 const LO numRows =
static_cast<LO
> (src.numRows ());
4589 const LO numCols =
static_cast<LO
> (src.numCols ());
4591 TEUCHOS_TEST_FOR_EXCEPTION
4594 std::invalid_argument,
"Tpetra::deep_copy: On Process "
4595 << dst.
getMap ()->getComm ()->getRank () <<
", dst is "
4597 <<
", but src is " << numRows <<
" x " << numCols <<
".");
4599 const IST* src_raw =
reinterpret_cast<const IST*
> (src.values ());
4600 input_view_type src_orig (src_raw, src.stride (), numCols);
4601 input_view_type src_view =
4602 Kokkos::subview (src_orig, pair_type (0, numRows), Kokkos::ALL ());
4606 constexpr
bool src_isConstantStride =
true;
4607 Teuchos::ArrayView<const size_t> srcWhichVectors (
nullptr, 0);
4611 src_isConstantStride,
4612 getMultiVectorWhichVectors (dst),
4616 template <
class ST,
class LO,
class GO,
class NT>
4618 deep_copy (Teuchos::SerialDenseMatrix<int, ST>& dst,
4619 const MultiVector<ST, LO, GO, NT>& src)
4621 using ::Tpetra::Details::localDeepCopy;
4622 using MV = MultiVector<ST, LO, GO, NT>;
4623 using IST =
typename MV::impl_scalar_type;
4624 using output_view_type =
4625 Kokkos::View<IST**, Kokkos::LayoutLeft,
4626 Kokkos::HostSpace, Kokkos::MemoryUnmanaged>;
4627 using pair_type = std::pair<LO, LO>;
4629 const LO numRows =
static_cast<LO
> (dst.numRows ());
4630 const LO numCols =
static_cast<LO
> (dst.numCols ());
4632 TEUCHOS_TEST_FOR_EXCEPTION
4633 (numRows != static_cast<LO> (src.getLocalLength ()) ||
4634 numCols != static_cast<LO> (src.getNumVectors ()),
4635 std::invalid_argument,
"Tpetra::deep_copy: On Process "
4636 << src.getMap ()->getComm ()->getRank () <<
", src is "
4637 << src.getLocalLength () <<
" x " << src.getNumVectors ()
4638 <<
", but dst is " << numRows <<
" x " << numCols <<
".");
4640 IST* dst_raw =
reinterpret_cast<IST*
> (dst.values ());
4641 output_view_type dst_orig (dst_raw, dst.stride (), numCols);
4643 Kokkos::subview (dst_orig, pair_type (0, numRows), Kokkos::ALL ());
4645 constexpr
bool dst_isConstantStride =
true;
4646 Teuchos::ArrayView<const size_t> dstWhichVectors (
nullptr, 0);
4649 if (src.need_sync_host ()) {
4650 localDeepCopy (dst_view,
4651 src.getLocalViewDevice (),
4652 dst_isConstantStride,
4653 src.isConstantStride (),
4655 getMultiVectorWhichVectors (src));
4659 src.getLocalViewHost (),
4660 dst_isConstantStride,
4661 src.isConstantStride (),
4663 getMultiVectorWhichVectors (src));
4666 #endif // HAVE_TPETRACORE_TEUCHOSNUMERICS
4668 template <
class Scalar,
class LO,
class GO,
class NT>
4669 Teuchos::RCP<MultiVector<Scalar, LO, GO, NT> >
4673 typedef MultiVector<Scalar, LO, GO, NT> MV;
4674 return Teuchos::rcp (
new MV (map, numVectors));
4677 template <
class ST,
class LO,
class GO,
class NT>
4678 MultiVector<ST, LO, GO, NT>
4695 #ifdef HAVE_TPETRACORE_TEUCHOSNUMERICS
4696 # define TPETRA_MULTIVECTOR_INSTANT(SCALAR,LO,GO,NODE) \
4697 template class MultiVector< SCALAR , LO , GO , NODE >; \
4698 template MultiVector< SCALAR , LO , GO , NODE > createCopy( const MultiVector< SCALAR , LO , GO , NODE >& src); \
4699 template Teuchos::RCP<MultiVector< SCALAR , LO , GO , NODE > > createMultiVector (const Teuchos::RCP<const Map<LO, GO, NODE> >& map, size_t numVectors); \
4700 template void deep_copy (MultiVector<SCALAR, LO, GO, NODE>& dst, const Teuchos::SerialDenseMatrix<int, SCALAR>& src); \
4701 template void deep_copy (Teuchos::SerialDenseMatrix<int, SCALAR>& dst, const MultiVector<SCALAR, LO, GO, NODE>& src);
4704 # define TPETRA_MULTIVECTOR_INSTANT(SCALAR,LO,GO,NODE) \
4705 template class MultiVector< SCALAR , LO , GO , NODE >; \
4706 template MultiVector< SCALAR , LO , GO , NODE > createCopy( const MultiVector< SCALAR , LO , GO , NODE >& src);
4708 #endif // HAVE_TPETRACORE_TEUCHOSNUMERICS
4710 #endif // TPETRA_MULTIVECTOR_DEF_HPP
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > createMultiVector(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &map, const size_t numVectors)
Nonmember MultiVector "constructor": Create a MultiVector from a given Map.
Teuchos::RCP< const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > subView(const Teuchos::Range1D &colRng) const
Return a const MultiVector with const views of selected columns.
dual_view_type::t_host getLocalViewHost() const
A local Kokkos::View of host memory.
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object with the given verbosity level to a FancyOStream.
dual_view_type view_
The Kokkos::DualView containing the MultiVector's data.
Kokkos::DualView< size_t *, buffer_device_type > numExportPacketsPerLID_
Number of packets to send for each send operation.
Teuchos::ArrayRCP< Scalar > get1dViewNonConst()
Nonconst persisting (1-D) view of this multivector's local values.
void meanValue(const Teuchos::ArrayView< impl_scalar_type > &means) const
Compute mean (average) value of each column.
Teuchos::RCP< const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > offsetView(const Teuchos::RCP< const map_type > &subMap, const size_t offset) const
Return a const view of a subset of rows.
void get2dCopy(const Teuchos::ArrayView< const Teuchos::ArrayView< Scalar > > &ArrayOfPtrs) const
Fill the given array with a copy of this multivector's local values.
Declaration and definition of Tpetra::Details::Blas::fill, an implementation detail of Tpetra::MultiV...
void clear_sync_state()
Clear "modified" flags on both host and device sides.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
typename device_type::execution_space execution_space
Type of the (new) Kokkos execution space.
Kokkos::DualView< impl_scalar_type **, Kokkos::LayoutLeft, execution_space > dual_view_type
Kokkos::DualView specialization used by this class.
void putScalar(const Scalar &value)
Set all values in the multivector with the given value.
size_t getNumVectors() const
Number of columns in the multivector.
size_t getLocalLength() const
Local number of rows on the calling process.
Declaration of a function that prints strings from each process.
void replaceMap(const Teuchos::RCP< const map_type > &map)
Replace the underlying Map in place.
Teuchos::ArrayRCP< Teuchos::ArrayRCP< Scalar > > get2dViewNonConst()
Return non-const persisting pointers to values.
bool isConstantStride() const
Whether this multivector has constant stride between columns.
MultiVector< ST, LO, GO, NT > createCopy(const MultiVector< ST, LO, GO, NT > &src)
Return a deep copy of the given MultiVector.
std::string localDescribeToString(const Teuchos::EVerbosityLevel vl) const
Print the calling process' verbose describe() information to the returned string. ...
One or more distributed dense vectors.
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > subCopy(const Teuchos::Range1D &colRng) const
Return a MultiVector with copies of selected columns.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
static size_t multivectorKernelLocationThreshold()
the threshold for transitioning from device to host
void sumIntoGlobalValue(const GlobalOrdinal gblRow, const size_t col, const impl_scalar_type &value, const bool atomic=useAtomicUpdatesByDefault) const
Update (+=) a value in host memory, using global row index.
bool isDistributed() const
Whether this is a globally distributed object.
void get1dCopy(const Teuchos::ArrayView< Scalar > &A, const size_t LDA) const
Fill the given array with a copy of this multivector's local values.
MultiVector< ST, LO, GO, NT > createCopy(const MultiVector< ST, LO, GO, NT > &src)
Return a deep copy of the given MultiVector.
static bool debug()
Whether Tpetra is in debug mode.
bool need_sync_device() const
Whether this MultiVector needs synchronization to the device.
bool isSameSize(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &vec) const
size_t getOrigNumLocalCols() const
"Original" number of columns in the (local) data.
Teuchos::ArrayRCP< const Scalar > getData(size_t j) const
Const view of the local values in a particular vector of this multivector.
size_t global_size_t
Global size_t object.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Kokkos::DualView< size_t *, buffer_device_type > numImportPacketsPerLID_
Number of packets to receive for each receive operation.
Insert new values that don't currently exist.
Kokkos::DualView< packet_type *, buffer_device_type > imports_
Buffer into which packed data are imported (received from other processes).
Kokkos::DualView< packet_type *, buffer_device_type > exports_
Buffer from which packed data are exported (sent to other processes).
void sync_host()
Synchronize to Host.
void scale(const Scalar &alpha)
Scale in place: this = alpha*this.
void multiply(Teuchos::ETransp transA, Teuchos::ETransp transB, const Scalar &alpha, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, const Scalar &beta)
Matrix-matrix multiplication: this = beta*this + alpha*op(A)*op(B).
Kokkos::DualView< T *, DT > getDualViewCopyFromArrayView(const Teuchos::ArrayView< const T > &x_av, const char label[], const bool leaveOnHost)
Get a 1-D Kokkos::DualView which is a deep copy of the input Teuchos::ArrayView (which views host mem...
void update(const Scalar &alpha, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Scalar &beta)
Update: this = beta*this + alpha*A.
void elementWiseMultiply(Scalar scalarAB, const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, Scalar scalarThis)
Multiply a Vector A elementwise by a MultiVector B.
Teuchos::RCP< Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > getVectorNonConst(const size_t j)
Return a Vector which is a nonconst view of column j.
void assign(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &src)
Copy the contents of src into *this (deep copy).
static void allReduceView(const OutputViewType &output, const InputViewType &input, const Teuchos::Comm< int > &comm)
All-reduce from input Kokkos::View to output Kokkos::View.
dual_view_type origView_
The "original view" of the MultiVector's data.
std::string descriptionImpl(const std::string &className) const
Implementation of description() for this class, and its subclass Vector.
void normInf(const Kokkos::View< mag_type *, Kokkos::HostSpace > &norms) const
Compute the infinity-norm of each vector (column), storing the result in a host View.
void replaceGlobalValue(const GlobalOrdinal gblRow, const size_t col, const impl_scalar_type &value) const
Replace value in host memory, using global row index.
typename Kokkos::Details::InnerProductSpaceTraits< impl_scalar_type >::dot_type dot_type
Type of an inner ("dot") product result.
static bool verbose()
Whether Tpetra is in verbose mode.
CombineMode
Rule for combining data in an Import or Export.
Sum new values into existing values.
Teuchos::RCP< const map_type > map_
The Map over which this object is distributed.
bool reallocDualViewIfNeeded(Kokkos::DualView< ValueType *, DeviceType > &dv, const size_t newSize, const char newLabel[], const size_t tooBigFactor=2, const bool needFenceBeforeRealloc=true)
Reallocate the DualView in/out argument, if needed.
Declaration of Tpetra::Details::isInterComm.
void norm2(const Kokkos::View< mag_type *, Kokkos::HostSpace > &norms) const
Compute the two-norm of each vector (column), storing the result in a host View.
void dot(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Teuchos::ArrayView< dot_type > &dots) const
Compute the dot product of each corresponding pair of vectors (columns) in A and B.
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > subViewNonConst(const Teuchos::Range1D &colRng)
Return a MultiVector with views of selected columns.
virtual size_t constantNumberOfPackets() const
Number of packets to send per LID.
bool need_sync_host() const
Whether this MultiVector needs synchronization to the host.
void modify_device()
Mark data as modified on the device side.
Replace old value with maximum of magnitudes of old and new values.
typename Kokkos::ArithTraits< impl_scalar_type >::mag_type mag_type
Type of a norm result.
Abstract base class for objects that can be the source of an Import or Export operation.
Teuchos::ArrayRCP< Scalar > getDataNonConst(size_t j)
View of the local values in a particular vector of this multivector.
Declaration and definition of Tpetra::Details::reallocDualViewIfNeeded, an implementation detail of T...
Replace existing values with new values.
void swap(MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &mv)
Swap contents of mv with contents of *this.
void randomize()
Set all values in the multivector to pseudorandom numbers.
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > offsetViewNonConst(const Teuchos::RCP< const map_type > &subMap, const size_t offset)
Return a nonconst view of a subset of rows.
Teuchos::ArrayRCP< T > getSubArrayRCP(Teuchos::ArrayRCP< T > arr, size_t j) const
Persisting view of j-th column in the given ArrayRCP.
std::string combineModeToString(const CombineMode combineMode)
Human-readable string representation of the given CombineMode.
Teuchos::RCP< const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > getVector(const size_t j) const
Return a Vector which is a const view of column j.
void modify_host()
Mark data as modified on the host side.
MultiVector()
Default constructor: makes a MultiVector with no rows or columns.
void replaceLocalValue(const LocalOrdinal lclRow, const size_t col, const impl_scalar_type &value) const
Replace value in host memory, using local (row) index.
typename Kokkos::Details::ArithTraits< Scalar >::val_type impl_scalar_type
The type used internally in place of Scalar.
void norm1(const Kokkos::View< mag_type *, Kokkos::HostSpace > &norms) const
Compute the one-norm of each vector (column), storing the result in a host view.
A parallel distribution of indices over processes.
size_t getStride() const
Stride between columns in the multivector.
A distributed dense vector.
Stand-alone utility functions and macros.
virtual std::string description() const
A simple one-line description of this object.
void reduce()
Sum values of a locally replicated multivector across all processes.
void describeImpl(Teuchos::FancyOStream &out, const std::string &className, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Implementation of describe() for this class, and its subclass Vector.
typename map_type::local_ordinal_type local_ordinal_type
The type of local indices that this class uses.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
virtual bool checkSizes(const SrcDistObject &sourceObj)
Whether data redistribution between sourceObj and this object is legal.
dual_view_type::t_dev getLocalViewDevice() const
A local Kokkos::View of device memory.
void sumIntoLocalValue(const LocalOrdinal lclRow, const size_t col, const impl_scalar_type &val, const bool atomic=useAtomicUpdatesByDefault) const
Update (+=) a value in host memory, using local row index.
Teuchos::ArrayRCP< Teuchos::ArrayRCP< const Scalar > > get2dView() const
Return const persisting pointers to values.
Declaration and definition of Tpetra::Details::normImpl, which is an implementation detail of Tpetra:...
void normImpl(MagnitudeType norms[], const Kokkos::View< const ValueType **, ArrayLayout, DeviceType > &X, const EWhichNorm whichNorm, const Teuchos::ArrayView< const size_t > &whichVecs, const bool isConstantStride, const bool isDistributed, const Teuchos::Comm< int > *comm)
Implementation of MultiVector norms.
void reciprocal(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
Put element-wise reciprocal values of input Multi-vector in target, this(i,j) = 1/A(i,j).
Base class for distributed Tpetra objects that support data redistribution.
void localDeepCopy(const DstViewType &dst, const SrcViewType &src, const bool dstConstStride, const bool srcConstStride, const DstWhichVecsType &dstWhichVecs, const SrcWhichVecsType &srcWhichVecs)
Implementation of Tpetra::MultiVector deep copy of local data.
global_size_t getGlobalLength() const
Global number of rows in the multivector.
void abs(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
Put element-wise absolute values of input Multi-vector in target: A = abs(this)
EWhichNorm
Input argument for normImpl() (which see).
Teuchos::ArrayRCP< const Scalar > get1dView() const
Const persisting (1-D) view of this multivector's local values.
size_t getOrigNumLocalRows() const
"Original" number of rows in the (local) data.
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap)
Remove processes owning zero rows from the Map and their communicator.
All-reduce a 1-D or 2-D Kokkos::View.
Declaration and definition of Tpetra::Details::lclDot, an implementation detail of Tpetra::MultiVecto...
void sync_device()
Synchronize to Device.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Teuchos::Array< size_t > whichVectors_
Indices of columns this multivector is viewing.