42 #ifndef TPETRA_MULTIVECTOR_DEF_HPP
43 #define TPETRA_MULTIVECTOR_DEF_HPP
54 #include "Tpetra_Vector.hpp"
57 #include "Tpetra_Details_checkView.hpp"
59 #include "Tpetra_Details_gathervPrint.hpp"
66 #ifdef HAVE_TPETRACORE_TEUCHOSNUMERICS
67 # include "Teuchos_SerialDenseMatrix.hpp"
68 #endif // HAVE_TPETRACORE_TEUCHOSNUMERICS
69 #include "Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels.hpp"
70 #include "KokkosCompat_View.hpp"
71 #include "KokkosBlas.hpp"
72 #include "KokkosKernels_Utils.hpp"
73 #include "Kokkos_Random.hpp"
74 #include "Kokkos_ArithTraits.hpp"
78 #ifdef HAVE_TPETRA_INST_FLOAT128
81 template<
class Generator>
82 struct rand<Generator, __float128> {
83 static KOKKOS_INLINE_FUNCTION __float128 max ()
85 return static_cast<__float128
> (1.0);
87 static KOKKOS_INLINE_FUNCTION __float128
92 const __float128 scalingFactor =
93 static_cast<__float128
> (std::numeric_limits<double>::min ()) /
94 static_cast<__float128> (2.0);
95 const __float128 higherOrderTerm =
static_cast<__float128
> (gen.drand ());
96 const __float128 lowerOrderTerm =
97 static_cast<__float128
> (gen.drand ()) * scalingFactor;
98 return higherOrderTerm + lowerOrderTerm;
100 static KOKKOS_INLINE_FUNCTION __float128
101 draw (Generator& gen,
const __float128& range)
104 const __float128 scalingFactor =
105 static_cast<__float128
> (std::numeric_limits<double>::min ()) /
106 static_cast<__float128> (2.0);
107 const __float128 higherOrderTerm =
108 static_cast<__float128
> (gen.drand (range));
109 const __float128 lowerOrderTerm =
110 static_cast<__float128
> (gen.drand (range)) * scalingFactor;
111 return higherOrderTerm + lowerOrderTerm;
113 static KOKKOS_INLINE_FUNCTION __float128
114 draw (Generator& gen,
const __float128& start,
const __float128& end)
117 const __float128 scalingFactor =
118 static_cast<__float128
> (std::numeric_limits<double>::min ()) /
119 static_cast<__float128> (2.0);
120 const __float128 higherOrderTerm =
121 static_cast<__float128
> (gen.drand (start, end));
122 const __float128 lowerOrderTerm =
123 static_cast<__float128
> (gen.drand (start, end)) * scalingFactor;
124 return higherOrderTerm + lowerOrderTerm;
128 #endif // HAVE_TPETRA_INST_FLOAT128
146 template<
class ST,
class LO,
class GO,
class NT>
148 allocDualView (
const size_t lclNumRows,
149 const size_t numCols,
150 const bool zeroOut =
true,
151 const bool allowPadding =
false)
153 using ::Tpetra::Details::Behavior;
154 using Kokkos::AllowPadding;
155 using Kokkos::view_alloc;
156 using Kokkos::WithoutInitializing;
158 typedef typename dual_view_type::t_dev dev_view_type;
163 const std::string label (
"MV::DualView");
164 const bool debug = Behavior::debug ();
174 dev_view_type d_view;
177 d_view = dev_view_type (view_alloc (label, AllowPadding),
178 lclNumRows, numCols);
181 d_view = dev_view_type (view_alloc (label),
182 lclNumRows, numCols);
187 d_view = dev_view_type (view_alloc (label,
190 lclNumRows, numCols);
193 d_view = dev_view_type (view_alloc (label, WithoutInitializing),
194 lclNumRows, numCols);
205 const ST nan = Kokkos::Details::ArithTraits<ST>::nan ();
206 KokkosBlas::fill (d_view, nan);
210 TEUCHOS_TEST_FOR_EXCEPTION
211 (static_cast<size_t> (d_view.extent (0)) != lclNumRows ||
212 static_cast<size_t> (d_view.extent (1)) != numCols, std::logic_error,
213 "allocDualView: d_view's dimensions actual dimensions do not match "
214 "requested dimensions. d_view is " << d_view.extent (0) <<
" x " <<
215 d_view.extent (1) <<
"; requested " << lclNumRows <<
" x " << numCols
216 <<
". Please report this bug to the Tpetra developers.");
219 dual_view_type dv (d_view, Kokkos::create_mirror_view (d_view));
233 template<
class T,
class ExecSpace>
234 struct MakeUnmanagedView {
247 typedef typename Kokkos::Impl::if_c<
248 Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
249 typename ExecSpace::memory_space,
250 Kokkos::HostSpace>::value,
251 typename ExecSpace::device_type,
252 typename Kokkos::DualView<T*, ExecSpace>::host_mirror_space>::type host_exec_space;
253 typedef Kokkos::LayoutLeft array_layout;
254 typedef Kokkos::View<T*, array_layout, host_exec_space,
255 Kokkos::MemoryUnmanaged> view_type;
257 static view_type getView (
const Teuchos::ArrayView<T>& x_in)
259 const size_t numEnt =
static_cast<size_t> (x_in.size ());
263 return view_type (x_in.getRawPtr (), numEnt);
271 template<
class DualViewType>
273 takeSubview (
const DualViewType& X,
274 const Kokkos::Impl::ALL_t&,
275 const std::pair<size_t, size_t>& colRng)
277 if (X.extent (0) == 0 && X.extent (1) != 0) {
278 return DualViewType (
"MV::DualView", 0, colRng.second - colRng.first);
281 return subview (X, Kokkos::ALL (), colRng);
288 template<
class DualViewType>
290 takeSubview (
const DualViewType& X,
291 const std::pair<size_t, size_t>& rowRng,
292 const std::pair<size_t, size_t>& colRng)
294 if (X.extent (0) == 0 && X.extent (1) != 0) {
295 return DualViewType (
"MV::DualView", 0, colRng.second - colRng.first);
298 return subview (X, rowRng, colRng);
302 template<
class DualViewType>
304 getDualViewStride (
const DualViewType& dv)
308 const size_t LDA = dv.d_view.stride (1);
309 const size_t numRows = dv.extent (0);
312 return (numRows == 0) ? size_t (1) : numRows;
319 template<
class ViewType>
321 getViewStride (
const ViewType& view)
323 const size_t LDA = view.stride (1);
324 const size_t numRows = view.extent (0);
327 return (numRows == 0) ? size_t (1) : numRows;
334 template <
class SC,
class LO,
class GO,
class NT>
336 multiVectorRunNormOnHost (const ::Tpetra::MultiVector<SC, LO, GO, NT>& X)
338 if (! X.need_sync_device ()) {
342 constexpr
size_t localLengthThreshold = 10000;
343 return X.getLocalLength () <= localLengthThreshold;
347 template <
class SC,
class LO,
class GO,
class NT>
349 multiVectorNormImpl (typename ::Tpetra::MultiVector<SC, LO, GO, NT>::mag_type norms[],
355 using val_type =
typename MV::impl_scalar_type;
356 using mag_type =
typename MV::mag_type;
357 using dual_view_type =
typename MV::dual_view_type;
360 auto comm = map.is_null () ?
nullptr : map->getComm ().getRawPtr ();
361 auto whichVecs = getMultiVectorWhichVectors (X);
365 const bool runOnHost = multiVectorRunNormOnHost (X);
367 using view_type =
typename dual_view_type::t_host;
368 using array_layout =
typename view_type::array_layout;
369 using device_type =
typename view_type::device_type;
375 normImpl<val_type, array_layout, device_type,
376 mag_type> (norms, X_lcl, whichNorm, whichVecs,
377 isConstantStride, isDistributed, comm);
380 using view_type =
typename dual_view_type::t_dev;
381 using array_layout =
typename view_type::array_layout;
382 using device_type =
typename view_type::device_type;
388 normImpl<val_type, array_layout, device_type,
389 mag_type> (norms, X_lcl, whichNorm, whichVecs,
390 isConstantStride, isDistributed, comm);
398 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
400 MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
401 vectorIndexOutOfRange (
const size_t VectorIndex)
const {
402 return (VectorIndex < 1 && VectorIndex != 0) || VectorIndex >= getNumVectors();
405 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
411 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
414 const size_t numVecs,
415 const bool zeroOut) :
421 view_ = allocDualView<Scalar, LocalOrdinal, GlobalOrdinal, Node> (lclNumRows, numVecs, zeroOut);
425 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
428 const Teuchos::DataAccess copyOrView) :
430 view_ (source.view_),
431 origView_ (source.origView_),
432 whichVectors_ (source.whichVectors_)
435 const char tfecfFuncName[] =
"MultiVector(const MultiVector&, "
436 "const Teuchos::DataAccess): ";
440 if (copyOrView == Teuchos::Copy) {
444 this->
view_ = cpy.view_;
448 else if (copyOrView == Teuchos::View) {
451 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
452 true, std::invalid_argument,
"Second argument 'copyOrView' has an "
453 "invalid value " << copyOrView <<
". Valid values include "
454 "Teuchos::Copy = " << Teuchos::Copy <<
" and Teuchos::View = "
455 << Teuchos::View <<
".");
459 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
462 const dual_view_type& view) :
467 const char tfecfFuncName[] =
"MultiVector(Map,DualView): ";
468 const size_t lclNumRows_map = map.is_null () ? size_t (0) :
469 map->getNodeNumElements ();
470 const size_t lclNumRows_view = view.extent (0);
471 const size_t LDA = getDualViewStride (view);
473 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
474 (LDA < lclNumRows_map || lclNumRows_map != lclNumRows_view,
475 std::invalid_argument,
"Kokkos::DualView does not match Map. "
476 "map->getNodeNumElements() = " << lclNumRows_map
477 <<
", view.extent(0) = " << lclNumRows_view
478 <<
", and getStride() = " << LDA <<
".");
480 using ::Tpetra::Details::Behavior;
481 const bool debug = Behavior::debug ();
483 using ::Tpetra::Details::checkGlobalDualViewValidity;
484 std::ostringstream gblErrStrm;
485 const bool verbose = Behavior::verbose ();
486 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
487 const bool gblValid =
488 checkGlobalDualViewValidity (&gblErrStrm, view, verbose,
490 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
491 (! gblValid, std::runtime_error, gblErrStrm.str ());
496 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
499 const typename dual_view_type::t_dev& d_view) :
502 using Teuchos::ArrayRCP;
504 const char tfecfFuncName[] =
"MultiVector(map,d_view): ";
508 const size_t LDA = getViewStride (d_view);
509 const size_t lclNumRows = map->getNodeNumElements ();
510 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
511 (LDA < lclNumRows, std::invalid_argument,
"Map does not match "
512 "Kokkos::View. map->getNodeNumElements() = " << lclNumRows
513 <<
", View's column stride = " << LDA
514 <<
", and View's extent(0) = " << d_view.extent (0) <<
".");
516 auto h_view = Kokkos::create_mirror_view (d_view);
519 using ::Tpetra::Details::Behavior;
520 const bool debug = Behavior::debug ();
522 using ::Tpetra::Details::checkGlobalDualViewValidity;
523 std::ostringstream gblErrStrm;
524 const bool verbose = Behavior::verbose ();
525 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
526 const bool gblValid =
527 checkGlobalDualViewValidity (&gblErrStrm,
view_, verbose,
529 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
530 (! gblValid, std::runtime_error, gblErrStrm.str ());
539 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
542 const dual_view_type& view,
543 const dual_view_type& origView) :
548 const char tfecfFuncName[] =
"MultiVector(map,view,origView): ";
550 const size_t LDA = getDualViewStride (origView);
552 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
553 LDA < lclNumRows, std::invalid_argument,
"The input Kokkos::DualView's "
554 "column stride LDA = " << LDA <<
" < getLocalLength() = " << lclNumRows
555 <<
". This may also mean that the input origView's first dimension (number "
556 "of rows = " << origView.extent (0) <<
") does not not match the number "
557 "of entries in the Map on the calling process.");
559 using ::Tpetra::Details::Behavior;
560 const bool debug = Behavior::debug ();
562 using ::Tpetra::Details::checkGlobalDualViewValidity;
563 std::ostringstream gblErrStrm;
564 const bool verbose = Behavior::verbose ();
565 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
566 const bool gblValid_0 =
567 checkGlobalDualViewValidity (&gblErrStrm, view, verbose,
569 const bool gblValid_1 =
570 checkGlobalDualViewValidity (&gblErrStrm, origView, verbose,
572 const bool gblValid = gblValid_0 && gblValid_1;
573 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
574 (! gblValid, std::runtime_error, gblErrStrm.str ());
579 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
582 const dual_view_type& view,
583 const Teuchos::ArrayView<const size_t>& whichVectors) :
587 whichVectors_ (whichVectors.begin (), whichVectors.end ())
590 using Kokkos::subview;
591 const char tfecfFuncName[] =
"MultiVector(map,view,whichVectors): ";
593 using ::Tpetra::Details::Behavior;
594 const bool debug = Behavior::debug ();
596 using ::Tpetra::Details::checkGlobalDualViewValidity;
597 std::ostringstream gblErrStrm;
598 const bool verbose = Behavior::verbose ();
599 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
600 const bool gblValid =
601 checkGlobalDualViewValidity (&gblErrStrm, view, verbose,
603 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
604 (! gblValid, std::runtime_error, gblErrStrm.str ());
607 const size_t lclNumRows = map.is_null () ? size_t (0) :
608 map->getNodeNumElements ();
615 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
616 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (0)) < lclNumRows,
617 std::invalid_argument,
"view.extent(0) = " << view.extent (0)
618 <<
" < map->getNodeNumElements() = " << lclNumRows <<
".");
619 if (whichVectors.size () != 0) {
620 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
621 view.extent (1) != 0 && view.extent (1) == 0,
622 std::invalid_argument,
"view.extent(1) = 0, but whichVectors.size()"
623 " = " << whichVectors.size () <<
" > 0.");
624 size_t maxColInd = 0;
625 typedef Teuchos::ArrayView<const size_t>::size_type size_type;
626 for (size_type k = 0; k < whichVectors.size (); ++k) {
627 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
628 whichVectors[k] == Teuchos::OrdinalTraits<size_t>::invalid (),
629 std::invalid_argument,
"whichVectors[" << k <<
"] = "
630 "Teuchos::OrdinalTraits<size_t>::invalid().");
631 maxColInd = std::max (maxColInd, whichVectors[k]);
633 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
634 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (1)) <= maxColInd,
635 std::invalid_argument,
"view.extent(1) = " << view.extent (1)
636 <<
" <= max(whichVectors) = " << maxColInd <<
".");
641 const size_t LDA = getDualViewStride (view);
642 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
643 (LDA < lclNumRows, std::invalid_argument,
644 "LDA = " << LDA <<
" < this->getLocalLength() = " << lclNumRows <<
".");
646 if (whichVectors.size () == 1) {
655 const std::pair<size_t, size_t> colRng (whichVectors[0],
656 whichVectors[0] + 1);
664 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
667 const dual_view_type& view,
668 const dual_view_type& origView,
669 const Teuchos::ArrayView<const size_t>& whichVectors) :
672 origView_ (origView),
673 whichVectors_ (whichVectors.begin (), whichVectors.end ())
676 using Kokkos::subview;
677 const char tfecfFuncName[] =
"MultiVector(map,view,origView,whichVectors): ";
679 using ::Tpetra::Details::Behavior;
680 const bool debug = Behavior::debug ();
682 using ::Tpetra::Details::checkGlobalDualViewValidity;
683 std::ostringstream gblErrStrm;
684 const bool verbose = Behavior::verbose ();
685 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
686 const bool gblValid_0 =
687 checkGlobalDualViewValidity (&gblErrStrm, view, verbose,
689 const bool gblValid_1 =
690 checkGlobalDualViewValidity (&gblErrStrm, origView, verbose,
692 const bool gblValid = gblValid_0 && gblValid_1;
693 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
694 (! gblValid, std::runtime_error, gblErrStrm.str ());
704 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
705 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (0)) < lclNumRows,
706 std::invalid_argument,
"view.extent(0) = " << view.extent (0)
707 <<
" < map->getNodeNumElements() = " << lclNumRows <<
".");
708 if (whichVectors.size () != 0) {
709 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
710 view.extent (1) != 0 && view.extent (1) == 0,
711 std::invalid_argument,
"view.extent(1) = 0, but whichVectors.size()"
712 " = " << whichVectors.size () <<
" > 0.");
713 size_t maxColInd = 0;
714 typedef Teuchos::ArrayView<const size_t>::size_type size_type;
715 for (size_type k = 0; k < whichVectors.size (); ++k) {
716 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
717 whichVectors[k] == Teuchos::OrdinalTraits<size_t>::invalid (),
718 std::invalid_argument,
"whichVectors[" << k <<
"] = "
719 "Teuchos::OrdinalTraits<size_t>::invalid().");
720 maxColInd = std::max (maxColInd, whichVectors[k]);
722 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
723 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (1)) <= maxColInd,
724 std::invalid_argument,
"view.extent(1) = " << view.extent (1)
725 <<
" <= max(whichVectors) = " << maxColInd <<
".");
730 const size_t LDA = getDualViewStride (origView);
731 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
732 (LDA < lclNumRows, std::invalid_argument,
"Map and DualView origView "
733 "do not match. LDA = " << LDA <<
" < this->getLocalLength() = " <<
734 lclNumRows <<
". origView.extent(0) = " << origView.extent(0)
735 <<
", origView.stride(1) = " << origView.d_view.stride(1) <<
".");
737 if (whichVectors.size () == 1) {
746 const std::pair<size_t, size_t> colRng (whichVectors[0],
747 whichVectors[0] + 1);
755 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
758 const Teuchos::ArrayView<const Scalar>& data,
760 const size_t numVecs) :
763 typedef LocalOrdinal LO;
764 typedef GlobalOrdinal GO;
765 const char tfecfFuncName[] =
"MultiVector(map,data,LDA,numVecs): ";
771 const size_t lclNumRows =
772 map.is_null () ? size_t (0) : map->getNodeNumElements ();
773 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
774 (LDA < lclNumRows, std::invalid_argument,
"LDA = " << LDA <<
" < "
775 "map->getNodeNumElements() = " << lclNumRows <<
".");
777 const size_t minNumEntries = LDA * (numVecs - 1) + lclNumRows;
778 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
779 (static_cast<size_t> (data.size ()) < minNumEntries,
780 std::invalid_argument,
"Input Teuchos::ArrayView does not have enough "
781 "entries, given the input Map and number of vectors in the MultiVector."
782 " data.size() = " << data.size () <<
" < (LDA*(numVecs-1)) + "
783 "map->getNodeNumElements () = " << minNumEntries <<
".");
786 this->
view_ = allocDualView<Scalar, LO, GO, Node> (lclNumRows, numVecs);
799 Kokkos::MemoryUnmanaged> X_in_orig (X_in_raw, LDA, numVecs);
800 const Kokkos::pair<size_t, size_t> rowRng (0, lclNumRows);
801 auto X_in = Kokkos::subview (X_in_orig, rowRng, Kokkos::ALL ());
806 const size_t outStride =
807 X_out.extent (1) == 0 ? size_t (1) : X_out.stride (1);
808 if (LDA == outStride) {
814 typedef decltype (Kokkos::subview (X_out, Kokkos::ALL (), 0))
816 typedef decltype (Kokkos::subview (X_in, Kokkos::ALL (), 0))
818 for (
size_t j = 0; j < numVecs; ++j) {
819 out_col_view_type X_out_j = Kokkos::subview (X_out, Kokkos::ALL (), j);
820 in_col_view_type X_in_j = Kokkos::subview (X_in, Kokkos::ALL (), j);
826 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
829 const Teuchos::ArrayView<
const Teuchos::ArrayView<const Scalar> >& ArrayOfPtrs,
830 const size_t numVecs) :
834 typedef LocalOrdinal LO;
835 typedef GlobalOrdinal GO;
836 const char tfecfFuncName[] =
"MultiVector(map,ArrayOfPtrs,numVecs): ";
839 const size_t lclNumRows =
840 map.is_null () ? size_t (0) : map->getNodeNumElements ();
841 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
842 (numVecs < 1 || numVecs != static_cast<size_t> (ArrayOfPtrs.size ()),
843 std::runtime_error,
"Either numVecs (= " << numVecs <<
") < 1, or "
844 "ArrayOfPtrs.size() (= " << ArrayOfPtrs.size () <<
") != numVecs.");
845 for (
size_t j = 0; j < numVecs; ++j) {
846 Teuchos::ArrayView<const Scalar> X_j_av = ArrayOfPtrs[j];
847 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
848 static_cast<size_t> (X_j_av.size ()) < lclNumRows,
849 std::invalid_argument,
"ArrayOfPtrs[" << j <<
"].size() = "
850 << X_j_av.size () <<
" < map->getNodeNumElements() = " << lclNumRows
854 view_ = allocDualView<Scalar, LO, GO, Node> (lclNumRows, numVecs);
860 using array_layout =
typename decltype (X_out)::array_layout;
861 using input_col_view_type =
typename Kokkos::View<
const IST*,
864 Kokkos::MemoryUnmanaged>;
866 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
867 for (
size_t j = 0; j < numVecs; ++j) {
868 Teuchos::ArrayView<const IST> X_j_av =
869 Teuchos::av_reinterpret_cast<
const IST> (ArrayOfPtrs[j]);
870 input_col_view_type X_j_in (X_j_av.getRawPtr (), lclNumRows);
871 auto X_j_out = Kokkos::subview (X_out, rowRng, j);
877 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
880 return whichVectors_.empty ();
883 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
888 if (this->getMap ().is_null ()) {
889 return static_cast<size_t> (0);
891 return this->getMap ()->getNodeNumElements ();
895 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
900 if (this->getMap ().is_null ()) {
901 return static_cast<size_t> (0);
903 return this->getMap ()->getGlobalNumElements ();
907 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
912 return isConstantStride () ? getDualViewStride (origView_) : size_t (0);
915 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
924 const this_type* src =
dynamic_cast<const this_type*
> (&sourceObj);
925 if (src ==
nullptr) {
935 return src->getNumVectors () == this->getNumVectors ();
939 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
943 return this->getNumVectors ();
946 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
949 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
951 #else // TPETRA_ENABLE_DEPRECATED_CODE
953 #endif // TPETRA_ENABLE_DEPRECATED_CODE
955 const size_t numSameIDs,
956 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs,
957 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs)
959 using ::Tpetra::Details::Behavior;
961 using ::Tpetra::Details::ProfilingRegion;
963 using KokkosRefactor::Details::permute_array_multi_column;
964 using KokkosRefactor::Details::permute_array_multi_column_variable_stride;
965 using Kokkos::Compat::create_const_view;
967 const char tfecfFuncName[] =
"copyAndPermute: ";
968 ProfilingRegion regionCAP (
"Tpetra::MultiVector::copyAndPermute");
970 const bool verbose = Behavior::verbose ();
971 std::unique_ptr<std::string> prefix;
973 auto map = this->getMap ();
974 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
975 const int myRank = comm.is_null () ? -1 : comm->getRank ();
976 std::ostringstream os;
977 os <<
"Proc " << myRank <<
": MV::copyAndPermute: ";
978 prefix = std::unique_ptr<std::string> (
new std::string (os.str ()));
979 os <<
"Start" << endl;
980 std::cerr << os.str ();
983 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
984 (permuteToLIDs.extent (0) != permuteFromLIDs.extent (0),
985 std::logic_error,
"permuteToLIDs.extent(0) = "
986 << permuteToLIDs.extent (0) <<
" != permuteFromLIDs.extent(0) = "
987 << permuteFromLIDs.extent (0) <<
".");
990 const MV& sourceMV =
dynamic_cast<const MV&
> (sourceObj);
991 const size_t numCols = this->getNumVectors ();
995 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
996 (sourceMV.need_sync_device () && sourceMV.need_sync_host (),
997 std::logic_error,
"Input MultiVector needs sync to both host "
999 const bool copyOnHost = sourceMV.need_sync_device ();
1001 std::ostringstream os;
1002 os << *prefix <<
"copyOnHost=" << (copyOnHost ?
"true" :
"false") << endl;
1003 std::cerr << os.str ();
1007 if (this->need_sync_host ()) {
1010 this->modify_host ();
1013 if (this->need_sync_device ()) {
1014 this->sync_device ();
1016 this->modify_device ();
1020 std::ostringstream os;
1021 os << *prefix <<
"Copy" << endl;
1022 std::cerr << os.str ();
1047 if (numSameIDs > 0) {
1048 const std::pair<size_t, size_t> rows (0, numSameIDs);
1050 auto tgt_h = this->getLocalViewHost ();
1051 auto src_h = create_const_view (sourceMV.getLocalViewHost ());
1053 for (
size_t j = 0; j < numCols; ++j) {
1054 const size_t tgtCol = isConstantStride () ? j : whichVectors_[j];
1055 const size_t srcCol =
1056 sourceMV.isConstantStride () ? j : sourceMV.whichVectors_[j];
1058 auto tgt_j = Kokkos::subview (tgt_h, rows, tgtCol);
1059 auto src_j = Kokkos::subview (src_h, rows, srcCol);
1064 auto tgt_d = this->getLocalViewDevice ();
1065 auto src_d = create_const_view (sourceMV.getLocalViewDevice ());
1067 for (
size_t j = 0; j < numCols; ++j) {
1068 const size_t tgtCol = isConstantStride () ? j : whichVectors_[j];
1069 const size_t srcCol =
1070 sourceMV.isConstantStride () ? j : sourceMV.whichVectors_[j];
1072 auto tgt_j = Kokkos::subview (tgt_d, rows, tgtCol);
1073 auto src_j = Kokkos::subview (src_d, rows, srcCol);
1089 if (permuteFromLIDs.extent (0) == 0 ||
1090 permuteToLIDs.extent (0) == 0) {
1092 std::ostringstream os;
1093 os << *prefix <<
"No permutations. Done!" << endl;
1094 std::cerr << os.str ();
1100 std::ostringstream os;
1101 os << *prefix <<
"Permute" << endl;
1102 std::cerr << os.str ();
1107 const bool nonConstStride =
1108 ! this->isConstantStride () || ! sourceMV.isConstantStride ();
1111 std::ostringstream os;
1112 os << *prefix <<
"nonConstStride="
1113 << (nonConstStride ?
"true" :
"false") << endl;
1114 std::cerr << os.str ();
1121 Kokkos::DualView<const size_t*, device_type> srcWhichVecs;
1122 Kokkos::DualView<const size_t*, device_type> tgtWhichVecs;
1123 if (nonConstStride) {
1124 if (this->whichVectors_.size () == 0) {
1125 Kokkos::DualView<size_t*, device_type> tmpTgt (
"tgtWhichVecs", numCols);
1126 tmpTgt.modify_host ();
1127 for (
size_t j = 0; j < numCols; ++j) {
1128 tmpTgt.h_view(j) = j;
1131 tmpTgt.sync_device ();
1133 tgtWhichVecs = tmpTgt;
1136 Teuchos::ArrayView<const size_t> tgtWhichVecsT = this->whichVectors_ ();
1138 getDualViewCopyFromArrayView<size_t, device_type> (tgtWhichVecsT,
1142 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1143 (static_cast<size_t> (tgtWhichVecs.extent (0)) !=
1144 this->getNumVectors (),
1145 std::logic_error,
"tgtWhichVecs.extent(0) = " <<
1146 tgtWhichVecs.extent (0) <<
" != this->getNumVectors() = " <<
1147 this->getNumVectors () <<
".");
1149 if (sourceMV.whichVectors_.size () == 0) {
1150 Kokkos::DualView<size_t*, device_type> tmpSrc (
"srcWhichVecs", numCols);
1151 tmpSrc.modify_host ();
1152 for (
size_t j = 0; j < numCols; ++j) {
1153 tmpSrc.h_view(j) = j;
1156 tmpSrc.sync_device ();
1158 srcWhichVecs = tmpSrc;
1161 Teuchos::ArrayView<const size_t> srcWhichVecsT =
1162 sourceMV.whichVectors_ ();
1164 getDualViewCopyFromArrayView<size_t, device_type> (srcWhichVecsT,
1168 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1169 (static_cast<size_t> (srcWhichVecs.extent (0)) !=
1170 sourceMV.getNumVectors (), std::logic_error,
1171 "srcWhichVecs.extent(0) = " << srcWhichVecs.extent (0)
1172 <<
" != sourceMV.getNumVectors() = " << sourceMV.getNumVectors ()
1178 std::ostringstream os;
1179 os << *prefix <<
"Get permute LIDs on host" << std::endl;
1180 std::cerr << os.str ();
1182 auto tgt_h = this->getLocalViewHost ();
1183 auto src_h = create_const_view (sourceMV.getLocalViewHost ());
1185 TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_host () );
1186 auto permuteToLIDs_h = create_const_view (permuteToLIDs.view_host ());
1187 TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_host () );
1188 auto permuteFromLIDs_h =
1189 create_const_view (permuteFromLIDs.view_host ());
1192 std::ostringstream os;
1193 os << *prefix <<
"Permute on host" << endl;
1194 std::cerr << os.str ();
1196 if (nonConstStride) {
1199 auto tgtWhichVecs_h =
1200 create_const_view (tgtWhichVecs.view_host ());
1201 auto srcWhichVecs_h =
1202 create_const_view (srcWhichVecs.view_host ());
1203 permute_array_multi_column_variable_stride (tgt_h, src_h,
1207 srcWhichVecs_h, numCols);
1210 permute_array_multi_column (tgt_h, src_h, permuteToLIDs_h,
1211 permuteFromLIDs_h, numCols);
1216 std::ostringstream os;
1217 os << *prefix <<
"Get permute LIDs on device" << endl;
1218 std::cerr << os.str ();
1220 auto tgt_d = this->getLocalViewDevice ();
1221 auto src_d = create_const_view (sourceMV.getLocalViewDevice ());
1223 TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_device () );
1224 auto permuteToLIDs_d = create_const_view (permuteToLIDs.view_device ());
1225 TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_device () );
1226 auto permuteFromLIDs_d =
1227 create_const_view (permuteFromLIDs.view_device ());
1230 std::ostringstream os;
1231 os << *prefix <<
"Permute on device" << endl;
1232 std::cerr << os.str ();
1234 if (nonConstStride) {
1237 auto tgtWhichVecs_d = create_const_view (tgtWhichVecs.view_device ());
1238 auto srcWhichVecs_d = create_const_view (srcWhichVecs.view_device ());
1239 permute_array_multi_column_variable_stride (tgt_d, src_d,
1243 srcWhichVecs_d, numCols);
1246 permute_array_multi_column (tgt_d, src_d, permuteToLIDs_d,
1247 permuteFromLIDs_d, numCols);
1252 std::ostringstream os;
1253 os << *prefix <<
"Done!" << endl;
1254 std::cerr << os.str ();
1258 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1260 MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
1261 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
1263 #else // TPETRA_ENABLE_DEPRECATED_CODE
1265 #endif // TPETRA_ENABLE_DEPRECATED_CODE
1266 (
const SrcDistObject& sourceObj,
1267 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
1268 Kokkos::DualView<impl_scalar_type*, buffer_device_type>& exports,
1269 Kokkos::DualView<size_t*, buffer_device_type> ,
1270 size_t& constantNumPackets,
1273 using ::Tpetra::Details::Behavior;
1274 using ::Tpetra::Details::ProfilingRegion;
1276 using Kokkos::Compat::create_const_view;
1277 using Kokkos::Compat::getKokkosViewDeepCopy;
1279 using MV = MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
1280 const char tfecfFuncName[] =
"packAndPrepare: ";
1281 ProfilingRegion regionPAP (
"Tpetra::MultiVector::packAndPrepare");
1289 const bool debugCheckIndices = Behavior::debug ();
1294 const bool printDebugOutput = Behavior::verbose ();
1296 std::unique_ptr<std::string> prefix;
1297 if (printDebugOutput) {
1298 auto map = this->getMap ();
1299 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
1300 const int myRank = comm.is_null () ? -1 : comm->getRank ();
1301 std::ostringstream os;
1302 os <<
"Proc " << myRank <<
": MV::packAndPrepare: ";
1303 prefix = std::unique_ptr<std::string> (
new std::string (os.str ()));
1304 os <<
"Start" << endl;
1305 std::cerr << os.str ();
1309 const MV& sourceMV =
dynamic_cast<const MV&
> (sourceObj);
1311 const size_t numCols = sourceMV.getNumVectors ();
1316 constantNumPackets = numCols;
1320 if (exportLIDs.extent (0) == 0) {
1321 if (printDebugOutput) {
1322 std::ostringstream os;
1323 os << *prefix <<
"No exports on this proc, DONE" << std::endl;
1324 std::cerr << os.str ();
1344 const size_t numExportLIDs = exportLIDs.extent (0);
1345 const size_t newExportsSize = numCols * numExportLIDs;
1346 if (printDebugOutput) {
1347 std::ostringstream os;
1348 os << *prefix <<
"realloc: "
1349 <<
"numExportLIDs: " << numExportLIDs
1350 <<
", exports.extent(0): " << exports.extent (0)
1351 <<
", newExportsSize: " << newExportsSize << std::endl;
1352 std::cerr << os.str ();
1358 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1359 (sourceMV.need_sync_device () && sourceMV.need_sync_host (),
1360 std::logic_error,
"Input MultiVector needs sync to both host "
1362 const bool packOnHost = sourceMV.need_sync_device ();
1363 auto src_dev = sourceMV.getLocalViewHost ();
1364 auto src_host = sourceMV.getLocalViewDevice ();
1365 if (printDebugOutput) {
1366 std::ostringstream os;
1367 os << *prefix <<
"packOnHost=" << (packOnHost ?
"true" :
"false") << endl;
1368 std::cerr << os.str ();
1375 exports.modify_host ();
1378 exports.modify_device ();
1394 if (sourceMV.isConstantStride ()) {
1395 using KokkosRefactor::Details::pack_array_single_column;
1396 if (printDebugOutput) {
1397 std::ostringstream os;
1398 os << *prefix <<
"Pack numCols=1 const stride" << endl;
1399 std::cerr << os.str ();
1402 pack_array_single_column (exports.view_host (),
1403 create_const_view (src_host),
1404 exportLIDs.view_host (),
1409 pack_array_single_column (exports.view_device (),
1410 create_const_view (src_dev),
1411 exportLIDs.view_device (),
1417 using KokkosRefactor::Details::pack_array_single_column;
1418 if (printDebugOutput) {
1419 std::ostringstream os;
1420 os << *prefix <<
"Pack numCols=1 nonconst stride" << endl;
1421 std::cerr << os.str ();
1424 pack_array_single_column (exports.view_host (),
1425 create_const_view (src_host),
1426 exportLIDs.view_host (),
1427 sourceMV.whichVectors_[0],
1431 pack_array_single_column (exports.view_device (),
1432 create_const_view (src_dev),
1433 exportLIDs.view_device (),
1434 sourceMV.whichVectors_[0],
1440 if (sourceMV.isConstantStride ()) {
1441 using KokkosRefactor::Details::pack_array_multi_column;
1442 if (printDebugOutput) {
1443 std::ostringstream os;
1444 os << *prefix <<
"Pack numCols=" << numCols <<
" const stride" << endl;
1445 std::cerr << os.str ();
1448 pack_array_multi_column (exports.view_host (),
1449 create_const_view (src_host),
1450 exportLIDs.view_host (),
1455 pack_array_multi_column (exports.view_device (),
1456 create_const_view (src_dev),
1457 exportLIDs.view_device (),
1463 using KokkosRefactor::Details::pack_array_multi_column_variable_stride;
1464 if (printDebugOutput) {
1465 std::ostringstream os;
1466 os << *prefix <<
"Pack numCols=" << numCols <<
" nonconst stride"
1468 std::cerr << os.str ();
1473 using IST = impl_scalar_type;
1474 using DV = Kokkos::DualView<IST*, device_type>;
1475 using HES =
typename DV::t_host::execution_space;
1476 using DES =
typename DV::t_dev::execution_space;
1477 Teuchos::ArrayView<const size_t> whichVecs = sourceMV.whichVectors_ ();
1479 pack_array_multi_column_variable_stride
1480 (exports.view_host (),
1481 create_const_view (src_host),
1482 exportLIDs.view_host (),
1483 getKokkosViewDeepCopy<HES> (whichVecs),
1488 pack_array_multi_column_variable_stride
1489 (exports.view_device (),
1490 create_const_view (src_dev),
1491 exportLIDs.view_device (),
1492 getKokkosViewDeepCopy<DES> (whichVecs),
1499 if (printDebugOutput) {
1500 std::ostringstream os;
1501 os << *prefix <<
"Done!" << endl;
1502 std::cerr << os.str ();
1507 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1509 MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
1510 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
1512 #else // TPETRA_ENABLE_DEPRECATED_CODE
1514 #endif // TPETRA_ENABLE_DEPRECATED_CODE
1515 (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& importLIDs,
1516 Kokkos::DualView<impl_scalar_type*, buffer_device_type> imports,
1517 Kokkos::DualView<size_t*, buffer_device_type> ,
1518 const size_t constantNumPackets,
1522 using ::Tpetra::Details::Behavior;
1523 using ::Tpetra::Details::ProfilingRegion;
1524 using KokkosRefactor::Details::unpack_array_multi_column;
1525 using KokkosRefactor::Details::unpack_array_multi_column_variable_stride;
1526 using Kokkos::Compat::getKokkosViewDeepCopy;
1528 using IST = impl_scalar_type;
1529 const char tfecfFuncName[] =
"unpackAndCombine: ";
1530 ProfilingRegion regionUAC (
"Tpetra::MultiVector::unpackAndCombine");
1538 const bool debugCheckIndices = Behavior::debug ();
1540 const bool printDebugOutput = Behavior::verbose ();
1541 std::unique_ptr<std::string> prefix;
1542 if (printDebugOutput) {
1543 auto map = this->getMap ();
1544 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
1545 const int myRank = comm.is_null () ? -1 : comm->getRank ();
1546 std::ostringstream os;
1547 os <<
"Proc " << myRank <<
": MV::packAndPrepare: ";
1548 prefix = std::unique_ptr<std::string> (
new std::string (os.str ()));
1549 os <<
"Start" << endl;
1550 std::cerr << os.str ();
1554 if (importLIDs.extent (0) == 0) {
1555 if (printDebugOutput) {
1556 std::ostringstream os;
1557 os << *prefix <<
"No imports. Done!" << endl;
1562 const size_t numVecs = getNumVectors ();
1563 if (debugCheckIndices) {
1564 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1565 (static_cast<size_t> (imports.extent (0)) !=
1566 numVecs * importLIDs.extent (0),
1568 "imports.extent(0) = " << imports.extent (0)
1569 <<
" != getNumVectors() * importLIDs.extent(0) = " << numVecs
1570 <<
" * " << importLIDs.extent (0) <<
" = "
1571 << numVecs * importLIDs.extent (0) <<
".");
1573 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1574 (constantNumPackets == static_cast<size_t> (0), std::runtime_error,
1575 "constantNumPackets input argument must be nonzero.");
1577 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1578 (static_cast<size_t> (numVecs) !=
1579 static_cast<size_t> (constantNumPackets),
1580 std::runtime_error,
"constantNumPackets must equal numVecs.");
1586 const bool unpackOnHost = imports.need_sync_device ();
1588 if (printDebugOutput) {
1589 std::ostringstream os;
1590 os << *prefix <<
"unpackOnHost=" << (unpackOnHost ?
"true" :
"false")
1592 std::cerr << os.str ();
1598 if (this->need_sync_host ()) {
1601 this->modify_host ();
1604 if (this->need_sync_device ()) {
1605 this->sync_device ();
1607 this->modify_device ();
1609 auto X_d = this->getLocalViewDevice ();
1610 auto X_h = this->getLocalViewHost ();
1611 auto imports_d = imports.view_device ();
1612 auto imports_h = imports.view_host ();
1613 auto importLIDs_d = importLIDs.view_device ();
1614 auto importLIDs_h = importLIDs.view_host ();
1616 Kokkos::DualView<size_t*, device_type> whichVecs;
1617 if (! isConstantStride ()) {
1618 Kokkos::View<
const size_t*, Kokkos::HostSpace,
1619 Kokkos::MemoryUnmanaged> whichVecsIn (whichVectors_.getRawPtr (),
1621 whichVecs = Kokkos::DualView<size_t*, device_type> (
"whichVecs", numVecs);
1623 whichVecs.modify_host ();
1627 whichVecs.modify_device ();
1631 auto whichVecs_d = whichVecs.view_device ();
1632 auto whichVecs_h = whichVecs.view_host ();
1642 if (numVecs > 0 && importLIDs.extent (0) > 0) {
1643 using dev_exec_space =
typename dual_view_type::t_dev::execution_space;
1644 using host_exec_space =
typename dual_view_type::t_host::execution_space;
1647 const bool use_atomic_updates = unpackOnHost ?
1648 host_exec_space::concurrency () != 1 :
1649 dev_exec_space::concurrency () != 1;
1651 if (printDebugOutput) {
1652 std::ostringstream os;
1654 std::cerr << os.str ();
1661 using op_type = KokkosRefactor::Details::InsertOp<IST>;
1662 if (isConstantStride ()) {
1664 unpack_array_multi_column (host_exec_space (),
1665 X_h, imports_h, importLIDs_h,
1666 op_type (), numVecs,
1672 unpack_array_multi_column (dev_exec_space (),
1673 X_d, imports_d, importLIDs_d,
1674 op_type (), numVecs,
1681 unpack_array_multi_column_variable_stride (host_exec_space (),
1691 unpack_array_multi_column_variable_stride (dev_exec_space (),
1702 else if (CM ==
ADD) {
1703 using op_type = KokkosRefactor::Details::AddOp<IST>;
1704 if (isConstantStride ()) {
1706 unpack_array_multi_column (host_exec_space (),
1707 X_h, imports_h, importLIDs_h,
1708 op_type (), numVecs,
1713 unpack_array_multi_column (dev_exec_space (),
1714 X_d, imports_d, importLIDs_d,
1715 op_type (), numVecs,
1722 unpack_array_multi_column_variable_stride (host_exec_space (),
1732 unpack_array_multi_column_variable_stride (dev_exec_space (),
1744 using op_type = KokkosRefactor::Details::AbsMaxOp<IST>;
1745 if (isConstantStride ()) {
1747 unpack_array_multi_column (host_exec_space (),
1748 X_h, imports_h, importLIDs_h,
1749 op_type (), numVecs,
1754 unpack_array_multi_column (dev_exec_space (),
1755 X_d, imports_d, importLIDs_d,
1756 op_type (), numVecs,
1763 unpack_array_multi_column_variable_stride (host_exec_space (),
1773 unpack_array_multi_column_variable_stride (dev_exec_space (),
1785 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1786 (
true, std::logic_error,
"Invalid CombineMode");
1790 if (printDebugOutput) {
1791 std::ostringstream os;
1792 os << *prefix <<
"Nothing to unpack" << endl;
1793 std::cerr << os.str ();
1797 if (printDebugOutput) {
1798 std::ostringstream os;
1799 os << *prefix <<
"Done!" << endl;
1800 std::cerr << os.str ();
1804 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1809 if (isConstantStride ()) {
1810 return static_cast<size_t> (view_.extent (1));
1812 return static_cast<size_t> (whichVectors_.size ());
1820 gblDotImpl (
const RV& dotsOut,
1821 const Teuchos::RCP<
const Teuchos::Comm<int> >& comm,
1822 const bool distributed)
1824 using Teuchos::REDUCE_MAX;
1825 using Teuchos::REDUCE_SUM;
1826 using Teuchos::reduceAll;
1827 typedef typename RV::non_const_value_type dot_type;
1829 const size_t numVecs = dotsOut.extent (0);
1844 if (distributed && ! comm.is_null ()) {
1847 const int nv =
static_cast<int> (numVecs);
1848 const bool commIsInterComm = ::Tpetra::Details::isInterComm (*comm);
1850 if (commIsInterComm) {
1854 typename RV::non_const_type lclDots (Kokkos::ViewAllocateWithoutInitializing (
"tmp"), numVecs);
1856 const dot_type*
const lclSum = lclDots.data ();
1857 dot_type*
const gblSum = dotsOut.data ();
1858 reduceAll<int, dot_type> (*comm, REDUCE_SUM, nv, lclSum, gblSum);
1861 dot_type*
const inout = dotsOut.data ();
1862 reduceAll<int, dot_type> (*comm, REDUCE_SUM, nv, inout, inout);
1868 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1872 const Kokkos::View<dot_type*, Kokkos::HostSpace>& dots)
const
1874 using ::Tpetra::Details::Behavior;
1875 using Kokkos::subview;
1876 using Teuchos::Comm;
1877 using Teuchos::null;
1880 typedef Kokkos::View<dot_type*, Kokkos::HostSpace> RV;
1882 typedef typename dual_view_type::t_dev XMV;
1883 const char tfecfFuncName[] =
"Tpetra::MultiVector::dot: ";
1887 const size_t numVecs = this->getNumVectors ();
1891 const size_t lclNumRows = this->getLocalLength ();
1892 const size_t numDots =
static_cast<size_t> (dots.extent (0));
1893 const bool debug = Behavior::debug ();
1896 const bool compat = this->getMap ()->isCompatible (* (A.
getMap ()));
1897 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1898 (! compat, std::invalid_argument,
"'*this' MultiVector is not "
1899 "compatible with the input MultiVector A. We only test for this "
1910 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1912 "MultiVectors do not have the same local length. "
1913 "this->getLocalLength() = " << lclNumRows <<
" != "
1915 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1917 "MultiVectors must have the same number of columns (vectors). "
1918 "this->getNumVectors() = " << numVecs <<
" != "
1920 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1921 numDots != numVecs, std::runtime_error,
1922 "The output array 'dots' must have the same number of entries as the "
1923 "number of columns (vectors) in *this and A. dots.extent(0) = " <<
1924 numDots <<
" != this->getNumVectors() = " << numVecs <<
".");
1926 const std::pair<size_t, size_t> colRng (0, numVecs);
1927 RV dotsOut = subview (dots, colRng);
1928 RCP<const Comm<int> > comm = this->getMap ().is_null () ? null :
1929 this->getMap ()->getComm ();
1932 if (this->need_sync_device ()) {
1933 const_cast<MV*
>(
this)->sync_device ();
1936 const_cast<MV&
>(A).sync_device ();
1939 auto thisView = this->getLocalViewDevice ();
1942 ::Tpetra::Details::lclDot<RV, XMV> (dotsOut, thisView, A_view, lclNumRows, numVecs,
1943 this->whichVectors_.getRawPtr (),
1946 gblDotImpl (dotsOut, comm, this->isDistributed ());
1950 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1955 using ::Tpetra::Details::ProfilingRegion;
1957 using dot_type =
typename MV::dot_type;
1958 ProfilingRegion region (
"Tpetra::multiVectorSingleColumnDot");
1961 Teuchos::RCP<const Teuchos::Comm<int> > comm =
1962 map.is_null () ? Teuchos::null : map->getComm ();
1963 if (comm.is_null ()) {
1964 return Kokkos::ArithTraits<dot_type>::zero ();
1967 using LO = LocalOrdinal;
1971 const LO lclNumRows =
static_cast<LO
> (std::min (x.
getLocalLength (),
1973 const Kokkos::pair<LO, LO> rowRng (0, lclNumRows);
1974 dot_type lclDot = Kokkos::ArithTraits<dot_type>::zero ();
1975 dot_type gblDot = Kokkos::ArithTraits<dot_type>::zero ();
1982 const_cast<MV&
>(y).sync_device ();
1987 auto x_1d = Kokkos::subview (x_2d, rowRng, 0);
1989 auto y_1d = Kokkos::subview (y_2d, rowRng, 0);
1990 lclDot = KokkosBlas::dot (x_1d, y_1d);
1993 using Teuchos::outArg;
1994 using Teuchos::REDUCE_SUM;
1995 using Teuchos::reduceAll;
1996 reduceAll<int, dot_type> (*comm, REDUCE_SUM, lclDot, outArg (gblDot));
2008 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2012 const Teuchos::ArrayView<dot_type>& dots)
const
2015 const char tfecfFuncName[] =
"dot: ";
2018 const size_t numVecs = this->getNumVectors ();
2019 const size_t lclNumRows = this->getLocalLength ();
2020 const size_t numDots =
static_cast<size_t> (dots.size ());
2030 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2032 "MultiVectors do not have the same local length. "
2033 "this->getLocalLength() = " << lclNumRows <<
" != "
2035 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2037 "MultiVectors must have the same number of columns (vectors). "
2038 "this->getNumVectors() = " << numVecs <<
" != "
2040 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2041 (numDots != numVecs, std::runtime_error,
2042 "The output array 'dots' must have the same number of entries as the "
2043 "number of columns (vectors) in *this and A. dots.extent(0) = " <<
2044 numDots <<
" != this->getNumVectors() = " << numVecs <<
".");
2047 const dot_type gblDot = multiVectorSingleColumnDot (const_cast<MV&> (*
this), A);
2051 this->dot (A, Kokkos::View<dot_type*, Kokkos::HostSpace>(dots.getRawPtr (), numDots));
2055 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2058 norm2 (
const Teuchos::ArrayView<mag_type>& norms)
const
2061 using ::Tpetra::Details::NORM_TWO;
2062 using ::Tpetra::Details::ProfilingRegion;
2063 ProfilingRegion region (
"Tpetra::MV::norm2 (host output)");
2066 MV& X =
const_cast<MV&
> (*this);
2067 multiVectorNormImpl (norms.getRawPtr (), X, NORM_TWO);
2070 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2073 norm2 (
const Kokkos::View<mag_type*, Kokkos::HostSpace>& norms)
const
2075 Teuchos::ArrayView<mag_type> norms_av (norms.data (), norms.extent (0));
2076 this->norm2 (norms_av);
2079 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
2080 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2081 void TPETRA_DEPRECATED
2084 const Teuchos::ArrayView<mag_type> &norms)
const
2087 using Kokkos::subview;
2088 using Teuchos::Comm;
2089 using Teuchos::null;
2091 using Teuchos::reduceAll;
2092 using Teuchos::REDUCE_SUM;
2093 typedef Kokkos::ArithTraits<impl_scalar_type> ATS;
2094 typedef Kokkos::ArithTraits<mag_type> ATM;
2095 typedef Kokkos::View<mag_type*, Kokkos::HostSpace> norms_view_type;
2097 const char tfecfFuncName[] =
"normWeighted: ";
2099 const size_t numVecs = this->getNumVectors ();
2100 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2101 static_cast<size_t> (norms.size ()) != numVecs, std::runtime_error,
2102 "norms.size() = " << norms.size () <<
" != this->getNumVectors() = "
2106 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2107 ! OneW && weights.
getNumVectors () != numVecs, std::runtime_error,
2108 "The input MultiVector of weights must contain either one column, "
2109 "or must have the same number of columns as *this. "
2111 <<
" and this->getNumVectors() = " << numVecs <<
".");
2116 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2117 (! this->getMap ()->isCompatible (*weights.
getMap ()), std::runtime_error,
2118 "MultiVectors do not have compatible Maps:" << std::endl
2119 <<
"this->getMap(): " << std::endl << *this->getMap()
2120 <<
"weights.getMap(): " << std::endl << *weights.
getMap() << std::endl);
2123 const size_t lclNumRows = this->getLocalLength ();
2124 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2126 "MultiVectors do not have the same local length.");
2129 norms_view_type lclNrms (
"Tpetra::MV::lclNrms", numVecs);
2132 const_cast<MV*
> (
this)->sync_device ();
2133 const_cast<MV&
> (weights).sync_device ();
2135 auto X_lcl = this->getLocalViewDevice ();
2138 if (isConstantStride () && ! OneW) {
2139 KokkosBlas::nrm2w_squared (lclNrms, X_lcl, W_lcl);
2142 for (
size_t j = 0; j < numVecs; ++j) {
2143 const size_t X_col = this->isConstantStride () ? j :
2144 this->whichVectors_[j];
2145 const size_t W_col = OneW ?
static_cast<size_t> (0) :
2147 KokkosBlas::nrm2w_squared (subview (lclNrms, j),
2148 subview (X_lcl, ALL (), X_col),
2149 subview (W_lcl, ALL (), W_col));
2153 const mag_type OneOverN =
2154 ATM::one () /
static_cast<mag_type
> (this->getGlobalLength ());
2155 RCP<const Comm<int> > comm = this->getMap ().is_null () ?
2156 Teuchos::null : this->getMap ()->getComm ();
2158 if (! comm.is_null () && this->isDistributed ()) {
2160 reduceAll<int, mag_type> (*comm, REDUCE_SUM,
static_cast<int> (numVecs),
2161 lclNrms.data (), norms.getRawPtr ());
2162 for (
size_t k = 0; k < numVecs; ++k) {
2163 norms[k] = ATM::sqrt (norms[k] * OneOverN);
2167 for (
size_t k = 0; k < numVecs; ++k) {
2168 norms[k] = ATM::sqrt (ATS::magnitude (lclNrms(k)) * OneOverN);
2172 #endif // TPETRA_ENABLE_DEPRECATED_CODE
2174 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2177 norm1 (
const Teuchos::ArrayView<mag_type>& norms)
const
2180 using ::Tpetra::Details::NORM_ONE;
2181 using ::Tpetra::Details::ProfilingRegion;
2182 ProfilingRegion region (
"Tpetra::MV::norm1 (host output)");
2185 MV& X =
const_cast<MV&
> (*this);
2186 multiVectorNormImpl (norms.data (), X, NORM_ONE);
2189 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2192 norm1 (
const Kokkos::View<mag_type*, Kokkos::HostSpace>& norms)
const
2194 Teuchos::ArrayView<mag_type> norms_av (norms.data (), norms.extent (0));
2195 this->norm1 (norms_av);
2198 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2201 normInf (
const Teuchos::ArrayView<mag_type>& norms)
const
2204 using ::Tpetra::Details::NORM_INF;
2205 using ::Tpetra::Details::ProfilingRegion;
2206 ProfilingRegion region (
"Tpetra::MV::normInf (host output)");
2209 MV& X =
const_cast<MV&
> (*this);
2210 multiVectorNormImpl (norms.getRawPtr (), X, NORM_INF);
2213 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2216 normInf (
const Kokkos::View<mag_type*, Kokkos::HostSpace>& norms)
const
2218 Teuchos::ArrayView<mag_type> norms_av (norms.data (), norms.extent (0));
2219 this->normInf (norms_av);
2222 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2225 meanValue (
const Teuchos::ArrayView<impl_scalar_type>& means)
const
2229 using Kokkos::subview;
2230 using Teuchos::Comm;
2232 using Teuchos::reduceAll;
2233 using Teuchos::REDUCE_SUM;
2234 typedef Kokkos::Details::ArithTraits<impl_scalar_type> ATS;
2236 const size_t lclNumRows = this->getLocalLength ();
2237 const size_t numVecs = this->getNumVectors ();
2238 const size_t numMeans =
static_cast<size_t> (means.size ());
2240 TEUCHOS_TEST_FOR_EXCEPTION(
2241 numMeans != numVecs, std::runtime_error,
2242 "Tpetra::MultiVector::meanValue: means.size() = " << numMeans
2243 <<
" != this->getNumVectors() = " << numVecs <<
".");
2245 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2246 const std::pair<size_t, size_t> colRng (0, numVecs);
2251 typedef Kokkos::View<impl_scalar_type*, device_type> local_view_type;
2253 typename local_view_type::HostMirror::array_layout,
2255 Kokkos::MemoryTraits<Kokkos::Unmanaged> > host_local_view_type;
2256 host_local_view_type meansOut (means.getRawPtr (), numMeans);
2258 RCP<const Comm<int> > comm = this->getMap ().is_null () ? Teuchos::null :
2259 this->getMap ()->getComm ();
2262 const bool useHostVersion = this->need_sync_device ();
2263 if (useHostVersion) {
2265 auto X_lcl = subview (getLocalViewHost (),
2266 rowRng, Kokkos::ALL ());
2268 Kokkos::View<impl_scalar_type*, Kokkos::HostSpace> lclSums (
"MV::meanValue tmp", numVecs);
2269 if (isConstantStride ()) {
2270 KokkosBlas::sum (lclSums, X_lcl);
2273 for (
size_t j = 0; j < numVecs; ++j) {
2274 const size_t col = whichVectors_[j];
2275 KokkosBlas::sum (subview (lclSums, j), subview (X_lcl, ALL (), col));
2282 if (! comm.is_null () && this->isDistributed ()) {
2283 reduceAll (*comm, REDUCE_SUM, static_cast<int> (numVecs),
2284 lclSums.data (), meansOut.data ());
2292 auto X_lcl = subview (this->getLocalViewDevice (),
2293 rowRng, Kokkos::ALL ());
2296 Kokkos::View<impl_scalar_type*, Kokkos::HostSpace> lclSums (
"MV::meanValue tmp", numVecs);
2297 if (isConstantStride ()) {
2298 KokkosBlas::sum (lclSums, X_lcl);
2301 for (
size_t j = 0; j < numVecs; ++j) {
2302 const size_t col = whichVectors_[j];
2303 KokkosBlas::sum (subview (lclSums, j), subview (X_lcl, ALL (), col));
2311 if (! comm.is_null () && this->isDistributed ()) {
2312 reduceAll (*comm, REDUCE_SUM, static_cast<int> (numVecs),
2313 lclSums.data (), meansOut.data ());
2323 const impl_scalar_type OneOverN =
2324 ATS::one () /
static_cast<mag_type> (this->getGlobalLength ());
2325 for (
size_t k = 0; k < numMeans; ++k) {
2326 meansOut(k) = meansOut(k) * OneOverN;
2331 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2337 typedef Kokkos::Details::ArithTraits<IST> ATS;
2338 typedef Kokkos::Random_XorShift64_Pool<typename device_type::execution_space> pool_type;
2339 typedef typename pool_type::generator_type generator_type;
2341 const IST max = Kokkos::rand<generator_type, IST>::max ();
2342 const IST min = ATS::is_signed ? IST (-max) : ATS::zero ();
2344 this->randomize (static_cast<Scalar> (min), static_cast<Scalar> (max));
2348 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2354 typedef Kokkos::Random_XorShift64_Pool<typename device_type::execution_space> pool_type;
2365 const uint64_t myRank =
2366 static_cast<uint64_t
> (this->getMap ()->getComm ()->getRank ());
2367 uint64_t seed64 =
static_cast<uint64_t
> (std::rand ()) + myRank + 17311uLL;
2368 unsigned int seed =
static_cast<unsigned int> (seed64&0xffffffff);
2370 pool_type rand_pool (seed);
2371 const IST max =
static_cast<IST
> (maxVal);
2372 const IST min =
static_cast<IST
> (minVal);
2377 this->view_.clear_sync_state();
2379 this->modify_device ();
2380 auto thisView = this->getLocalViewDevice ();
2382 if (isConstantStride ()) {
2383 Kokkos::fill_random (thisView, rand_pool, min, max);
2386 const size_t numVecs = getNumVectors ();
2387 for (
size_t k = 0; k < numVecs; ++k) {
2388 const size_t col = whichVectors_[k];
2389 auto X_k = Kokkos::subview (thisView, Kokkos::ALL (), col);
2390 Kokkos::fill_random (X_k, rand_pool, min, max);
2395 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2400 using ::Tpetra::Details::ProfilingRegion;
2401 using ::Tpetra::Details::Blas::fill;
2402 using DES =
typename dual_view_type::t_dev::execution_space;
2403 using HES =
typename dual_view_type::t_host::execution_space;
2404 using LO = LocalOrdinal;
2405 ProfilingRegion region (
"Tpetra::MultiVector::putScalar");
2410 const LO lclNumRows =
static_cast<LO
> (this->getLocalLength ());
2411 const LO numVecs =
static_cast<LO
> (this->getNumVectors ());
2417 const bool runOnHost = this->need_sync_device ();
2420 this->modify_device ();
2421 auto X = this->getLocalViewDevice ();
2422 if (this->isConstantStride ()) {
2423 fill (DES (), X, theAlpha, lclNumRows, numVecs);
2426 fill (DES (), X, theAlpha, lclNumRows, numVecs,
2427 this->whichVectors_.getRawPtr ());
2431 this->modify_host ();
2432 auto X = this->getLocalViewHost ();
2433 if (this->isConstantStride ()) {
2434 fill (HES (), X, theAlpha, lclNumRows, numVecs);
2437 fill (HES (), X, theAlpha, lclNumRows, numVecs,
2438 this->whichVectors_.getRawPtr ());
2444 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2449 using Teuchos::ArrayRCP;
2450 using Teuchos::Comm;
2453 using LO = LocalOrdinal;
2454 using GO = GlobalOrdinal;
2460 TEUCHOS_TEST_FOR_EXCEPTION(
2461 ! this->isConstantStride (), std::logic_error,
2462 "Tpetra::MultiVector::replaceMap: This method does not currently work "
2463 "if the MultiVector is a column view of another MultiVector (that is, if "
2464 "isConstantStride() == false).");
2494 #ifdef HAVE_TEUCHOS_DEBUG
2508 #endif // HAVE_TEUCHOS_DEBUG
2510 if (this->getMap ().is_null ()) {
2515 TEUCHOS_TEST_FOR_EXCEPTION(
2516 newMap.is_null (), std::invalid_argument,
2517 "Tpetra::MultiVector::replaceMap: both current and new Maps are null. "
2518 "This probably means that the input Map is incorrect.");
2522 const size_t newNumRows = newMap->getNodeNumElements ();
2523 const size_t origNumRows = view_.extent (0);
2524 const size_t numCols = this->getNumVectors ();
2526 if (origNumRows != newNumRows || view_.extent (1) != numCols) {
2527 view_ = allocDualView<ST, LO, GO, Node> (newNumRows, numCols);
2530 else if (newMap.is_null ()) {
2533 const size_t newNumRows =
static_cast<size_t> (0);
2534 const size_t numCols = this->getNumVectors ();
2535 view_ = allocDualView<ST, LO, GO, Node> (newNumRows, numCols);
2538 this->map_ = newMap;
2541 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2549 const IST theAlpha =
static_cast<IST
> (alpha);
2550 if (theAlpha == Kokkos::ArithTraits<IST>::one ()) {
2553 const size_t lclNumRows = getLocalLength ();
2554 const size_t numVecs = getNumVectors ();
2555 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2556 const std::pair<size_t, size_t> colRng (0, numVecs);
2564 const bool useHostVersion = need_sync_device ();
2565 if (useHostVersion) {
2566 auto Y_lcl = Kokkos::subview (getLocalViewHost (), rowRng, ALL ());
2567 if (isConstantStride ()) {
2568 KokkosBlas::scal (Y_lcl, theAlpha, Y_lcl);
2571 for (
size_t k = 0; k < numVecs; ++k) {
2572 const size_t Y_col = isConstantStride () ? k : whichVectors_[k];
2573 auto Y_k = Kokkos::subview (Y_lcl, ALL (), Y_col);
2574 KokkosBlas::scal (Y_k, theAlpha, Y_k);
2579 auto Y_lcl = Kokkos::subview (getLocalViewDevice (), rowRng, ALL ());
2580 if (isConstantStride ()) {
2581 KokkosBlas::scal (Y_lcl, theAlpha, Y_lcl);
2584 for (
size_t k = 0; k < numVecs; ++k) {
2585 const size_t Y_col = isConstantStride () ? k : whichVectors_[k];
2586 auto Y_k = Kokkos::subview (Y_lcl, ALL (), Y_col);
2587 KokkosBlas::scal (Y_k, theAlpha, Y_k);
2594 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2597 scale (
const Teuchos::ArrayView<const Scalar>& alphas)
2599 const size_t numVecs = this->getNumVectors ();
2600 const size_t numAlphas =
static_cast<size_t> (alphas.size ());
2601 TEUCHOS_TEST_FOR_EXCEPTION(
2602 numAlphas != numVecs, std::invalid_argument,
"Tpetra::MultiVector::"
2603 "scale: alphas.size() = " << numAlphas <<
" != this->getNumVectors() = "
2607 using k_alphas_type = Kokkos::DualView<impl_scalar_type*, device_type>;
2608 k_alphas_type k_alphas (
"alphas::tmp", numAlphas);
2609 k_alphas.modify_host ();
2610 for (
size_t i = 0; i < numAlphas; ++i) {
2613 k_alphas.sync_device ();
2615 this->scale (k_alphas.view_device ());
2618 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2621 scale (
const Kokkos::View<const impl_scalar_type*, device_type>& alphas)
2624 using Kokkos::subview;
2626 const size_t lclNumRows = this->getLocalLength ();
2627 const size_t numVecs = this->getNumVectors ();
2628 TEUCHOS_TEST_FOR_EXCEPTION(
2629 static_cast<size_t> (alphas.extent (0)) != numVecs,
2630 std::invalid_argument,
"Tpetra::MultiVector::scale(alphas): "
2631 "alphas.extent(0) = " << alphas.extent (0)
2632 <<
" != this->getNumVectors () = " << numVecs <<
".");
2633 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2634 const std::pair<size_t, size_t> colRng (0, numVecs);
2644 const bool useHostVersion = this->need_sync_device ();
2645 if (useHostVersion) {
2648 auto alphas_h = Kokkos::create_mirror_view (alphas);
2651 auto Y_lcl = subview (this->getLocalViewHost (), rowRng, ALL ());
2652 if (isConstantStride ()) {
2653 KokkosBlas::scal (Y_lcl, alphas_h, Y_lcl);
2656 for (
size_t k = 0; k < numVecs; ++k) {
2657 const size_t Y_col = this->isConstantStride () ? k :
2658 this->whichVectors_[k];
2659 auto Y_k = subview (Y_lcl, ALL (), Y_col);
2662 KokkosBlas::scal (Y_k, alphas_h(k), Y_k);
2667 auto Y_lcl = subview (this->getLocalViewDevice (), rowRng, ALL ());
2668 if (isConstantStride ()) {
2669 KokkosBlas::scal (Y_lcl, alphas, Y_lcl);
2676 auto alphas_h = Kokkos::create_mirror_view (alphas);
2679 for (
size_t k = 0; k < numVecs; ++k) {
2680 const size_t Y_col = this->isConstantStride () ? k :
2681 this->whichVectors_[k];
2682 auto Y_k = subview (Y_lcl, ALL (), Y_col);
2683 KokkosBlas::scal (Y_k, alphas_h(k), Y_k);
2689 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2696 using Kokkos::subview;
2698 const char tfecfFuncName[] =
"scale: ";
2700 const size_t lclNumRows = getLocalLength ();
2701 const size_t numVecs = getNumVectors ();
2703 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2705 "this->getLocalLength() = " << lclNumRows <<
" != A.getLocalLength() = "
2707 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2709 "this->getNumVectors() = " << numVecs <<
" != A.getNumVectors() = "
2713 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2714 const std::pair<size_t, size_t> colRng (0, numVecs);
2717 if (this->need_sync_device ()) {
2718 this->sync_device ();
2721 const_cast<MV&
>(A).sync_device ();
2724 this->modify_device ();
2725 auto Y_lcl_orig = this->getLocalViewDevice ();
2727 auto Y_lcl = subview (Y_lcl_orig, rowRng, ALL ());
2728 auto X_lcl = subview (X_lcl_orig, rowRng, ALL ());
2731 KokkosBlas::scal (Y_lcl, theAlpha, X_lcl);
2735 for (
size_t k = 0; k < numVecs; ++k) {
2736 const size_t Y_col = this->isConstantStride () ? k : this->whichVectors_[k];
2738 auto Y_k = subview (Y_lcl, ALL (), Y_col);
2739 auto X_k = subview (X_lcl, ALL (), X_col);
2741 KokkosBlas::scal (Y_k, theAlpha, X_k);
2748 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2754 const char tfecfFuncName[] =
"reciprocal: ";
2756 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2758 "MultiVectors do not have the same local length. "
2759 "this->getLocalLength() = " << getLocalLength ()
2761 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2762 A.
getNumVectors () != this->getNumVectors (), std::runtime_error,
2763 ": MultiVectors do not have the same number of columns (vectors). "
2764 "this->getNumVectors() = " << getNumVectors ()
2765 <<
" != A.getNumVectors() = " << A.
getNumVectors () <<
".");
2767 const size_t numVecs = getNumVectors ();
2770 if (this->need_sync_device ()) {
2771 this->sync_device ();
2774 const_cast<MV&
>(A).sync_device ();
2776 this->modify_device ();
2778 auto this_view_dev = this->getLocalViewDevice ();
2782 KokkosBlas::reciprocal (this_view_dev, A_view_dev);
2786 using Kokkos::subview;
2787 for (
size_t k = 0; k < numVecs; ++k) {
2788 const size_t this_col = isConstantStride () ? k : whichVectors_[k];
2789 auto vector_k = subview (this_view_dev, ALL (), this_col);
2790 const size_t A_col = isConstantStride () ? k : A.
whichVectors_[k];
2791 auto vector_Ak = subview (A_view_dev, ALL (), A_col);
2792 KokkosBlas::reciprocal (vector_k, vector_Ak);
2797 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2803 const char tfecfFuncName[] =
"abs";
2805 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2807 ": MultiVectors do not have the same local length. "
2808 "this->getLocalLength() = " << getLocalLength ()
2810 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2811 A.
getNumVectors () != this->getNumVectors (), std::runtime_error,
2812 ": MultiVectors do not have the same number of columns (vectors). "
2813 "this->getNumVectors() = " << getNumVectors ()
2814 <<
" != A.getNumVectors() = " << A.
getNumVectors () <<
".");
2815 const size_t numVecs = getNumVectors ();
2818 if (this->need_sync_device ()) {
2819 this->sync_device ();
2822 const_cast<MV&
>(A).sync_device ();
2824 this->modify_device ();
2826 auto this_view_dev = this->getLocalViewDevice ();
2830 KokkosBlas::abs (this_view_dev, A_view_dev);
2834 using Kokkos::subview;
2836 for (
size_t k=0; k < numVecs; ++k) {
2837 const size_t this_col = isConstantStride () ? k : whichVectors_[k];
2838 auto vector_k = subview (this_view_dev, ALL (), this_col);
2839 const size_t A_col = isConstantStride () ? k : A.
whichVectors_[k];
2840 auto vector_Ak = subview (A_view_dev, ALL (), A_col);
2841 KokkosBlas::abs (vector_k, vector_Ak);
2846 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2853 const char tfecfFuncName[] =
"update: ";
2854 using Kokkos::subview;
2860 const size_t lclNumRows = getLocalLength ();
2861 const size_t numVecs = getNumVectors ();
2863 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2865 "this->getLocalLength() = " << lclNumRows <<
" != A.getLocalLength() = "
2867 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2869 "this->getNumVectors() = " << numVecs <<
" != A.getNumVectors() = "
2873 if (this->need_sync_device ()) {
2874 this->sync_device ();
2877 const_cast<MV&
>(A).sync_device ();
2882 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2883 const std::pair<size_t, size_t> colRng (0, numVecs);
2885 auto Y_lcl_orig = this->getLocalViewDevice ();
2886 auto Y_lcl = subview (Y_lcl_orig, rowRng, Kokkos::ALL ());
2888 auto X_lcl = subview (X_lcl_orig, rowRng, Kokkos::ALL ());
2891 this->modify_device ();
2893 KokkosBlas::axpby (theAlpha, X_lcl, theBeta, Y_lcl);
2897 for (
size_t k = 0; k < numVecs; ++k) {
2898 const size_t Y_col = this->isConstantStride () ? k : this->whichVectors_[k];
2900 auto Y_k = subview (Y_lcl, ALL (), Y_col);
2901 auto X_k = subview (X_lcl, ALL (), X_col);
2903 KokkosBlas::axpby (theAlpha, X_k, theBeta, Y_k);
2908 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2915 const Scalar& gamma)
2918 using Kokkos::subview;
2921 const char tfecfFuncName[] =
"update(alpha,A,beta,B,gamma): ";
2925 const size_t lclNumRows = this->getLocalLength ();
2926 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2928 "The input MultiVector A has " << A.
getLocalLength () <<
" local "
2929 "row(s), but this MultiVector has " << lclNumRows <<
" local "
2931 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2933 "The input MultiVector B has " << B.
getLocalLength () <<
" local "
2934 "row(s), but this MultiVector has " << lclNumRows <<
" local "
2936 const size_t numVecs = getNumVectors ();
2937 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2939 "The input MultiVector A has " << A.
getNumVectors () <<
" column(s), "
2940 "but this MultiVector has " << numVecs <<
" column(s).");
2941 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2943 "The input MultiVector B has " << B.
getNumVectors () <<
" column(s), "
2944 "but this MultiVector has " << numVecs <<
" column(s).");
2951 if (this->need_sync_device ()) this->sync_device ();
2956 this->modify_device ();
2958 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2959 const std::pair<size_t, size_t> colRng (0, numVecs);
2964 auto C_lcl = subview (this->getLocalViewDevice (), rowRng, ALL ());
2969 KokkosBlas::update (theAlpha, A_lcl, theBeta, B_lcl, theGamma, C_lcl);
2974 for (
size_t k = 0; k < numVecs; ++k) {
2975 const size_t this_col = isConstantStride () ? k : whichVectors_[k];
2978 KokkosBlas::update (theAlpha, subview (A_lcl, rowRng, A_col),
2979 theBeta, subview (B_lcl, rowRng, B_col),
2980 theGamma, subview (C_lcl, rowRng, this_col));
2985 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2986 Teuchos::ArrayRCP<const Scalar>
2993 const char tfecfFuncName[] =
"getData: ";
2999 const_cast<MV*
> (
this)->sync_host ();
3001 auto hostView = getLocalViewHost ();
3002 const size_t col = isConstantStride () ? j : whichVectors_[j];
3003 auto hostView_j = Kokkos::subview (hostView, ALL (), col);
3004 Teuchos::ArrayRCP<const IST> dataAsArcp =
3005 Kokkos::Compat::persistingView (hostView_j, 0, getLocalLength ());
3007 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3008 (static_cast<size_t> (hostView_j.extent (0)) <
3009 static_cast<size_t> (dataAsArcp.size ()), std::logic_error,
3010 "hostView_j.extent(0) = " << hostView_j.extent (0)
3011 <<
" < dataAsArcp.size() = " << dataAsArcp.size () <<
". "
3012 "Please report this bug to the Tpetra developers.");
3014 return Teuchos::arcp_reinterpret_cast<
const Scalar> (dataAsArcp);
3017 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3018 Teuchos::ArrayRCP<Scalar>
3023 using Kokkos::subview;
3026 const char tfecfFuncName[] =
"getDataNonConst: ";
3032 const_cast<MV*
> (
this)->sync_host ();
3037 auto hostView = getLocalViewHost ();
3038 const size_t col = isConstantStride () ? j : whichVectors_[j];
3039 auto hostView_j = subview (hostView, ALL (), col);
3040 Teuchos::ArrayRCP<IST> dataAsArcp =
3041 Kokkos::Compat::persistingView (hostView_j, 0, getLocalLength ());
3043 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3044 (static_cast<size_t> (hostView_j.extent (0)) <
3045 static_cast<size_t> (dataAsArcp.size ()), std::logic_error,
3046 "hostView_j.extent(0) = " << hostView_j.extent (0)
3047 <<
" < dataAsArcp.size() = " << dataAsArcp.size () <<
". "
3048 "Please report this bug to the Tpetra developers.");
3050 return Teuchos::arcp_reinterpret_cast<Scalar> (dataAsArcp);
3053 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3054 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3056 subCopy (
const Teuchos::ArrayView<const size_t>& cols)
const
3063 bool contiguous =
true;
3064 const size_t numCopyVecs =
static_cast<size_t> (cols.size ());
3065 for (
size_t j = 1; j < numCopyVecs; ++j) {
3066 if (cols[j] != cols[j-1] + static_cast<size_t> (1)) {
3071 if (contiguous && numCopyVecs > 0) {
3072 return this->subCopy (Teuchos::Range1D (cols[0], cols[numCopyVecs-1]));
3075 RCP<const MV> X_sub = this->subView (cols);
3076 RCP<MV> Y (
new MV (this->getMap (), numCopyVecs,
false));
3082 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3083 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3090 RCP<const MV> X_sub = this->subView (colRng);
3091 RCP<MV> Y (
new MV (this->getMap (), static_cast<size_t> (colRng.size ()),
false));
3096 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3100 return origView_.extent (0);
3103 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3107 return origView_.extent (1);
3110 template <
class Scalar,
class LO,
class GO,
class Node>
3113 const Teuchos::RCP<const map_type>& subMap,
3118 using Kokkos::subview;
3119 using Teuchos::outArg;
3122 using Teuchos::reduceAll;
3123 using Teuchos::REDUCE_MIN;
3126 const char prefix[] =
"Tpetra::MultiVector constructor (offsetView): ";
3127 const char suffix[] =
"Please report this bug to the Tpetra developers.";
3130 std::unique_ptr<std::ostringstream> errStrm;
3137 const auto comm = subMap->getComm ();
3138 TEUCHOS_ASSERT( ! comm.is_null () );
3139 const int myRank = comm->getRank ();
3141 const LO lclNumRowsBefore =
static_cast<LO
> (X.
getLocalLength ());
3143 const LO newNumRows =
static_cast<LO
> (subMap->getNodeNumElements ());
3145 std::ostringstream os;
3146 os <<
"Proc " << myRank <<
": " << prefix
3147 <<
"X: {lclNumRows: " << lclNumRowsBefore
3149 <<
", numCols: " << numCols <<
"}, "
3150 <<
"subMap: {lclNumRows: " << newNumRows <<
"}" << endl;
3151 std::cerr << os.str ();
3156 const bool tooManyElts =
3159 errStrm = std::unique_ptr<std::ostringstream> (
new std::ostringstream);
3160 *errStrm <<
" Proc " << myRank <<
": subMap->getNodeNumElements() (="
3161 << newNumRows <<
") + rowOffset (=" << rowOffset
3165 TEUCHOS_TEST_FOR_EXCEPTION
3166 (! debug && tooManyElts, std::invalid_argument,
3167 prefix << errStrm->str () << suffix);
3171 reduceAll<int, int> (*comm, REDUCE_MIN, lclGood, outArg (gblGood));
3173 std::ostringstream gblErrStrm;
3174 const std::string myErrStr =
3175 errStrm.get () !=
nullptr ? errStrm->str () : std::string (
"");
3176 ::Tpetra::Details::gathervPrint (gblErrStrm, myErrStr, *comm);
3177 TEUCHOS_TEST_FOR_EXCEPTION
3178 (
true, std::invalid_argument, gblErrStrm.str ());
3182 using range_type = std::pair<LO, LO>;
3183 const range_type origRowRng
3184 (rowOffset, static_cast<LO> (X.
origView_.extent (0)));
3185 const range_type rowRng
3186 (rowOffset, rowOffset + newNumRows);
3188 dual_view_type newOrigView = subview (X.
origView_, origRowRng, ALL ());
3199 dual_view_type newView =
3200 subview (rowOffset == 0 ? X.
origView_ : X.
view_, rowRng, ALL ());
3207 if (newOrigView.extent (0) == 0 &&
3208 newOrigView.extent (1) != X.
origView_.extent (1)) {
3210 allocDualView<Scalar, LO, GO, Node> (0, X.
getNumVectors ());
3212 if (newView.extent (0) == 0 &&
3213 newView.extent (1) != X.
view_.extent (1)) {
3215 allocDualView<Scalar, LO, GO, Node> (0, X.
getNumVectors ());
3219 MV (subMap, newView, newOrigView) :
3223 const LO lclNumRowsRet =
static_cast<LO
> (subViewMV.getLocalLength ());
3224 const LO numColsRet =
static_cast<LO
> (subViewMV.getNumVectors ());
3225 if (newNumRows != lclNumRowsRet || numCols != numColsRet) {
3227 if (errStrm.get () ==
nullptr) {
3228 errStrm = std::unique_ptr<std::ostringstream> (
new std::ostringstream);
3230 *errStrm <<
" Proc " << myRank <<
3231 ": subMap.getNodeNumElements(): " << newNumRows <<
3232 ", subViewMV.getLocalLength(): " << lclNumRowsRet <<
3233 ", X.getNumVectors(): " << numCols <<
3234 ", subViewMV.getNumVectors(): " << numColsRet << endl;
3236 reduceAll<int, int> (*comm, REDUCE_MIN, lclGood, outArg (gblGood));
3238 std::ostringstream gblErrStrm;
3240 gblErrStrm << prefix <<
"Returned MultiVector has the wrong local "
3241 "dimensions on one or more processes:" << endl;
3243 const std::string myErrStr =
3244 errStrm.get () !=
nullptr ? errStrm->str () : std::string (
"");
3245 ::Tpetra::Details::gathervPrint (gblErrStrm, myErrStr, *comm);
3246 gblErrStrm << suffix << endl;
3247 TEUCHOS_TEST_FOR_EXCEPTION
3248 (
true, std::invalid_argument, gblErrStrm.str ());
3253 std::ostringstream os;
3254 os <<
"Proc " << myRank <<
": " << prefix <<
"Call op=" << endl;
3255 std::cerr << os.str ();
3261 std::ostringstream os;
3262 os <<
"Proc " << myRank <<
": " << prefix <<
"Done!" << endl;
3263 std::cerr << os.str ();
3267 template <
class Scalar,
class LO,
class GO,
class Node>
3271 const size_t rowOffset) :
3276 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3277 Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3280 const size_t offset)
const
3283 return Teuchos::rcp (
new MV (*
this, *subMap, offset));
3286 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3287 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3290 const size_t offset)
3293 return Teuchos::rcp (
new MV (*
this, *subMap, offset));
3296 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3297 Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3299 subView (
const Teuchos::ArrayView<const size_t>& cols)
const
3301 using Teuchos::Array;
3305 const size_t numViewCols =
static_cast<size_t> (cols.size ());
3306 TEUCHOS_TEST_FOR_EXCEPTION(
3307 numViewCols < 1, std::runtime_error,
"Tpetra::MultiVector::subView"
3308 "(const Teuchos::ArrayView<const size_t>&): The input array cols must "
3309 "contain at least one entry, but cols.size() = " << cols.size ()
3314 bool contiguous =
true;
3315 for (
size_t j = 1; j < numViewCols; ++j) {
3316 if (cols[j] != cols[j-1] + static_cast<size_t> (1)) {
3322 if (numViewCols == 0) {
3324 return rcp (
new MV (this->getMap (), numViewCols));
3327 return this->subView (Teuchos::Range1D (cols[0], cols[numViewCols-1]));
3331 if (isConstantStride ()) {
3332 return rcp (
new MV (this->getMap (), view_, origView_, cols));
3335 Array<size_t> newcols (cols.size ());
3336 for (
size_t j = 0; j < numViewCols; ++j) {
3337 newcols[j] = whichVectors_[cols[j]];
3339 return rcp (
new MV (this->getMap (), view_, origView_, newcols ()));
3344 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3345 Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3349 using ::Tpetra::Details::Behavior;
3351 using Kokkos::subview;
3352 using Teuchos::Array;
3356 const char tfecfFuncName[] =
"subView(Range1D): ";
3358 const size_t lclNumRows = this->getLocalLength ();
3359 const size_t numVecs = this->getNumVectors ();
3363 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3364 static_cast<size_t> (colRng.size ()) > numVecs, std::runtime_error,
3365 "colRng.size() = " << colRng.size () <<
" > this->getNumVectors() = "
3367 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3368 numVecs != 0 && colRng.size () != 0 &&
3369 (colRng.lbound () <
static_cast<Teuchos::Ordinal
> (0) ||
3370 static_cast<size_t> (colRng.ubound ()) >= numVecs),
3371 std::invalid_argument,
"Nonempty input range [" << colRng.lbound () <<
3372 "," << colRng.ubound () <<
"] exceeds the valid range of column indices "
3373 "[0, " << numVecs <<
"].");
3375 RCP<const MV> X_ret;
3385 const std::pair<size_t, size_t> rows (0, lclNumRows);
3386 if (colRng.size () == 0) {
3387 const std::pair<size_t, size_t> cols (0, 0);
3388 dual_view_type X_sub = takeSubview (this->view_, ALL (), cols);
3389 X_ret = rcp (
new MV (this->getMap (), X_sub, origView_));
3393 if (isConstantStride ()) {
3394 const std::pair<size_t, size_t> cols (colRng.lbound (),
3395 colRng.ubound () + 1);
3396 dual_view_type X_sub = takeSubview (this->view_, ALL (), cols);
3397 X_ret = rcp (
new MV (this->getMap (), X_sub, origView_));
3400 if (static_cast<size_t> (colRng.size ()) == static_cast<size_t> (1)) {
3403 const std::pair<size_t, size_t> col (whichVectors_[0] + colRng.lbound (),
3404 whichVectors_[0] + colRng.ubound () + 1);
3405 dual_view_type X_sub = takeSubview (view_, ALL (), col);
3406 X_ret = rcp (
new MV (this->getMap (), X_sub, origView_));
3409 Array<size_t> which (whichVectors_.begin () + colRng.lbound (),
3410 whichVectors_.begin () + colRng.ubound () + 1);
3411 X_ret = rcp (
new MV (this->getMap (), view_, origView_, which));
3416 const bool debug = Behavior::debug ();
3418 using Teuchos::Comm;
3419 using Teuchos::outArg;
3420 using Teuchos::REDUCE_MIN;
3421 using Teuchos::reduceAll;
3423 RCP<const Comm<int> > comm = this->getMap ().is_null () ?
3424 Teuchos::null : this->getMap ()->getComm ();
3425 if (! comm.is_null ()) {
3429 if (X_ret.is_null ()) {
3432 reduceAll<int, int> (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess));
3433 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3434 (lclSuccess != 1, std::logic_error,
"X_ret (the subview of this "
3435 "MultiVector; the return value of this method) is null on some MPI "
3436 "process in this MultiVector's communicator. This should never "
3437 "happen. Please report this bug to the Tpetra developers.");
3438 if (! X_ret.is_null () &&
3439 X_ret->getNumVectors () !=
static_cast<size_t> (colRng.size ())) {
3442 reduceAll<int, int> (*comm, REDUCE_MIN, lclSuccess,
3443 outArg (gblSuccess));
3444 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3445 (lclSuccess != 1, std::logic_error,
"X_ret->getNumVectors() != "
3446 "colRng.size(), on at least one MPI process in this MultiVector's "
3447 "communicator. This should never happen. "
3448 "Please report this bug to the Tpetra developers.");
3455 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3456 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3461 return Teuchos::rcp_const_cast<MV> (this->subView (cols));
3465 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3466 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3471 return Teuchos::rcp_const_cast<MV> (this->subView (colRng));
3475 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3481 using Kokkos::subview;
3482 typedef std::pair<size_t, size_t> range_type;
3483 const char tfecfFuncName[] =
"MultiVector(const MultiVector&, const size_t): ";
3486 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3487 (j >= numCols, std::invalid_argument,
"Input index j (== " << j
3488 <<
") exceeds valid column index range [0, " << numCols <<
" - 1].");
3490 static_cast<size_t> (j) :
3492 this->
view_ = takeSubview (X.
view_, Kokkos::ALL (), range_type (jj, jj+1));
3504 const size_t newSize = X.
imports_.extent (0) / numCols;
3506 newImports.d_view = subview (X.
imports_.d_view, range_type (0, newSize));
3507 newImports.h_view = subview (X.
imports_.h_view, range_type (0, newSize));
3510 const size_t newSize = X.
exports_.extent (0) / numCols;
3512 newExports.d_view = subview (X.
exports_.d_view, range_type (0, newSize));
3513 newExports.h_view = subview (X.
exports_.h_view, range_type (0, newSize));
3523 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3524 Teuchos::RCP<const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3529 return Teuchos::rcp (
new V (*
this, j));
3533 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3534 Teuchos::RCP<Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3539 return Teuchos::rcp (
new V (*
this, j));
3543 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3546 get1dCopy (
const Teuchos::ArrayView<Scalar>& A,
const size_t LDA)
const
3548 using dev_view_type =
typename dual_view_type::t_dev;
3549 using host_view_type =
typename dual_view_type::t_host;
3551 using input_view_type = Kokkos::View<IST**, Kokkos::LayoutLeft,
3553 Kokkos::MemoryUnmanaged>;
3554 const char tfecfFuncName[] =
"get1dCopy: ";
3556 const size_t numRows = this->getLocalLength ();
3557 const size_t numCols = this->getNumVectors ();
3559 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3560 (LDA < numRows, std::runtime_error,
3561 "LDA = " << LDA <<
" < numRows = " << numRows <<
".");
3562 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3563 (numRows >
size_t (0) && numCols >
size_t (0) &&
3564 size_t (A.size ()) < LDA * (numCols - 1) + numRows,
3566 "A.size() = " << A.size () <<
", but its size must be at least "
3567 << (LDA * (numCols - 1) + numRows) <<
" to hold all the entries.");
3569 const std::pair<size_t, size_t> rowRange (0, numRows);
3570 const std::pair<size_t, size_t> colRange (0, numCols);
3572 input_view_type A_view_orig (reinterpret_cast<IST*> (A.getRawPtr ()),
3574 auto A_view = Kokkos::subview (A_view_orig, rowRange, colRange);
3581 const bool useHostVersion = this->need_sync_device ();
3583 dev_view_type srcView_dev;
3584 host_view_type srcView_host;
3585 if (useHostVersion) {
3586 srcView_host = this->getLocalViewHost ();
3589 srcView_dev = this->getLocalViewDevice ();
3592 if (this->isConstantStride ()) {
3593 if (useHostVersion) {
3601 for (
size_t j = 0; j < numCols; ++j) {
3602 const size_t srcCol = this->whichVectors_[j];
3603 auto dstColView = Kokkos::subview (A_view, rowRange, j);
3605 if (useHostVersion) {
3606 auto srcColView_host = Kokkos::subview (srcView_host, rowRange, srcCol);
3610 auto srcColView_dev = Kokkos::subview (srcView_dev, rowRange, srcCol);
3618 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3621 get2dCopy (
const Teuchos::ArrayView<
const Teuchos::ArrayView<Scalar> >& ArrayOfPtrs)
const
3624 const char tfecfFuncName[] =
"get2dCopy: ";
3625 const size_t numRows = this->getLocalLength ();
3626 const size_t numCols = this->getNumVectors ();
3628 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3629 static_cast<size_t> (ArrayOfPtrs.size ()) != numCols,
3630 std::runtime_error,
"Input array of pointers must contain as many "
3631 "entries (arrays) as the MultiVector has columns. ArrayOfPtrs.size() = "
3632 << ArrayOfPtrs.size () <<
" != getNumVectors() = " << numCols <<
".");
3634 if (numRows != 0 && numCols != 0) {
3636 for (
size_t j = 0; j < numCols; ++j) {
3637 const size_t dstLen =
static_cast<size_t> (ArrayOfPtrs[j].size ());
3638 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3639 dstLen < numRows, std::invalid_argument,
"Array j = " << j <<
" of "
3640 "the input array of arrays is not long enough to fit all entries in "
3641 "that column of the MultiVector. ArrayOfPtrs[j].size() = " << dstLen
3642 <<
" < getLocalLength() = " << numRows <<
".");
3646 for (
size_t j = 0; j < numCols; ++j) {
3647 Teuchos::RCP<const V> X_j = this->getVector (j);
3648 const size_t LDA =
static_cast<size_t> (ArrayOfPtrs[j].size ());
3649 X_j->get1dCopy (ArrayOfPtrs[j], LDA);
3655 template <
class SC,
class LO,
class GO,
class NT>
3658 const bool markModified)
3675 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3676 Teuchos::ArrayRCP<const Scalar>
3680 if (getLocalLength () == 0 || getNumVectors () == 0) {
3681 return Teuchos::null;
3683 TEUCHOS_TEST_FOR_EXCEPTION(
3684 ! isConstantStride (), std::runtime_error,
"Tpetra::MultiVector::"
3685 "get1dView: This MultiVector does not have constant stride, so it is "
3686 "not possible to view its data as a single array. You may check "
3687 "whether a MultiVector has constant stride by calling "
3688 "isConstantStride().");
3692 constexpr
bool markModified =
false;
3694 auto X_lcl = syncMVToHostIfNeededAndGetHostView (const_cast<MV&> (*
this),
3696 Teuchos::ArrayRCP<const impl_scalar_type> dataAsArcp =
3697 Kokkos::Compat::persistingView (X_lcl);
3698 return Teuchos::arcp_reinterpret_cast<
const Scalar> (dataAsArcp);
3702 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3703 Teuchos::ArrayRCP<Scalar>
3707 if (this->getLocalLength () == 0 || this->getNumVectors () == 0) {
3708 return Teuchos::null;
3711 TEUCHOS_TEST_FOR_EXCEPTION
3712 (! isConstantStride (), std::runtime_error,
"Tpetra::MultiVector::"
3713 "get1dViewNonConst: This MultiVector does not have constant stride, "
3714 "so it is not possible to view its data as a single array. You may "
3715 "check whether a MultiVector has constant stride by calling "
3716 "isConstantStride().");
3717 constexpr
bool markModified =
true;
3718 auto X_lcl = syncMVToHostIfNeededAndGetHostView (*
this, markModified);
3719 Teuchos::ArrayRCP<impl_scalar_type> dataAsArcp =
3720 Kokkos::Compat::persistingView (X_lcl);
3721 return Teuchos::arcp_reinterpret_cast<Scalar> (dataAsArcp);
3725 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3726 Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> >
3730 constexpr
bool markModified =
true;
3731 auto X_lcl = syncMVToHostIfNeededAndGetHostView (*
this, markModified);
3737 const size_t myNumRows = this->getLocalLength ();
3738 const size_t numCols = this->getNumVectors ();
3739 const Kokkos::pair<size_t, size_t> rowRange (0, myNumRows);
3741 Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> > views (numCols);
3742 for (
size_t j = 0; j < numCols; ++j) {
3743 const size_t col = this->isConstantStride () ? j : this->whichVectors_[j];
3744 auto X_lcl_j = Kokkos::subview (X_lcl, rowRange, col);
3745 Teuchos::ArrayRCP<impl_scalar_type> X_lcl_j_arcp =
3746 Kokkos::Compat::persistingView (X_lcl_j);
3747 views[j] = Teuchos::arcp_reinterpret_cast<Scalar> (X_lcl_j_arcp);
3752 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3753 Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> >
3760 constexpr
bool markModified =
false;
3762 auto X_lcl = syncMVToHostIfNeededAndGetHostView (const_cast<MV&> (*
this),
3768 const size_t myNumRows = this->getLocalLength ();
3769 const size_t numCols = this->getNumVectors ();
3770 const Kokkos::pair<size_t, size_t> rowRange (0, myNumRows);
3772 Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> > views (numCols);
3773 for (
size_t j = 0; j < numCols; ++j) {
3774 const size_t col = this->isConstantStride () ? j : this->whichVectors_[j];
3775 auto X_lcl_j = Kokkos::subview (X_lcl, rowRange, col);
3776 Teuchos::ArrayRCP<const impl_scalar_type> X_lcl_j_arcp =
3777 Kokkos::Compat::persistingView (X_lcl_j);
3778 views[j] = Teuchos::arcp_reinterpret_cast<
const Scalar> (X_lcl_j_arcp);
3783 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3787 Teuchos::ETransp transB,
3788 const Scalar& alpha,
3793 using ::Tpetra::Details::ProfilingRegion;
3794 using Teuchos::CONJ_TRANS;
3795 using Teuchos::NO_TRANS;
3796 using Teuchos::TRANS;
3799 using Teuchos::rcpFromRef;
3801 using ATS = Kokkos::ArithTraits<impl_scalar_type>;
3803 using STS = Teuchos::ScalarTraits<Scalar>;
3805 const char tfecfFuncName[] =
"multiply: ";
3806 ProfilingRegion region (
"Tpetra::MV::multiply");
3838 const bool C_is_local = ! this->isDistributed ();
3848 auto myMap = this->getMap ();
3849 if (! myMap.is_null () && ! myMap->getComm ().is_null ()) {
3850 using Teuchos::REDUCE_MIN;
3851 using Teuchos::reduceAll;
3852 using Teuchos::outArg;
3854 auto comm = myMap->getComm ();
3855 const size_t A_nrows =
3857 const size_t A_ncols =
3859 const size_t B_nrows =
3861 const size_t B_ncols =
3864 const bool lclBad = this->getLocalLength () != A_nrows ||
3865 this->getNumVectors () != B_ncols || A_ncols != B_nrows;
3867 const int myRank = comm->getRank ();
3868 std::ostringstream errStrm;
3869 if (this->getLocalLength () != A_nrows) {
3870 errStrm <<
"Proc " << myRank <<
": this->getLocalLength()="
3871 << this->getLocalLength () <<
" != A_nrows=" << A_nrows
3872 <<
"." << std::endl;
3874 if (this->getNumVectors () != B_ncols) {
3875 errStrm <<
"Proc " << myRank <<
": this->getNumVectors()="
3876 << this->getNumVectors () <<
" != B_ncols=" << B_ncols
3877 <<
"." << std::endl;
3879 if (A_ncols != B_nrows) {
3880 errStrm <<
"Proc " << myRank <<
": A_ncols="
3881 << A_ncols <<
" != B_nrows=" << B_nrows
3882 <<
"." << std::endl;
3885 const int lclGood = lclBad ? 0 : 1;
3887 reduceAll<int, int> (*comm, REDUCE_MIN, lclGood, outArg (gblGood));
3889 std::ostringstream os;
3890 ::Tpetra::Details::gathervPrint (os, errStrm.str (), *comm);
3892 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3893 (
true, std::runtime_error,
"Inconsistent local dimensions on at "
3894 "least one process in this object's communicator." << std::endl
3896 <<
"C(" << (C_is_local ?
"local" :
"distr") <<
") = "
3898 << (transA == Teuchos::TRANS ?
"^T" :
3899 (transA == Teuchos::CONJ_TRANS ?
"^H" :
""))
3900 <<
"(" << (A_is_local ?
"local" :
"distr") <<
") * "
3902 << (transA == Teuchos::TRANS ?
"^T" :
3903 (transA == Teuchos::CONJ_TRANS ?
"^H" :
""))
3904 <<
"(" << (B_is_local ?
"local" :
"distr") <<
")." << std::endl
3905 <<
"Global dimensions: C(" << this->getGlobalLength () <<
", "
3915 const bool Case1 = C_is_local && A_is_local && B_is_local;
3917 const bool Case2 = C_is_local && ! A_is_local && ! B_is_local &&
3918 transA != NO_TRANS &&
3921 const bool Case3 = ! C_is_local && ! A_is_local && B_is_local &&
3925 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3926 (! Case1 && ! Case2 && ! Case3, std::runtime_error,
3927 "Multiplication of op(A) and op(B) into *this is not a "
3928 "supported use case.");
3930 if (beta != STS::zero () && Case2) {
3936 const int myRank = this->getMap ()->getComm ()->getRank ();
3938 beta_local = ATS::zero ();
3947 if (! isConstantStride ()) {
3948 C_tmp = rcp (
new MV (*
this, Teuchos::Copy));
3950 C_tmp = rcp (
this,
false);
3953 RCP<const MV> A_tmp;
3955 A_tmp = rcp (
new MV (A, Teuchos::Copy));
3957 A_tmp = rcpFromRef (A);
3960 RCP<const MV> B_tmp;
3962 B_tmp = rcp (
new MV (B, Teuchos::Copy));
3964 B_tmp = rcpFromRef (B);
3967 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3968 (! C_tmp->isConstantStride () || ! B_tmp->isConstantStride () ||
3969 ! A_tmp->isConstantStride (), std::logic_error,
3970 "Failed to make temporary constant-stride copies of MultiVectors.");
3973 if (A_tmp->need_sync_device ()) {
3974 const_cast<MV&
> (*A_tmp).sync_device ();
3976 const LO A_lclNumRows = A_tmp->getLocalLength ();
3977 const LO A_numVecs = A_tmp->getNumVectors ();
3978 auto A_lcl = A_tmp->getLocalViewDevice ();
3979 auto A_sub = Kokkos::subview (A_lcl,
3980 std::make_pair (LO (0), A_lclNumRows),
3981 std::make_pair (LO (0), A_numVecs));
3983 if (B_tmp->need_sync_device ()) {
3984 const_cast<MV&
> (*B_tmp).sync_device ();
3986 const LO B_lclNumRows = B_tmp->getLocalLength ();
3987 const LO B_numVecs = B_tmp->getNumVectors ();
3988 auto B_lcl = B_tmp->getLocalViewDevice ();
3989 auto B_sub = Kokkos::subview (B_lcl,
3990 std::make_pair (LO (0), B_lclNumRows),
3991 std::make_pair (LO (0), B_numVecs));
3993 if (C_tmp->need_sync_device ()) {
3994 const_cast<MV&
> (*C_tmp).sync_device ();
3996 const LO C_lclNumRows = C_tmp->getLocalLength ();
3997 const LO C_numVecs = C_tmp->getNumVectors ();
3998 auto C_lcl = C_tmp->getLocalViewDevice ();
3999 auto C_sub = Kokkos::subview (C_lcl,
4000 std::make_pair (LO (0), C_lclNumRows),
4001 std::make_pair (LO (0), C_numVecs));
4002 const char ctransA = (transA == Teuchos::NO_TRANS ?
'N' :
4003 (transA == Teuchos::TRANS ?
'T' :
'C'));
4004 const char ctransB = (transB == Teuchos::NO_TRANS ?
'N' :
4005 (transB == Teuchos::TRANS ?
'T' :
'C'));
4008 ProfilingRegion regionGemm (
"Tpetra::MV::multiply-call-gemm");
4009 KokkosBlas::gemm (&ctransA, &ctransB, alpha_IST, A_sub, B_sub,
4013 if (! isConstantStride ()) {
4018 A_tmp = Teuchos::null;
4019 B_tmp = Teuchos::null;
4027 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4036 using Kokkos::subview;
4039 const char tfecfFuncName[] =
"elementWiseMultiply: ";
4041 const size_t lclNumRows = this->getLocalLength ();
4042 const size_t numVecs = this->getNumVectors ();
4044 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4046 std::runtime_error,
"MultiVectors do not have the same local length.");
4047 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4048 numVecs != B.
getNumVectors (), std::runtime_error,
"this->getNumVectors"
4049 "() = " << numVecs <<
" != B.getNumVectors() = " << B.
getNumVectors ()
4053 if (this->need_sync_device ()) {
4054 this->sync_device ();
4057 const_cast<V&
>(A).sync_device ();
4060 const_cast<MV&
>(B).sync_device ();
4062 this->modify_device ();
4064 auto this_view = this->getLocalViewDevice ();
4074 KokkosBlas::mult (scalarThis,
4077 subview (A_view, ALL (), 0),
4081 for (
size_t j = 0; j < numVecs; ++j) {
4082 const size_t C_col = isConstantStride () ? j : whichVectors_[j];
4084 KokkosBlas::mult (scalarThis,
4085 subview (this_view, ALL (), C_col),
4087 subview (A_view, ALL (), 0),
4088 subview (B_view, ALL (), B_col));
4093 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4099 using ::Tpetra::Details::ProfilingRegion;
4100 ProfilingRegion region (
"Tpetra::MV::reduce");
4102 const auto map = this->getMap ();
4103 if (map.get () ==
nullptr) {
4106 const auto comm = map->getComm ();
4107 if (comm.get () ==
nullptr) {
4113 const bool changed_on_device = this->need_sync_host ();
4114 if (changed_on_device) {
4118 this->modify_device ();
4119 auto X_lcl = this->getLocalViewDevice ();
4123 this->modify_host ();
4124 auto X_lcl = this->getLocalViewHost ();
4129 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4136 #ifdef HAVE_TPETRA_DEBUG
4137 const LocalOrdinal minLocalIndex = this->getMap()->getMinLocalIndex();
4138 const LocalOrdinal maxLocalIndex = this->getMap()->getMaxLocalIndex();
4139 TEUCHOS_TEST_FOR_EXCEPTION(
4140 lclRow < minLocalIndex || lclRow > maxLocalIndex,
4142 "Tpetra::MultiVector::replaceLocalValue: row index " << lclRow
4143 <<
" is invalid. The range of valid row indices on this process "
4144 << this->getMap()->getComm()->getRank() <<
" is [" << minLocalIndex
4145 <<
", " << maxLocalIndex <<
"].");
4146 TEUCHOS_TEST_FOR_EXCEPTION(
4147 vectorIndexOutOfRange(col),
4149 "Tpetra::MultiVector::replaceLocalValue: vector index " << col
4150 <<
" of the multivector is invalid.");
4152 const size_t colInd = isConstantStride () ? col : whichVectors_[col];
4153 view_.h_view (lclRow, colInd) = ScalarValue;
4157 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4163 const bool atomic)
const
4165 #ifdef HAVE_TPETRA_DEBUG
4166 const LocalOrdinal minLocalIndex = this->getMap()->getMinLocalIndex();
4167 const LocalOrdinal maxLocalIndex = this->getMap()->getMaxLocalIndex();
4168 TEUCHOS_TEST_FOR_EXCEPTION(
4169 lclRow < minLocalIndex || lclRow > maxLocalIndex,
4171 "Tpetra::MultiVector::sumIntoLocalValue: row index " << lclRow
4172 <<
" is invalid. The range of valid row indices on this process "
4173 << this->getMap()->getComm()->getRank() <<
" is [" << minLocalIndex
4174 <<
", " << maxLocalIndex <<
"].");
4175 TEUCHOS_TEST_FOR_EXCEPTION(
4176 vectorIndexOutOfRange(col),
4178 "Tpetra::MultiVector::sumIntoLocalValue: vector index " << col
4179 <<
" of the multivector is invalid.");
4181 const size_t colInd = isConstantStride () ? col : whichVectors_[col];
4183 Kokkos::atomic_add (& (view_.h_view(lclRow, colInd)), value);
4186 view_.h_view (lclRow, colInd) += value;
4191 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4200 const LocalOrdinal lclRow = this->map_->getLocalElement (gblRow);
4201 #ifdef HAVE_TPETRA_DEBUG
4202 const char tfecfFuncName[] =
"replaceGlobalValue: ";
4203 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4204 (lclRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid (),
4206 "Global row index " << gblRow <<
" is not present on this process "
4207 << this->getMap ()->getComm ()->getRank () <<
".");
4208 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4209 (this->vectorIndexOutOfRange (col), std::runtime_error,
4210 "Vector index " << col <<
" of the MultiVector is invalid.");
4211 #endif // HAVE_TPETRA_DEBUG
4212 this->replaceLocalValue (lclRow, col, ScalarValue);
4215 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4221 const bool atomic)
const
4225 const LocalOrdinal lclRow = this->map_->getLocalElement (globalRow);
4226 #ifdef HAVE_TEUCHOS_DEBUG
4227 TEUCHOS_TEST_FOR_EXCEPTION(
4228 lclRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid (),
4230 "Tpetra::MultiVector::sumIntoGlobalValue: Global row index " << globalRow
4231 <<
" is not present on this process "
4232 << this->getMap ()->getComm ()->getRank () <<
".");
4233 TEUCHOS_TEST_FOR_EXCEPTION(
4234 vectorIndexOutOfRange(col),
4236 "Tpetra::MultiVector::sumIntoGlobalValue: Vector index " << col
4237 <<
" of the multivector is invalid.");
4239 this->sumIntoLocalValue (lclRow, col, value, atomic);
4242 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4244 Teuchos::ArrayRCP<T>
4250 typename dual_view_type::array_layout,
4252 const size_t col = isConstantStride () ? j : whichVectors_[j];
4253 col_dual_view_type X_col =
4254 Kokkos::subview (view_, Kokkos::ALL (), col);
4255 return Kokkos::Compat::persistingView (X_col.d_view);
4258 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
4259 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4266 #endif // TPETRA_ENABLE_DEPRECATED_CODE
4268 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4272 view_.clear_sync_state ();
4275 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4282 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4286 view_.sync_device ();
4289 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4293 return view_.need_sync_host ();
4296 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4300 return view_.need_sync_device ();
4303 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4307 view_.modify_device ();
4310 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4314 view_.modify_host ();
4317 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4321 return view_.view_device ();
4324 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4328 return view_.view_host ();
4331 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4336 using Teuchos::TypeNameTraits;
4338 std::ostringstream out;
4339 out <<
"\"" << className <<
"\": {";
4340 out <<
"Template parameters: {Scalar: " << TypeNameTraits<Scalar>::name ()
4341 <<
", LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name ()
4342 <<
", GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name ()
4343 <<
", Node" << Node::name ()
4345 if (this->getObjectLabel () !=
"") {
4346 out <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
4348 out <<
", numRows: " << this->getGlobalLength ();
4349 if (className !=
"Tpetra::Vector") {
4350 out <<
", numCols: " << this->getNumVectors ()
4351 <<
", isConstantStride: " << this->isConstantStride ();
4353 if (this->isConstantStride ()) {
4354 out <<
", columnStride: " << this->getStride ();
4361 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4366 return this->descriptionImpl (
"Tpetra::MultiVector");
4369 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4374 typedef LocalOrdinal LO;
4377 if (vl <= Teuchos::VERB_LOW) {
4378 return std::string ();
4380 auto map = this->getMap ();
4381 if (map.is_null ()) {
4382 return std::string ();
4384 auto outStringP = Teuchos::rcp (
new std::ostringstream ());
4385 auto outp = Teuchos::getFancyOStream (outStringP);
4386 Teuchos::FancyOStream& out = *outp;
4387 auto comm = map->getComm ();
4388 const int myRank = comm->getRank ();
4389 const int numProcs = comm->getSize ();
4391 out <<
"Process " << myRank <<
" of " << numProcs <<
":" << endl;
4392 Teuchos::OSTab tab1 (out);
4395 const LO lclNumRows =
static_cast<LO
> (this->getLocalLength ());
4396 out <<
"Local number of rows: " << lclNumRows << endl;
4398 if (vl > Teuchos::VERB_MEDIUM) {
4401 if (this->getNumVectors () !=
static_cast<size_t> (1)) {
4402 out <<
"isConstantStride: " << this->isConstantStride () << endl;
4404 if (this->isConstantStride ()) {
4405 out <<
"Column stride: " << this->getStride () << endl;
4408 if (vl > Teuchos::VERB_HIGH && lclNumRows > 0) {
4412 typename dual_view_type::t_host X_host;
4413 if (need_sync_host ()) {
4419 auto X_dev = getLocalViewDevice ();
4420 auto X_host_copy = Kokkos::create_mirror_view (X_dev);
4422 X_host = X_host_copy;
4428 X_host = getLocalViewHost ();
4432 out <<
"Values: " << endl
4434 const LO numCols =
static_cast<LO
> (this->getNumVectors ());
4436 for (LO i = 0; i < lclNumRows; ++i) {
4438 if (i + 1 < lclNumRows) {
4444 for (LO i = 0; i < lclNumRows; ++i) {
4445 for (LO j = 0; j < numCols; ++j) {
4447 if (j + 1 < numCols) {
4451 if (i + 1 < lclNumRows) {
4461 return outStringP->str ();
4464 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4468 const std::string& className,
4469 const Teuchos::EVerbosityLevel verbLevel)
const
4471 using Teuchos::TypeNameTraits;
4472 using Teuchos::VERB_DEFAULT;
4473 using Teuchos::VERB_NONE;
4474 using Teuchos::VERB_LOW;
4476 const Teuchos::EVerbosityLevel vl =
4477 (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
4479 if (vl == VERB_NONE) {
4486 auto map = this->getMap ();
4487 if (map.is_null ()) {
4490 auto comm = map->getComm ();
4491 if (comm.is_null ()) {
4495 const int myRank = comm->getRank ();
4504 Teuchos::RCP<Teuchos::OSTab> tab0, tab1;
4508 tab0 = Teuchos::rcp (
new Teuchos::OSTab (out));
4509 out <<
"\"" << className <<
"\":" << endl;
4510 tab1 = Teuchos::rcp (
new Teuchos::OSTab (out));
4512 out <<
"Template parameters:" << endl;
4513 Teuchos::OSTab tab2 (out);
4514 out <<
"Scalar: " << TypeNameTraits<Scalar>::name () << endl
4515 <<
"LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name () << endl
4516 <<
"GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name () << endl
4517 <<
"Node: " << Node::name () << endl;
4519 if (this->getObjectLabel () !=
"") {
4520 out <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
4522 out <<
"Global number of rows: " << this->getGlobalLength () << endl;
4523 if (className !=
"Tpetra::Vector") {
4524 out <<
"Number of columns: " << this->getNumVectors () << endl;
4531 if (vl > VERB_LOW) {
4532 const std::string lclStr = this->localDescribeToString (vl);
4533 ::Tpetra::Details::gathervPrint (out, lclStr, *comm);
4537 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4541 const Teuchos::EVerbosityLevel verbLevel)
const
4543 this->describeImpl (out,
"Tpetra::MultiVector", verbLevel);
4546 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4551 replaceMap (newMap);
4554 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4559 using ::Tpetra::Details::localDeepCopy;
4560 const char prefix[] =
"Tpetra::MultiVector::assign: ";
4562 TEUCHOS_TEST_FOR_EXCEPTION
4564 this->getNumVectors () != src.
getNumVectors (), std::invalid_argument,
4565 prefix <<
"Global dimensions of the two Tpetra::MultiVector "
4566 "objects do not match. src has dimensions [" << src.
getGlobalLength ()
4567 <<
"," << src.
getNumVectors () <<
"], and *this has dimensions ["
4568 << this->getGlobalLength () <<
"," << this->getNumVectors () <<
"].");
4570 TEUCHOS_TEST_FOR_EXCEPTION
4571 (this->getLocalLength () != src.
getLocalLength (), std::invalid_argument,
4572 prefix <<
"The local row counts of the two Tpetra::MultiVector "
4573 "objects do not match. src has " << src.
getLocalLength () <<
" row(s) "
4574 <<
" and *this has " << this->getLocalLength () <<
" row(s).");
4580 this->clear_sync_state();
4581 this->modify_device ();
4586 if (src_last_updated_on_host) {
4587 localDeepCopy (this->getLocalViewDevice (),
4589 this->isConstantStride (),
4591 this->whichVectors_ (),
4595 localDeepCopy (this->getLocalViewDevice (),
4597 this->isConstantStride (),
4599 this->whichVectors_ (),
4604 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4608 using ::Tpetra::Details::PackTraits;
4611 typename Kokkos::View<int*, device_type>::HostMirror::execution_space;
4613 const size_t l1 = this->getLocalLength();
4615 if ((l1!=l2) || (this->getNumVectors() != vec.
getNumVectors())) {
4622 auto v1 = this->getLocalViewHost ();
4624 if (PackTraits<ST, HES>::packValueCount (v1(0,0)) !=
4625 PackTraits<ST, HES>::packValueCount (v2(0,0))) {
4632 template <
class ST,
class LO,
class GO,
class NT>
4635 std::swap(mv.
map_, this->map_);
4636 std::swap(mv.
view_, this->view_);
4637 std::swap(mv.
origView_, this->origView_);
4641 #ifdef HAVE_TPETRACORE_TEUCHOSNUMERICS
4642 template <
class ST,
class LO,
class GO,
class NT>
4645 const Teuchos::SerialDenseMatrix<int, ST>& src)
4647 using ::Tpetra::Details::localDeepCopy;
4649 using IST =
typename MV::impl_scalar_type;
4650 using input_view_type =
4651 Kokkos::View<
const IST**, Kokkos::LayoutLeft,
4652 Kokkos::HostSpace, Kokkos::MemoryUnmanaged>;
4653 using pair_type = std::pair<LO, LO>;
4655 const LO numRows =
static_cast<LO
> (src.numRows ());
4656 const LO numCols =
static_cast<LO
> (src.numCols ());
4658 TEUCHOS_TEST_FOR_EXCEPTION
4661 std::invalid_argument,
"Tpetra::deep_copy: On Process "
4662 << dst.
getMap ()->getComm ()->getRank () <<
", dst is "
4664 <<
", but src is " << numRows <<
" x " << numCols <<
".");
4666 const IST* src_raw =
reinterpret_cast<const IST*
> (src.values ());
4667 input_view_type src_orig (src_raw, src.stride (), numCols);
4668 input_view_type src_view =
4669 Kokkos::subview (src_orig, pair_type (0, numRows), Kokkos::ALL ());
4673 constexpr
bool src_isConstantStride =
true;
4674 Teuchos::ArrayView<const size_t> srcWhichVectors (
nullptr, 0);
4678 src_isConstantStride,
4679 getMultiVectorWhichVectors (dst),
4683 template <
class ST,
class LO,
class GO,
class NT>
4685 deep_copy (Teuchos::SerialDenseMatrix<int, ST>& dst,
4686 const MultiVector<ST, LO, GO, NT>& src)
4688 using ::Tpetra::Details::localDeepCopy;
4689 using MV = MultiVector<ST, LO, GO, NT>;
4690 using IST =
typename MV::impl_scalar_type;
4691 using output_view_type =
4692 Kokkos::View<IST**, Kokkos::LayoutLeft,
4693 Kokkos::HostSpace, Kokkos::MemoryUnmanaged>;
4694 using pair_type = std::pair<LO, LO>;
4696 const LO numRows =
static_cast<LO
> (dst.numRows ());
4697 const LO numCols =
static_cast<LO
> (dst.numCols ());
4699 TEUCHOS_TEST_FOR_EXCEPTION
4700 (numRows != static_cast<LO> (src.getLocalLength ()) ||
4701 numCols != static_cast<LO> (src.getNumVectors ()),
4702 std::invalid_argument,
"Tpetra::deep_copy: On Process "
4703 << src.getMap ()->getComm ()->getRank () <<
", src is "
4704 << src.getLocalLength () <<
" x " << src.getNumVectors ()
4705 <<
", but dst is " << numRows <<
" x " << numCols <<
".");
4707 IST* dst_raw =
reinterpret_cast<IST*
> (dst.values ());
4708 output_view_type dst_orig (dst_raw, dst.stride (), numCols);
4710 Kokkos::subview (dst_orig, pair_type (0, numRows), Kokkos::ALL ());
4712 constexpr
bool dst_isConstantStride =
true;
4713 Teuchos::ArrayView<const size_t> dstWhichVectors (
nullptr, 0);
4716 if (src.need_sync_host ()) {
4717 localDeepCopy (dst_view,
4718 src.getLocalViewDevice (),
4719 dst_isConstantStride,
4720 src.isConstantStride (),
4722 getMultiVectorWhichVectors (src));
4726 src.getLocalViewHost (),
4727 dst_isConstantStride,
4728 src.isConstantStride (),
4730 getMultiVectorWhichVectors (src));
4733 #endif // HAVE_TPETRACORE_TEUCHOSNUMERICS
4735 template <
class Scalar,
class LO,
class GO,
class NT>
4736 Teuchos::RCP<MultiVector<Scalar, LO, GO, NT> >
4740 typedef MultiVector<Scalar, LO, GO, NT> MV;
4741 return Teuchos::rcp (
new MV (map, numVectors));
4744 template <
class ST,
class LO,
class GO,
class NT>
4745 MultiVector<ST, LO, GO, NT>
4762 #ifdef HAVE_TPETRACORE_TEUCHOSNUMERICS
4763 # define TPETRA_MULTIVECTOR_INSTANT(SCALAR,LO,GO,NODE) \
4764 template class MultiVector< SCALAR , LO , GO , NODE >; \
4765 template MultiVector< SCALAR , LO , GO , NODE > createCopy( const MultiVector< SCALAR , LO , GO , NODE >& src); \
4766 template Teuchos::RCP<MultiVector< SCALAR , LO , GO , NODE > > createMultiVector (const Teuchos::RCP<const Map<LO, GO, NODE> >& map, size_t numVectors); \
4767 template void deep_copy (MultiVector<SCALAR, LO, GO, NODE>& dst, const Teuchos::SerialDenseMatrix<int, SCALAR>& src); \
4768 template void deep_copy (Teuchos::SerialDenseMatrix<int, SCALAR>& dst, const MultiVector<SCALAR, LO, GO, NODE>& src);
4771 # define TPETRA_MULTIVECTOR_INSTANT(SCALAR,LO,GO,NODE) \
4772 template class MultiVector< SCALAR , LO , GO , NODE >; \
4773 template MultiVector< SCALAR , LO , GO , NODE > createCopy( const MultiVector< SCALAR , LO , GO , NODE >& src);
4775 #endif // HAVE_TPETRACORE_TEUCHOSNUMERICS
4777 #endif // TPETRA_MULTIVECTOR_DEF_HPP
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > createMultiVector(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &map, const size_t numVectors)
Nonmember MultiVector "constructor": Create a MultiVector from a given Map.
Teuchos::RCP< const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > subView(const Teuchos::Range1D &colRng) const
Return a const MultiVector with const views of selected columns.
dual_view_type::t_host getLocalViewHost() const
A local Kokkos::View of host memory.
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object with the given verbosity level to a FancyOStream.
dual_view_type view_
The Kokkos::DualView containing the MultiVector's data.
Kokkos::DualView< size_t *, buffer_device_type > numExportPacketsPerLID_
Number of packets to send for each send operation.
Teuchos::ArrayRCP< Scalar > get1dViewNonConst()
Nonconst persisting (1-D) view of this multivector's local values.
void meanValue(const Teuchos::ArrayView< impl_scalar_type > &means) const
Compute mean (average) value of each column.
Teuchos::RCP< const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > offsetView(const Teuchos::RCP< const map_type > &subMap, const size_t offset) const
Return a const view of a subset of rows.
void get2dCopy(const Teuchos::ArrayView< const Teuchos::ArrayView< Scalar > > &ArrayOfPtrs) const
Fill the given array with a copy of this multivector's local values.
Declaration and definition of Tpetra::Details::Blas::fill, an implementation detail of Tpetra::MultiV...
void clear_sync_state()
Clear "modified" flags on both host and device sides.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
typename device_type::execution_space execution_space
Type of the (new) Kokkos execution space.
Kokkos::DualView< impl_scalar_type **, Kokkos::LayoutLeft, execution_space > dual_view_type
Kokkos::DualView specialization used by this class.
void putScalar(const Scalar &value)
Set all values in the multivector with the given value.
size_t getNumVectors() const
Number of columns in the multivector.
size_t getLocalLength() const
Local number of rows on the calling process.
void replaceMap(const Teuchos::RCP< const map_type > &map)
Replace the underlying Map in place.
Teuchos::ArrayRCP< Teuchos::ArrayRCP< Scalar > > get2dViewNonConst()
Return non-const persisting pointers to values.
bool isConstantStride() const
Whether this multivector has constant stride between columns.
MultiVector< ST, LO, GO, NT > createCopy(const MultiVector< ST, LO, GO, NT > &src)
Return a deep copy of the given MultiVector.
std::string localDescribeToString(const Teuchos::EVerbosityLevel vl) const
Print the calling process' verbose describe() information to the returned string. ...
One or more distributed dense vectors.
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > subCopy(const Teuchos::Range1D &colRng) const
Return a MultiVector with copies of selected columns.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
void sumIntoGlobalValue(const GlobalOrdinal gblRow, const size_t col, const impl_scalar_type &value, const bool atomic=useAtomicUpdatesByDefault) const
Update (+=) a value in host memory, using global row index.
bool isDistributed() const
Whether this is a globally distributed object.
void get1dCopy(const Teuchos::ArrayView< Scalar > &A, const size_t LDA) const
Fill the given array with a copy of this multivector's local values.
MultiVector< ST, LO, GO, NT > createCopy(const MultiVector< ST, LO, GO, NT > &src)
Return a deep copy of the given MultiVector.
static bool debug()
Whether Tpetra is in debug mode.
bool need_sync_device() const
Whether this MultiVector needs synchronization to the device.
bool isSameSize(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &vec) const
size_t getOrigNumLocalCols() const
"Original" number of columns in the (local) data.
Teuchos::ArrayRCP< const Scalar > getData(size_t j) const
Const view of the local values in a particular vector of this multivector.
size_t global_size_t
Global size_t object.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Kokkos::DualView< size_t *, buffer_device_type > numImportPacketsPerLID_
Number of packets to receive for each receive operation.
Insert new values that don't currently exist.
Kokkos::DualView< packet_type *, buffer_device_type > imports_
Buffer into which packed data are imported (received from other processes).
Kokkos::DualView< packet_type *, buffer_device_type > exports_
Buffer from which packed data are exported (sent to other processes).
void sync_host()
Synchronize to Host.
void scale(const Scalar &alpha)
Scale in place: this = alpha*this.
void multiply(Teuchos::ETransp transA, Teuchos::ETransp transB, const Scalar &alpha, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, const Scalar &beta)
Matrix-matrix multiplication: this = beta*this + alpha*op(A)*op(B).
Kokkos::DualView< T *, DT > getDualViewCopyFromArrayView(const Teuchos::ArrayView< const T > &x_av, const char label[], const bool leaveOnHost)
Get a 1-D Kokkos::DualView which is a deep copy of the input Teuchos::ArrayView (which views host mem...
void update(const Scalar &alpha, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Scalar &beta)
Update: this = beta*this + alpha*A.
void elementWiseMultiply(Scalar scalarAB, const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, Scalar scalarThis)
Multiply a Vector A elementwise by a MultiVector B.
Teuchos::RCP< Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > getVectorNonConst(const size_t j)
Return a Vector which is a nonconst view of column j.
void assign(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &src)
Copy the contents of src into *this (deep copy).
static void allReduceView(const OutputViewType &output, const InputViewType &input, const Teuchos::Comm< int > &comm)
All-reduce from input Kokkos::View to output Kokkos::View.
dual_view_type origView_
The "original view" of the MultiVector's data.
std::string descriptionImpl(const std::string &className) const
Implementation of description() for this class, and its subclass Vector.
void normInf(const Kokkos::View< mag_type *, Kokkos::HostSpace > &norms) const
Compute the infinity-norm of each vector (column), storing the result in a host View.
void replaceGlobalValue(const GlobalOrdinal gblRow, const size_t col, const impl_scalar_type &value) const
Replace value in host memory, using global row index.
typename Kokkos::Details::InnerProductSpaceTraits< impl_scalar_type >::dot_type dot_type
Type of an inner ("dot") product result.
static bool verbose()
Whether Tpetra is in verbose mode.
CombineMode
Rule for combining data in an Import or Export.
Sum new values into existing values.
Teuchos::RCP< const map_type > map_
The Map over which this object is distributed.
bool reallocDualViewIfNeeded(Kokkos::DualView< ValueType *, DeviceType > &dv, const size_t newSize, const char newLabel[], const size_t tooBigFactor=2, const bool needFenceBeforeRealloc=true)
Reallocate the DualView in/out argument, if needed.
Declaration of Tpetra::Details::isInterComm.
void norm2(const Kokkos::View< mag_type *, Kokkos::HostSpace > &norms) const
Compute the two-norm of each vector (column), storing the result in a host View.
void dot(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Teuchos::ArrayView< dot_type > &dots) const
Compute the dot product of each corresponding pair of vectors (columns) in A and B.
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > subViewNonConst(const Teuchos::Range1D &colRng)
Return a MultiVector with views of selected columns.
virtual size_t constantNumberOfPackets() const
Number of packets to send per LID.
bool need_sync_host() const
Whether this MultiVector needs synchronization to the host.
void modify_device()
Mark data as modified on the device side.
Replace old value with maximum of magnitudes of old and new values.
typename Kokkos::ArithTraits< impl_scalar_type >::mag_type mag_type
Type of a norm result.
Abstract base class for objects that can be the source of an Import or Export operation.
Teuchos::ArrayRCP< Scalar > getDataNonConst(size_t j)
View of the local values in a particular vector of this multivector.
Declaration and definition of Tpetra::Details::reallocDualViewIfNeeded, an implementation detail of T...
Replace existing values with new values.
void swap(MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &mv)
Return a deep copy of this MultiVector, with a different Node type.
void randomize()
Set all values in the multivector to pseudorandom numbers.
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > offsetViewNonConst(const Teuchos::RCP< const map_type > &subMap, const size_t offset)
Return a nonconst view of a subset of rows.
Teuchos::ArrayRCP< T > getSubArrayRCP(Teuchos::ArrayRCP< T > arr, size_t j) const
Persisting view of j-th column in the given ArrayRCP.
std::string combineModeToString(const CombineMode combineMode)
Human-readable string representation of the given CombineMode.
Teuchos::RCP< const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > getVector(const size_t j) const
Return a Vector which is a const view of column j.
void modify_host()
Mark data as modified on the host side.
MultiVector()
Default constructor: makes a MultiVector with no rows or columns.
void replaceLocalValue(const LocalOrdinal lclRow, const size_t col, const impl_scalar_type &value) const
Replace value in host memory, using local (row) index.
typename Kokkos::Details::ArithTraits< Scalar >::val_type impl_scalar_type
The type used internally in place of Scalar.
void norm1(const Kokkos::View< mag_type *, Kokkos::HostSpace > &norms) const
Compute the one-norm of each vector (column), storing the result in a host view.
A parallel distribution of indices over processes.
size_t getStride() const
Stride between columns in the multivector.
A distributed dense vector.
Stand-alone utility functions and macros.
virtual std::string description() const
A simple one-line description of this object.
void reduce()
Sum values of a locally replicated multivector across all processes.
void describeImpl(Teuchos::FancyOStream &out, const std::string &className, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Implementation of describe() for this class, and its subclass Vector.
typename map_type::local_ordinal_type local_ordinal_type
The type of local indices that this class uses.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
virtual bool checkSizes(const SrcDistObject &sourceObj)
Whether data redistribution between sourceObj and this object is legal.
dual_view_type::t_dev getLocalViewDevice() const
A local Kokkos::View of device memory.
void sumIntoLocalValue(const LocalOrdinal lclRow, const size_t col, const impl_scalar_type &val, const bool atomic=useAtomicUpdatesByDefault) const
Update (+=) a value in host memory, using local row index.
Teuchos::ArrayRCP< Teuchos::ArrayRCP< const Scalar > > get2dView() const
Return const persisting pointers to values.
Declaration and definition of Tpetra::Details::normImpl, which is an implementation detail of Tpetra:...
void normImpl(MagnitudeType norms[], const Kokkos::View< const ValueType **, ArrayLayout, DeviceType > &X, const EWhichNorm whichNorm, const Teuchos::ArrayView< const size_t > &whichVecs, const bool isConstantStride, const bool isDistributed, const Teuchos::Comm< int > *comm)
Implementation of MultiVector norms.
void reciprocal(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
Put element-wise reciprocal values of input Multi-vector in target, this(i,j) = 1/A(i,j).
Base class for distributed Tpetra objects that support data redistribution.
void localDeepCopy(const DstViewType &dst, const SrcViewType &src, const bool dstConstStride, const bool srcConstStride, const DstWhichVecsType &dstWhichVecs, const SrcWhichVecsType &srcWhichVecs)
Implementation of Tpetra::MultiVector deep copy of local data.
global_size_t getGlobalLength() const
Global number of rows in the multivector.
void abs(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
Put element-wise absolute values of input Multi-vector in target: A = abs(this)
EWhichNorm
Input argument for normImpl() (which see).
Teuchos::ArrayRCP< const Scalar > get1dView() const
Const persisting (1-D) view of this multivector's local values.
size_t getOrigNumLocalRows() const
"Original" number of rows in the (local) data.
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap)
Remove processes owning zero rows from the Map and their communicator.
All-reduce a 1-D or 2-D Kokkos::View.
Declaration and definition of Tpetra::Details::lclDot, an implementation detail of Tpetra::MultiVecto...
void sync_device()
Synchronize to Device.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Teuchos::Array< size_t > whichVectors_
Indices of columns this multivector is viewing.