41 #ifndef TPETRA_MULTIVECTOR_DEF_HPP
42 #define TPETRA_MULTIVECTOR_DEF_HPP
54 #include "Tpetra_Vector.hpp"
66 #include "Tpetra_Details_Random.hpp"
67 #ifdef HAVE_TPETRACORE_TEUCHOSNUMERICS
68 # include "Teuchos_SerialDenseMatrix.hpp"
69 #endif // HAVE_TPETRACORE_TEUCHOSNUMERICS
70 #include "Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels.hpp"
71 #include "KokkosCompat_View.hpp"
72 #include "KokkosBlas.hpp"
73 #include "KokkosKernels_Utils.hpp"
74 #include "Kokkos_Random.hpp"
75 #include "Kokkos_ArithTraits.hpp"
79 #ifdef HAVE_TPETRA_INST_FLOAT128
82 template<
class Generator>
83 struct rand<Generator, __float128> {
84 static KOKKOS_INLINE_FUNCTION __float128 max ()
86 return static_cast<__float128
> (1.0);
88 static KOKKOS_INLINE_FUNCTION __float128
93 const __float128 scalingFactor =
94 static_cast<__float128
> (std::numeric_limits<double>::min ()) /
95 static_cast<__float128> (2.0);
96 const __float128 higherOrderTerm =
static_cast<__float128
> (gen.drand ());
97 const __float128 lowerOrderTerm =
98 static_cast<__float128
> (gen.drand ()) * scalingFactor;
99 return higherOrderTerm + lowerOrderTerm;
101 static KOKKOS_INLINE_FUNCTION __float128
102 draw (Generator& gen,
const __float128& range)
105 const __float128 scalingFactor =
106 static_cast<__float128
> (std::numeric_limits<double>::min ()) /
107 static_cast<__float128> (2.0);
108 const __float128 higherOrderTerm =
109 static_cast<__float128
> (gen.drand (range));
110 const __float128 lowerOrderTerm =
111 static_cast<__float128
> (gen.drand (range)) * scalingFactor;
112 return higherOrderTerm + lowerOrderTerm;
114 static KOKKOS_INLINE_FUNCTION __float128
115 draw (Generator& gen,
const __float128&
start,
const __float128& end)
118 const __float128 scalingFactor =
119 static_cast<__float128
> (std::numeric_limits<double>::min ()) /
120 static_cast<__float128> (2.0);
121 const __float128 higherOrderTerm =
122 static_cast<__float128
> (gen.drand (start, end));
123 const __float128 lowerOrderTerm =
124 static_cast<__float128
> (gen.drand (start, end)) * scalingFactor;
125 return higherOrderTerm + lowerOrderTerm;
129 #endif // HAVE_TPETRA_INST_FLOAT128
147 template<
class ST,
class LO,
class GO,
class NT>
149 allocDualView (
const size_t lclNumRows,
150 const size_t numCols,
151 const bool zeroOut =
true,
152 const bool allowPadding =
false)
154 using ::Tpetra::Details::Behavior;
155 using Kokkos::AllowPadding;
156 using Kokkos::view_alloc;
157 using Kokkos::WithoutInitializing;
160 typedef typename dual_view_type::t_dev dev_view_type;
166 const std::string label (
"MV::DualView");
167 const bool debug = Behavior::debug ();
177 dev_view_type d_view;
180 d_view = dev_view_type (view_alloc (label, AllowPadding),
181 lclNumRows, numCols);
184 d_view = dev_view_type (view_alloc (label),
185 lclNumRows, numCols);
190 d_view = dev_view_type (view_alloc (label,
193 lclNumRows, numCols);
196 d_view = dev_view_type (view_alloc (label, WithoutInitializing),
197 lclNumRows, numCols);
208 const ST nan = Kokkos::ArithTraits<ST>::nan ();
209 KokkosBlas::fill (d_view, nan);
213 TEUCHOS_TEST_FOR_EXCEPTION
214 (static_cast<size_t> (d_view.extent (0)) != lclNumRows ||
215 static_cast<size_t> (d_view.extent (1)) != numCols, std::logic_error,
216 "allocDualView: d_view's dimensions actual dimensions do not match "
217 "requested dimensions. d_view is " << d_view.extent (0) <<
" x " <<
218 d_view.extent (1) <<
"; requested " << lclNumRows <<
" x " << numCols
219 <<
". Please report this bug to the Tpetra developers.");
222 return wrapped_dual_view_type(d_view);
229 template<
class T,
class ExecSpace>
230 struct MakeUnmanagedView {
240 typedef typename std::conditional<
241 Kokkos::SpaceAccessibility<
242 typename ExecSpace::memory_space,
243 Kokkos::HostSpace>::accessible,
244 typename ExecSpace::device_type,
245 typename Kokkos::DualView<T*, ExecSpace>::host_mirror_space>::type host_exec_space;
246 typedef Kokkos::LayoutLeft array_layout;
247 typedef Kokkos::View<T*, array_layout, host_exec_space,
248 Kokkos::MemoryUnmanaged> view_type;
250 static view_type getView (
const Teuchos::ArrayView<T>& x_in)
252 const size_t numEnt =
static_cast<size_t> (x_in.size ());
256 return view_type (x_in.getRawPtr (), numEnt);
262 template<
class WrappedDualViewType>
264 takeSubview (
const WrappedDualViewType& X,
265 const std::pair<size_t, size_t>& rowRng,
266 const Kokkos::ALL_t& colRng)
270 return WrappedDualViewType(X,rowRng,colRng);
273 template<
class WrappedDualViewType>
275 takeSubview (
const WrappedDualViewType& X,
276 const Kokkos::ALL_t& rowRng,
277 const std::pair<size_t, size_t>& colRng)
279 using DualViewType =
typename WrappedDualViewType::DVT;
282 if (X.extent (0) == 0 && X.extent (1) != 0) {
283 return WrappedDualViewType(DualViewType (
"MV::DualView", 0, colRng.second - colRng.first));
286 return WrappedDualViewType(X,rowRng,colRng);
290 template<
class WrappedDualViewType>
292 takeSubview (
const WrappedDualViewType& X,
293 const std::pair<size_t, size_t>& rowRng,
294 const std::pair<size_t, size_t>& colRng)
296 using DualViewType =
typename WrappedDualViewType::DVT;
306 if (X.extent (0) == 0 && X.extent (1) != 0) {
307 return WrappedDualViewType(DualViewType (
"MV::DualView", 0, colRng.second - colRng.first));
310 return WrappedDualViewType(X,rowRng,colRng);
314 template<
class WrappedOrNotDualViewType>
316 getDualViewStride (
const WrappedOrNotDualViewType& dv)
322 size_t strides[WrappedOrNotDualViewType::t_dev::rank+1];
324 const size_t LDA = strides[1];
325 const size_t numRows = dv.extent (0);
328 return (numRows == 0) ? size_t (1) : numRows;
335 template<
class ViewType>
337 getViewStride (
const ViewType& view)
339 const size_t LDA = view.stride (1);
340 const size_t numRows = view.extent (0);
343 return (numRows == 0) ? size_t (1) : numRows;
350 template <
class impl_scalar_type,
class buffer_device_type>
353 Kokkos::DualView<impl_scalar_type*, buffer_device_type> imports
356 if (! imports.need_sync_device ()) {
361 size_t localLengthThreshold =
363 return imports.extent(0) <= localLengthThreshold;
368 template <
class SC,
class LO,
class GO,
class NT>
370 runKernelOnHost (const ::Tpetra::MultiVector<SC, LO, GO, NT>& X)
372 if (! X.need_sync_device ()) {
377 size_t localLengthThreshold =
379 return X.getLocalLength () <= localLengthThreshold;
383 template <
class SC,
class LO,
class GO,
class NT>
385 multiVectorNormImpl (typename ::Tpetra::MultiVector<SC, LO, GO, NT>::mag_type norms[],
389 using namespace Tpetra;
392 using val_type =
typename MV::impl_scalar_type;
393 using mag_type =
typename MV::mag_type;
394 using dual_view_type =
typename MV::dual_view_type;
397 auto comm = map.is_null () ?
nullptr : map->getComm ().getRawPtr ();
398 auto whichVecs = getMultiVectorWhichVectors (X);
402 const bool runOnHost = runKernelOnHost (X);
404 using view_type =
typename dual_view_type::t_host;
405 using array_layout =
typename view_type::array_layout;
406 using device_type =
typename view_type::device_type;
409 normImpl<val_type, array_layout, device_type,
410 mag_type> (norms, X_lcl, whichNorm, whichVecs,
411 isConstantStride, isDistributed, comm);
414 using view_type =
typename dual_view_type::t_dev;
415 using array_layout =
typename view_type::array_layout;
416 using device_type =
typename view_type::device_type;
419 normImpl<val_type, array_layout, device_type,
420 mag_type> (norms, X_lcl, whichNorm, whichVecs,
421 isConstantStride, isDistributed, comm);
430 template <
typename DstView,
typename SrcView>
431 struct AddAssignFunctor {
434 AddAssignFunctor(DstView &tgt_, SrcView &src_) : tgt(tgt_), src(src_) {}
436 KOKKOS_INLINE_FUNCTION
void
437 operator () (
const size_t k)
const { tgt(k) += src(k); }
444 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
448 return (VectorIndex < 1 && VectorIndex != 0) || VectorIndex >= getNumVectors();
451 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
457 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
460 const size_t numVecs,
461 const bool zeroOut) :
467 view_ = allocDualView<Scalar, LocalOrdinal, GlobalOrdinal, Node> (lclNumRows, numVecs, zeroOut);
470 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
473 const Teuchos::DataAccess copyOrView) :
475 view_ (source.view_),
476 whichVectors_ (source.whichVectors_)
479 const char tfecfFuncName[] =
"MultiVector(const MultiVector&, "
480 "const Teuchos::DataAccess): ";
484 if (copyOrView == Teuchos::Copy) {
488 this->
view_ = cpy.view_;
491 else if (copyOrView == Teuchos::View) {
494 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
495 true, std::invalid_argument,
"Second argument 'copyOrView' has an "
496 "invalid value " << copyOrView <<
". Valid values include "
497 "Teuchos::Copy = " << Teuchos::Copy <<
" and Teuchos::View = "
498 << Teuchos::View <<
".");
502 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
505 const dual_view_type& view) :
507 view_ (wrapped_dual_view_type(view))
509 const char tfecfFuncName[] =
"MultiVector(Map,DualView): ";
510 const size_t lclNumRows_map = map.is_null () ? size_t (0) :
511 map->getLocalNumElements ();
512 const size_t lclNumRows_view = view.extent (0);
513 const size_t LDA = getDualViewStride (
view_);
515 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
516 (LDA < lclNumRows_map || lclNumRows_map != lclNumRows_view,
517 std::invalid_argument,
"Kokkos::DualView does not match Map. "
518 "map->getLocalNumElements() = " << lclNumRows_map
519 <<
", view.extent(0) = " << lclNumRows_view
520 <<
", and getStride() = " << LDA <<
".");
522 using ::Tpetra::Details::Behavior;
523 const bool debug = Behavior::debug ();
525 using ::Tpetra::Details::checkGlobalDualViewValidity;
526 std::ostringstream gblErrStrm;
527 const bool verbose = Behavior::verbose ();
528 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
529 const bool gblValid =
530 checkGlobalDualViewValidity (&gblErrStrm, view, verbose,
532 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
533 (! gblValid, std::runtime_error, gblErrStrm.str ());
538 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
541 const wrapped_dual_view_type& view) :
545 const char tfecfFuncName[] =
"MultiVector(Map,DualView): ";
546 const size_t lclNumRows_map = map.is_null () ? size_t (0) :
547 map->getLocalNumElements ();
548 const size_t lclNumRows_view = view.extent (0);
549 const size_t LDA = getDualViewStride (view);
551 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
552 (LDA < lclNumRows_map || lclNumRows_map != lclNumRows_view,
553 std::invalid_argument,
"Kokkos::DualView does not match Map. "
554 "map->getLocalNumElements() = " << lclNumRows_map
555 <<
", view.extent(0) = " << lclNumRows_view
556 <<
", and getStride() = " << LDA <<
".");
558 using ::Tpetra::Details::Behavior;
559 const bool debug = Behavior::debug ();
561 using ::Tpetra::Details::checkGlobalWrappedDualViewValidity;
562 std::ostringstream gblErrStrm;
563 const bool verbose = Behavior::verbose ();
564 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
565 const bool gblValid =
566 checkGlobalWrappedDualViewValidity (&gblErrStrm, view, verbose,
568 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
569 (! gblValid, std::runtime_error, gblErrStrm.str ());
575 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
578 const typename dual_view_type::t_dev& d_view) :
581 using Teuchos::ArrayRCP;
583 const char tfecfFuncName[] =
"MultiVector(map,d_view): ";
587 const size_t LDA = getViewStride (d_view);
588 const size_t lclNumRows = map->getLocalNumElements ();
589 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
590 (LDA < lclNumRows, std::invalid_argument,
"Map does not match "
591 "Kokkos::View. map->getLocalNumElements() = " << lclNumRows
592 <<
", View's column stride = " << LDA
593 <<
", and View's extent(0) = " << d_view.extent (0) <<
".");
595 auto h_view = Kokkos::create_mirror_view (d_view);
597 view_ = wrapped_dual_view_type(dual_view);
599 using ::Tpetra::Details::Behavior;
600 const bool debug = Behavior::debug ();
602 using ::Tpetra::Details::checkGlobalWrappedDualViewValidity;
603 std::ostringstream gblErrStrm;
604 const bool verbose = Behavior::verbose ();
605 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
606 const bool gblValid =
607 checkGlobalWrappedDualViewValidity (&gblErrStrm,
view_, verbose,
609 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
610 (! gblValid, std::runtime_error, gblErrStrm.str ());
615 dual_view.modify_device ();
618 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
621 const dual_view_type& view,
622 const dual_view_type& origView) :
624 view_ (wrapped_dual_view_type(view,origView))
626 const char tfecfFuncName[] =
"MultiVector(map,view,origView): ";
628 const size_t LDA = getDualViewStride (origView);
630 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
631 LDA < lclNumRows, std::invalid_argument,
"The input Kokkos::DualView's "
632 "column stride LDA = " << LDA <<
" < getLocalLength() = " << lclNumRows
633 <<
". This may also mean that the input origView's first dimension (number "
634 "of rows = " << origView.extent (0) <<
") does not not match the number "
635 "of entries in the Map on the calling process.");
637 using ::Tpetra::Details::Behavior;
638 const bool debug = Behavior::debug ();
640 using ::Tpetra::Details::checkGlobalDualViewValidity;
641 std::ostringstream gblErrStrm;
642 const bool verbose = Behavior::verbose ();
643 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
644 const bool gblValid_0 =
645 checkGlobalDualViewValidity (&gblErrStrm, view, verbose,
647 const bool gblValid_1 =
648 checkGlobalDualViewValidity (&gblErrStrm, origView, verbose,
650 const bool gblValid = gblValid_0 && gblValid_1;
651 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
652 (! gblValid, std::runtime_error, gblErrStrm.str ());
657 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
660 const dual_view_type& view,
661 const Teuchos::ArrayView<const size_t>& whichVectors) :
664 whichVectors_ (whichVectors.begin (), whichVectors.end ())
667 using Kokkos::subview;
668 const char tfecfFuncName[] =
"MultiVector(map,view,whichVectors): ";
670 using ::Tpetra::Details::Behavior;
671 const bool debug = Behavior::debug ();
673 using ::Tpetra::Details::checkGlobalDualViewValidity;
674 std::ostringstream gblErrStrm;
675 const bool verbose = Behavior::verbose ();
676 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
677 const bool gblValid =
678 checkGlobalDualViewValidity (&gblErrStrm, view, verbose,
680 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
681 (! gblValid, std::runtime_error, gblErrStrm.str ());
684 const size_t lclNumRows = map.is_null () ? size_t (0) :
685 map->getLocalNumElements ();
692 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
693 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (0)) < lclNumRows,
694 std::invalid_argument,
"view.extent(0) = " << view.extent (0)
695 <<
" < map->getLocalNumElements() = " << lclNumRows <<
".");
696 if (whichVectors.size () != 0) {
697 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
698 view.extent (1) != 0 && view.extent (1) == 0,
699 std::invalid_argument,
"view.extent(1) = 0, but whichVectors.size()"
700 " = " << whichVectors.size () <<
" > 0.");
701 size_t maxColInd = 0;
702 typedef Teuchos::ArrayView<const size_t>::size_type size_type;
703 for (size_type k = 0; k < whichVectors.size (); ++k) {
704 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
705 whichVectors[k] == Teuchos::OrdinalTraits<size_t>::invalid (),
706 std::invalid_argument,
"whichVectors[" << k <<
"] = "
707 "Teuchos::OrdinalTraits<size_t>::invalid().");
708 maxColInd = std::max (maxColInd, whichVectors[k]);
710 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
711 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (1)) <= maxColInd,
712 std::invalid_argument,
"view.extent(1) = " << view.extent (1)
713 <<
" <= max(whichVectors) = " << maxColInd <<
".");
718 const size_t LDA = getDualViewStride (view);
719 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
720 (LDA < lclNumRows, std::invalid_argument,
721 "LDA = " << LDA <<
" < this->getLocalLength() = " << lclNumRows <<
".");
723 if (whichVectors.size () == 1) {
733 const std::pair<size_t, size_t> colRng (whichVectors[0],
734 whichVectors[0] + 1);
741 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
744 const wrapped_dual_view_type& view,
745 const Teuchos::ArrayView<const size_t>& whichVectors) :
748 whichVectors_ (whichVectors.begin (), whichVectors.end ())
751 using Kokkos::subview;
752 const char tfecfFuncName[] =
"MultiVector(map,view,whichVectors): ";
754 using ::Tpetra::Details::Behavior;
755 const bool debug = Behavior::debug ();
757 using ::Tpetra::Details::checkGlobalWrappedDualViewValidity;
758 std::ostringstream gblErrStrm;
759 const bool verbose = Behavior::verbose ();
760 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
761 const bool gblValid =
762 checkGlobalWrappedDualViewValidity (&gblErrStrm, view, verbose,
764 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
765 (! gblValid, std::runtime_error, gblErrStrm.str ());
768 const size_t lclNumRows = map.is_null () ? size_t (0) :
769 map->getLocalNumElements ();
776 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
777 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (0)) < lclNumRows,
778 std::invalid_argument,
"view.extent(0) = " << view.extent (0)
779 <<
" < map->getLocalNumElements() = " << lclNumRows <<
".");
780 if (whichVectors.size () != 0) {
781 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
782 view.extent (1) != 0 && view.extent (1) == 0,
783 std::invalid_argument,
"view.extent(1) = 0, but whichVectors.size()"
784 " = " << whichVectors.size () <<
" > 0.");
785 size_t maxColInd = 0;
786 typedef Teuchos::ArrayView<const size_t>::size_type size_type;
787 for (size_type k = 0; k < whichVectors.size (); ++k) {
788 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
789 whichVectors[k] == Teuchos::OrdinalTraits<size_t>::invalid (),
790 std::invalid_argument,
"whichVectors[" << k <<
"] = "
791 "Teuchos::OrdinalTraits<size_t>::invalid().");
792 maxColInd = std::max (maxColInd, whichVectors[k]);
794 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
795 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (1)) <= maxColInd,
796 std::invalid_argument,
"view.extent(1) = " << view.extent (1)
797 <<
" <= max(whichVectors) = " << maxColInd <<
".");
802 const size_t LDA = getDualViewStride (view);
803 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
804 (LDA < lclNumRows, std::invalid_argument,
805 "LDA = " << LDA <<
" < this->getLocalLength() = " << lclNumRows <<
".");
807 if (whichVectors.size () == 1) {
817 const std::pair<size_t, size_t> colRng (whichVectors[0],
818 whichVectors[0] + 1);
826 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
829 const dual_view_type& view,
830 const dual_view_type& origView,
831 const Teuchos::ArrayView<const size_t>& whichVectors) :
833 view_ (wrapped_dual_view_type(view,origView)),
834 whichVectors_ (whichVectors.begin (), whichVectors.end ())
837 using Kokkos::subview;
838 const char tfecfFuncName[] =
"MultiVector(map,view,origView,whichVectors): ";
840 using ::Tpetra::Details::Behavior;
841 const bool debug = Behavior::debug ();
843 using ::Tpetra::Details::checkGlobalDualViewValidity;
844 std::ostringstream gblErrStrm;
845 const bool verbose = Behavior::verbose ();
846 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
847 const bool gblValid_0 =
848 checkGlobalDualViewValidity (&gblErrStrm, view, verbose,
850 const bool gblValid_1 =
851 checkGlobalDualViewValidity (&gblErrStrm, origView, verbose,
853 const bool gblValid = gblValid_0 && gblValid_1;
854 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
855 (! gblValid, std::runtime_error, gblErrStrm.str ());
865 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
866 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (0)) < lclNumRows,
867 std::invalid_argument,
"view.extent(0) = " << view.extent (0)
868 <<
" < map->getLocalNumElements() = " << lclNumRows <<
".");
869 if (whichVectors.size () != 0) {
870 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
871 view.extent (1) != 0 && view.extent (1) == 0,
872 std::invalid_argument,
"view.extent(1) = 0, but whichVectors.size()"
873 " = " << whichVectors.size () <<
" > 0.");
874 size_t maxColInd = 0;
875 typedef Teuchos::ArrayView<const size_t>::size_type size_type;
876 for (size_type k = 0; k < whichVectors.size (); ++k) {
877 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
878 whichVectors[k] == Teuchos::OrdinalTraits<size_t>::invalid (),
879 std::invalid_argument,
"whichVectors[" << k <<
"] = "
880 "Teuchos::OrdinalTraits<size_t>::invalid().");
881 maxColInd = std::max (maxColInd, whichVectors[k]);
883 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
884 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (1)) <= maxColInd,
885 std::invalid_argument,
"view.extent(1) = " << view.extent (1)
886 <<
" <= max(whichVectors) = " << maxColInd <<
".");
891 const size_t LDA = getDualViewStride (origView);
892 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
893 (LDA < lclNumRows, std::invalid_argument,
"Map and DualView origView "
894 "do not match. LDA = " << LDA <<
" < this->getLocalLength() = " <<
895 lclNumRows <<
". origView.extent(0) = " << origView.extent(0)
896 <<
", origView.stride(1) = " << origView.d_view.stride(1) <<
".");
898 if (whichVectors.size () == 1) {
907 const std::pair<size_t, size_t> colRng (whichVectors[0],
908 whichVectors[0] + 1);
916 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
919 const Teuchos::ArrayView<const Scalar>& data,
921 const size_t numVecs) :
924 typedef LocalOrdinal LO;
925 typedef GlobalOrdinal GO;
926 const char tfecfFuncName[] =
"MultiVector(map,data,LDA,numVecs): ";
932 const size_t lclNumRows =
933 map.is_null () ? size_t (0) : map->getLocalNumElements ();
934 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
935 (LDA < lclNumRows, std::invalid_argument,
"LDA = " << LDA <<
" < "
936 "map->getLocalNumElements() = " << lclNumRows <<
".");
938 const size_t minNumEntries = LDA * (numVecs - 1) + lclNumRows;
939 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
940 (static_cast<size_t> (data.size ()) < minNumEntries,
941 std::invalid_argument,
"Input Teuchos::ArrayView does not have enough "
942 "entries, given the input Map and number of vectors in the MultiVector."
943 " data.size() = " << data.size () <<
" < (LDA*(numVecs-1)) + "
944 "map->getLocalNumElements () = " << minNumEntries <<
".");
947 this->
view_ = allocDualView<Scalar, LO, GO, Node> (lclNumRows, numVecs);
959 Kokkos::MemoryUnmanaged> X_in_orig (X_in_raw, LDA, numVecs);
960 const Kokkos::pair<size_t, size_t> rowRng (0, lclNumRows);
961 auto X_in = Kokkos::subview (X_in_orig, rowRng, Kokkos::ALL ());
966 const size_t outStride =
967 X_out.extent (1) == 0 ? size_t (1) : X_out.stride (1);
968 if (LDA == outStride) {
975 typedef decltype (Kokkos::subview (X_out, Kokkos::ALL (), 0))
977 typedef decltype (Kokkos::subview (X_in, Kokkos::ALL (), 0))
979 for (
size_t j = 0; j < numVecs; ++j) {
980 out_col_view_type X_out_j = Kokkos::subview (X_out, Kokkos::ALL (), j);
981 in_col_view_type X_in_j = Kokkos::subview (X_in, Kokkos::ALL (), j);
988 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
991 const Teuchos::ArrayView<
const Teuchos::ArrayView<const Scalar> >& ArrayOfPtrs,
992 const size_t numVecs) :
996 typedef LocalOrdinal LO;
997 typedef GlobalOrdinal GO;
998 const char tfecfFuncName[] =
"MultiVector(map,ArrayOfPtrs,numVecs): ";
1001 const size_t lclNumRows =
1002 map.is_null () ? size_t (0) : map->getLocalNumElements ();
1003 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1004 (numVecs < 1 || numVecs != static_cast<size_t> (ArrayOfPtrs.size ()),
1005 std::runtime_error,
"Either numVecs (= " << numVecs <<
") < 1, or "
1006 "ArrayOfPtrs.size() (= " << ArrayOfPtrs.size () <<
") != numVecs.");
1007 for (
size_t j = 0; j < numVecs; ++j) {
1008 Teuchos::ArrayView<const Scalar> X_j_av = ArrayOfPtrs[j];
1009 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1010 static_cast<size_t> (X_j_av.size ()) < lclNumRows,
1011 std::invalid_argument,
"ArrayOfPtrs[" << j <<
"].size() = "
1012 << X_j_av.size () <<
" < map->getLocalNumElements() = " << lclNumRows
1016 view_ = allocDualView<Scalar, LO, GO, Node> (lclNumRows, numVecs);
1021 using array_layout =
typename decltype (X_out)::array_layout;
1022 using input_col_view_type =
typename Kokkos::View<
const IST*,
1025 Kokkos::MemoryUnmanaged>;
1027 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
1028 for (
size_t j = 0; j < numVecs; ++j) {
1029 Teuchos::ArrayView<const IST> X_j_av =
1030 Teuchos::av_reinterpret_cast<
const IST> (ArrayOfPtrs[j]);
1031 input_col_view_type X_j_in (X_j_av.getRawPtr (), lclNumRows);
1032 auto X_j_out = Kokkos::subview (X_out, rowRng, j);
1038 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1041 return whichVectors_.empty ();
1044 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1049 if (this->getMap ().is_null ()) {
1050 return static_cast<size_t> (0);
1052 return this->getMap ()->getLocalNumElements ();
1056 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1061 if (this->getMap ().is_null ()) {
1062 return static_cast<size_t> (0);
1064 return this->getMap ()->getGlobalNumElements ();
1068 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1073 return isConstantStride () ? getDualViewStride (view_) : size_t (0);
1076 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1082 auto thisData = view_.getDualView().h_view.data();
1083 auto otherData = other.
view_.getDualView().h_view.data();
1084 size_t thisSpan = view_.getDualView().h_view.span();
1085 size_t otherSpan = other.
view_.getDualView().h_view.span();
1086 return (otherData <= thisData && thisData < otherData + otherSpan)
1087 || (thisData <= otherData && otherData < thisData + thisSpan);
1090 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1099 const MV* src =
dynamic_cast<const MV*
> (&sourceObj);
1100 if (src ==
nullptr) {
1114 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1118 return this->getNumVectors ();
1121 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1126 const size_t numSameIDs,
1127 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs,
1128 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs,
1130 const execution_space &space)
1132 using ::Tpetra::Details::Behavior;
1134 using ::Tpetra::Details::ProfilingRegion;
1136 using KokkosRefactor::Details::permute_array_multi_column;
1137 using KokkosRefactor::Details::permute_array_multi_column_variable_stride;
1138 using Kokkos::Compat::create_const_view;
1142 MV& sourceMV =
const_cast<MV &
>(
dynamic_cast<const MV&
> (sourceObj));
1143 const bool copyOnHost = runKernelOnHost(sourceMV);
1144 const char longFuncNameHost[] =
"Tpetra::MultiVector::copyAndPermute[Host]";
1145 const char longFuncNameDevice[] =
"Tpetra::MultiVector::copyAndPermute[Device]";
1146 const char tfecfFuncName[] =
"copyAndPermute: ";
1147 ProfilingRegion regionCAP (copyOnHost ? longFuncNameHost : longFuncNameDevice);
1149 const bool verbose = Behavior::verbose ();
1150 std::unique_ptr<std::string> prefix;
1152 auto map = this->getMap ();
1153 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
1154 const int myRank = comm.is_null () ? -1 : comm->getRank ();
1155 std::ostringstream os;
1156 os <<
"Proc " << myRank <<
": MV::copyAndPermute: ";
1157 prefix = std::unique_ptr<std::string> (
new std::string (os.str ()));
1158 os <<
"Start" << endl;
1159 std::cerr << os.str ();
1162 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1163 (permuteToLIDs.extent (0) != permuteFromLIDs.extent (0),
1164 std::logic_error,
"permuteToLIDs.extent(0) = "
1165 << permuteToLIDs.extent (0) <<
" != permuteFromLIDs.extent(0) = "
1166 << permuteFromLIDs.extent (0) <<
".");
1167 const size_t numCols = this->getNumVectors ();
1171 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1172 (sourceMV.need_sync_device () && sourceMV.need_sync_host (),
1173 std::logic_error,
"Input MultiVector needs sync to both host "
1176 std::ostringstream os;
1177 os << *prefix <<
"copyOnHost=" << (copyOnHost ?
"true" :
"false") << endl;
1178 std::cerr << os.str ();
1182 std::ostringstream os;
1183 os << *prefix <<
"Copy" << endl;
1184 std::cerr << os.str ();
1209 if (numSameIDs > 0) {
1210 const std::pair<size_t, size_t> rows (0, numSameIDs);
1212 auto tgt_h = this->getLocalViewHost(Access::ReadWrite);
1213 auto src_h = sourceMV.getLocalViewHost(Access::ReadOnly);
1215 for (
size_t j = 0; j < numCols; ++j) {
1216 const size_t tgtCol = isConstantStride () ? j : whichVectors_[j];
1217 const size_t srcCol =
1218 sourceMV.isConstantStride () ? j : sourceMV.whichVectors_[j];
1220 auto tgt_j = Kokkos::subview (tgt_h, rows, tgtCol);
1221 auto src_j = Kokkos::subview (src_h, rows, srcCol);
1225 Kokkos::RangePolicy<execution_space, size_t>;
1226 range_t rp(space, 0,numSameIDs);
1227 Tpetra::Details::AddAssignFunctor<decltype(tgt_j), decltype(src_j)>
1229 Kokkos::parallel_for(rp, aaf);
1240 auto tgt_d = this->getLocalViewDevice(Access::ReadWrite);
1241 auto src_d = sourceMV.getLocalViewDevice(Access::ReadOnly);
1243 for (
size_t j = 0; j < numCols; ++j) {
1244 const size_t tgtCol = isConstantStride () ? j : whichVectors_[j];
1245 const size_t srcCol =
1246 sourceMV.isConstantStride () ? j : sourceMV.whichVectors_[j];
1248 auto tgt_j = Kokkos::subview (tgt_d, rows, tgtCol);
1249 auto src_j = Kokkos::subview (src_d, rows, srcCol);
1253 Kokkos::RangePolicy<execution_space, size_t>;
1254 range_t rp(space, 0,numSameIDs);
1255 Tpetra::Details::AddAssignFunctor<decltype(tgt_j), decltype(src_j)>
1257 Kokkos::parallel_for(rp, aaf);
1280 if (permuteFromLIDs.extent (0) == 0 ||
1281 permuteToLIDs.extent (0) == 0) {
1283 std::ostringstream os;
1284 os << *prefix <<
"No permutations. Done!" << endl;
1285 std::cerr << os.str ();
1291 std::ostringstream os;
1292 os << *prefix <<
"Permute" << endl;
1293 std::cerr << os.str ();
1298 const bool nonConstStride =
1299 ! this->isConstantStride () || ! sourceMV.isConstantStride ();
1302 std::ostringstream os;
1303 os << *prefix <<
"nonConstStride="
1304 << (nonConstStride ?
"true" :
"false") << endl;
1305 std::cerr << os.str ();
1312 Kokkos::DualView<const size_t*, device_type> srcWhichVecs;
1313 Kokkos::DualView<const size_t*, device_type> tgtWhichVecs;
1314 if (nonConstStride) {
1315 if (this->whichVectors_.size () == 0) {
1316 Kokkos::DualView<size_t*, device_type> tmpTgt (
"tgtWhichVecs", numCols);
1317 tmpTgt.modify_host ();
1318 for (
size_t j = 0; j < numCols; ++j) {
1319 tmpTgt.h_view(j) = j;
1322 tmpTgt.sync_device ();
1324 tgtWhichVecs = tmpTgt;
1327 Teuchos::ArrayView<const size_t> tgtWhichVecsT = this->whichVectors_ ();
1329 getDualViewCopyFromArrayView<size_t, device_type> (tgtWhichVecsT,
1333 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1334 (static_cast<size_t> (tgtWhichVecs.extent (0)) !=
1335 this->getNumVectors (),
1336 std::logic_error,
"tgtWhichVecs.extent(0) = " <<
1337 tgtWhichVecs.extent (0) <<
" != this->getNumVectors() = " <<
1338 this->getNumVectors () <<
".");
1340 if (sourceMV.whichVectors_.size () == 0) {
1341 Kokkos::DualView<size_t*, device_type> tmpSrc (
"srcWhichVecs", numCols);
1342 tmpSrc.modify_host ();
1343 for (
size_t j = 0; j < numCols; ++j) {
1344 tmpSrc.h_view(j) = j;
1347 tmpSrc.sync_device ();
1349 srcWhichVecs = tmpSrc;
1352 Teuchos::ArrayView<const size_t> srcWhichVecsT =
1353 sourceMV.whichVectors_ ();
1355 getDualViewCopyFromArrayView<size_t, device_type> (srcWhichVecsT,
1359 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1360 (static_cast<size_t> (srcWhichVecs.extent (0)) !=
1361 sourceMV.getNumVectors (), std::logic_error,
1362 "srcWhichVecs.extent(0) = " << srcWhichVecs.extent (0)
1363 <<
" != sourceMV.getNumVectors() = " << sourceMV.getNumVectors ()
1369 std::ostringstream os;
1370 os << *prefix <<
"Get permute LIDs on host" << std::endl;
1371 std::cerr << os.str ();
1373 auto tgt_h = this->getLocalViewHost(Access::ReadWrite);
1374 auto src_h = sourceMV.getLocalViewHost(Access::ReadOnly);
1376 TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_host () );
1377 auto permuteToLIDs_h = create_const_view (permuteToLIDs.view_host ());
1378 TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_host () );
1379 auto permuteFromLIDs_h =
1380 create_const_view (permuteFromLIDs.view_host ());
1383 std::ostringstream os;
1384 os << *prefix <<
"Permute on host" << endl;
1385 std::cerr << os.str ();
1387 if (nonConstStride) {
1390 auto tgtWhichVecs_h =
1391 create_const_view (tgtWhichVecs.view_host ());
1392 auto srcWhichVecs_h =
1393 create_const_view (srcWhichVecs.view_host ());
1395 using op_type = KokkosRefactor::Details::AddOp;
1396 permute_array_multi_column_variable_stride (tgt_h, src_h,
1400 srcWhichVecs_h, numCols,
1404 using op_type = KokkosRefactor::Details::InsertOp;
1405 permute_array_multi_column_variable_stride (tgt_h, src_h,
1409 srcWhichVecs_h, numCols,
1415 using op_type = KokkosRefactor::Details::AddOp;
1416 permute_array_multi_column (tgt_h, src_h, permuteToLIDs_h,
1417 permuteFromLIDs_h, numCols, op_type());
1420 using op_type = KokkosRefactor::Details::InsertOp;
1421 permute_array_multi_column (tgt_h, src_h, permuteToLIDs_h,
1422 permuteFromLIDs_h, numCols, op_type());
1428 std::ostringstream os;
1429 os << *prefix <<
"Get permute LIDs on device" << endl;
1430 std::cerr << os.str ();
1432 auto tgt_d = this->getLocalViewDevice(Access::ReadWrite);
1433 auto src_d = sourceMV.getLocalViewDevice(Access::ReadOnly);
1435 TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_device () );
1436 auto permuteToLIDs_d = create_const_view (permuteToLIDs.view_device ());
1437 TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_device () );
1438 auto permuteFromLIDs_d =
1439 create_const_view (permuteFromLIDs.view_device ());
1442 std::ostringstream os;
1443 os << *prefix <<
"Permute on device" << endl;
1444 std::cerr << os.str ();
1446 if (nonConstStride) {
1449 auto tgtWhichVecs_d = create_const_view (tgtWhichVecs.view_device ());
1450 auto srcWhichVecs_d = create_const_view (srcWhichVecs.view_device ());
1452 using op_type = KokkosRefactor::Details::AddOp;
1453 permute_array_multi_column_variable_stride (space, tgt_d, src_d,
1457 srcWhichVecs_d, numCols,
1461 using op_type = KokkosRefactor::Details::InsertOp;
1462 permute_array_multi_column_variable_stride (space, tgt_d, src_d,
1466 srcWhichVecs_d, numCols,
1472 using op_type = KokkosRefactor::Details::AddOp;
1473 permute_array_multi_column (space, tgt_d, src_d, permuteToLIDs_d,
1474 permuteFromLIDs_d, numCols, op_type());
1477 using op_type = KokkosRefactor::Details::InsertOp;
1478 permute_array_multi_column (space, tgt_d, src_d, permuteToLIDs_d,
1479 permuteFromLIDs_d, numCols, op_type());
1485 std::ostringstream os;
1486 os << *prefix <<
"Done!" << endl;
1487 std::cerr << os.str ();
1492 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1495 const Kokkos::DualView<const local_ordinal_type *, buffer_device_type>
1497 const Kokkos::DualView<const local_ordinal_type *, buffer_device_type>
1500 copyAndPermute(sourceObj, numSameIDs, permuteToLIDs, permuteFromLIDs, CM,
1505 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1510 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
1511 Kokkos::DualView<impl_scalar_type*, buffer_device_type>& exports,
1512 Kokkos::DualView<size_t*, buffer_device_type> ,
1513 size_t& constantNumPackets,
1514 const execution_space &space)
1516 using ::Tpetra::Details::Behavior;
1517 using ::Tpetra::Details::ProfilingRegion;
1519 using Kokkos::Compat::create_const_view;
1520 using Kokkos::Compat::getKokkosViewDeepCopy;
1525 MV& sourceMV =
const_cast<MV&
>(
dynamic_cast<const MV&
> (sourceObj));
1527 const bool packOnHost = runKernelOnHost(sourceMV);
1528 const char longFuncNameHost[] =
"Tpetra::MultiVector::packAndPrepare[Host]";
1529 const char longFuncNameDevice[] =
"Tpetra::MultiVector::packAndPrepare[Device]";
1530 const char tfecfFuncName[] =
"packAndPrepare: ";
1531 ProfilingRegion regionPAP (packOnHost ? longFuncNameHost : longFuncNameDevice);
1539 const bool debugCheckIndices = Behavior::debug ();
1544 const bool printDebugOutput = Behavior::verbose ();
1546 std::unique_ptr<std::string> prefix;
1547 if (printDebugOutput) {
1548 auto map = this->getMap ();
1549 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
1550 const int myRank = comm.is_null () ? -1 : comm->getRank ();
1551 std::ostringstream os;
1552 os <<
"Proc " << myRank <<
": MV::packAndPrepare: ";
1553 prefix = std::unique_ptr<std::string> (
new std::string (os.str ()));
1554 os <<
"Start" << endl;
1555 std::cerr << os.str ();
1559 const size_t numCols = sourceMV.getNumVectors ();
1564 constantNumPackets = numCols;
1568 if (exportLIDs.extent (0) == 0) {
1569 if (printDebugOutput) {
1570 std::ostringstream os;
1571 os << *prefix <<
"No exports on this proc, DONE" << std::endl;
1572 std::cerr << os.str ();
1592 const size_t numExportLIDs = exportLIDs.extent (0);
1593 const size_t newExportsSize = numCols * numExportLIDs;
1594 if (printDebugOutput) {
1595 std::ostringstream os;
1596 os << *prefix <<
"realloc: "
1597 <<
"numExportLIDs: " << numExportLIDs
1598 <<
", exports.extent(0): " << exports.extent (0)
1599 <<
", newExportsSize: " << newExportsSize << std::endl;
1600 std::cerr << os.str ();
1606 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1607 (sourceMV.need_sync_device () && sourceMV.need_sync_host (),
1608 std::logic_error,
"Input MultiVector needs sync to both host "
1610 if (printDebugOutput) {
1611 std::ostringstream os;
1612 os << *prefix <<
"packOnHost=" << (packOnHost ?
"true" :
"false") << endl;
1613 std::cerr << os.str ();
1625 exports.clear_sync_state ();
1626 exports.modify_host ();
1634 exports.clear_sync_state ();
1635 exports.modify_device ();
1651 if (sourceMV.isConstantStride ()) {
1652 using KokkosRefactor::Details::pack_array_single_column;
1653 if (printDebugOutput) {
1654 std::ostringstream os;
1655 os << *prefix <<
"Pack numCols=1 const stride" << endl;
1656 std::cerr << os.str ();
1659 auto src_host = sourceMV.getLocalViewHost(Access::ReadOnly);
1660 pack_array_single_column (exports.view_host (),
1662 exportLIDs.view_host (),
1667 auto src_dev = sourceMV.getLocalViewDevice(Access::ReadOnly);
1668 pack_array_single_column (exports.view_device (),
1670 exportLIDs.view_device (),
1677 using KokkosRefactor::Details::pack_array_single_column;
1678 if (printDebugOutput) {
1679 std::ostringstream os;
1680 os << *prefix <<
"Pack numCols=1 nonconst stride" << endl;
1681 std::cerr << os.str ();
1684 auto src_host = sourceMV.getLocalViewHost(Access::ReadOnly);
1685 pack_array_single_column (exports.view_host (),
1687 exportLIDs.view_host (),
1688 sourceMV.whichVectors_[0],
1692 auto src_dev = sourceMV.getLocalViewDevice(Access::ReadOnly);
1693 pack_array_single_column (exports.view_device (),
1695 exportLIDs.view_device (),
1696 sourceMV.whichVectors_[0],
1703 if (sourceMV.isConstantStride ()) {
1704 using KokkosRefactor::Details::pack_array_multi_column;
1705 if (printDebugOutput) {
1706 std::ostringstream os;
1707 os << *prefix <<
"Pack numCols=" << numCols <<
" const stride" << endl;
1708 std::cerr << os.str ();
1711 auto src_host = sourceMV.getLocalViewHost(Access::ReadOnly);
1712 pack_array_multi_column (exports.view_host (),
1714 exportLIDs.view_host (),
1719 auto src_dev = sourceMV.getLocalViewDevice(Access::ReadOnly);
1720 pack_array_multi_column (exports.view_device (),
1722 exportLIDs.view_device (),
1729 using KokkosRefactor::Details::pack_array_multi_column_variable_stride;
1730 if (printDebugOutput) {
1731 std::ostringstream os;
1732 os << *prefix <<
"Pack numCols=" << numCols <<
" nonconst stride"
1734 std::cerr << os.str ();
1739 using IST = impl_scalar_type;
1740 using DV = Kokkos::DualView<IST*, device_type>;
1741 using HES =
typename DV::t_host::execution_space;
1742 using DES =
typename DV::t_dev::execution_space;
1743 Teuchos::ArrayView<const size_t> whichVecs = sourceMV.whichVectors_ ();
1745 auto src_host = sourceMV.getLocalViewHost(Access::ReadOnly);
1746 pack_array_multi_column_variable_stride
1747 (exports.view_host (),
1749 exportLIDs.view_host (),
1750 getKokkosViewDeepCopy<HES> (whichVecs),
1755 auto src_dev = sourceMV.getLocalViewDevice(Access::ReadOnly);
1756 pack_array_multi_column_variable_stride
1757 (exports.view_device (),
1759 exportLIDs.view_device (),
1760 getKokkosViewDeepCopy<DES> (whichVecs),
1762 debugCheckIndices, space);
1767 if (printDebugOutput) {
1768 std::ostringstream os;
1769 os << *prefix <<
"Done!" << endl;
1770 std::cerr << os.str ();
1776 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1781 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
1782 Kokkos::DualView<impl_scalar_type*, buffer_device_type>& exports,
1783 Kokkos::DualView<size_t*, buffer_device_type> numExportPacketsPerLID,
1784 size_t& constantNumPackets) {
1785 packAndPrepare(sourceObj, exportLIDs, exports, numExportPacketsPerLID, constantNumPackets, execution_space());
1789 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1791 typename std::enable_if<std::is_same<typename Tpetra::Details::DefaultTypes::CommBufferMemorySpace<typename NO::execution_space>::type,
1792 typename NO::device_type::memory_space>::value,
1797 const std::string* prefix,
1798 const bool areRemoteLIDsContiguous,
1807 std::ostringstream os;
1808 os << *prefix <<
"Realloc (if needed) imports_ from "
1809 << this->imports_.extent (0) <<
" to " << newSize << std::endl;
1810 std::cerr << os.str ();
1813 bool reallocated =
false;
1815 using IST = impl_scalar_type;
1816 using DV = Kokkos::DualView<IST*, Kokkos::LayoutLeft, buffer_device_type>;
1828 if ((dual_view_type::impl_dualview_is_single_device::value ||
1829 (Details::Behavior::assumeMpiIsGPUAware () && !this->need_sync_device()) ||
1830 (!Details::Behavior::assumeMpiIsGPUAware () && !this->need_sync_host())) &&
1831 areRemoteLIDsContiguous &&
1833 (getNumVectors() == 1) &&
1836 size_t offset = getLocalLength () - newSize;
1837 reallocated = this->imports_.d_view.data() != view_.getDualView().d_view.data() + offset;
1839 typedef std::pair<size_t, size_t> range_type;
1840 this->imports_ = DV(view_.getDualView(),
1841 range_type (offset, getLocalLength () ),
1845 std::ostringstream os;
1846 os << *prefix <<
"Aliased imports_ to MV.view_" << std::endl;
1847 std::cerr << os.str ();
1857 std::ostringstream os;
1858 os << *prefix <<
"Finished realloc'ing unaliased_imports_" << std::endl;
1859 std::cerr << os.str ();
1861 this->imports_ = this->unaliased_imports_;
1866 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1868 typename std::enable_if<!std::is_same<typename Tpetra::Details::DefaultTypes::CommBufferMemorySpace<typename NO::execution_space>::type,
1869 typename NO::device_type::memory_space>::value,
1874 const std::string* prefix,
1875 const bool areRemoteLIDsContiguous,
1881 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1886 const std::string* prefix,
1887 const bool areRemoteLIDsContiguous,
1890 return reallocImportsIfNeededImpl<Node>(newSize, verbose, prefix, areRemoteLIDsContiguous, CM);
1893 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1897 return (this->imports_.d_view.data() + this->imports_.d_view.extent(0) ==
1898 view_.getDualView().d_view.data() + view_.getDualView().d_view.extent(0));
1902 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1906 (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& importLIDs,
1907 Kokkos::DualView<impl_scalar_type*, buffer_device_type> imports,
1908 Kokkos::DualView<size_t*, buffer_device_type> ,
1909 const size_t constantNumPackets,
1911 const execution_space &space)
1913 using ::Tpetra::Details::Behavior;
1914 using ::Tpetra::Details::ProfilingRegion;
1915 using KokkosRefactor::Details::unpack_array_multi_column;
1916 using KokkosRefactor::Details::unpack_array_multi_column_variable_stride;
1917 using Kokkos::Compat::getKokkosViewDeepCopy;
1920 const bool unpackOnHost = runKernelOnHost(imports);
1922 const char longFuncNameHost[] =
"Tpetra::MultiVector::unpackAndCombine[Host]";
1923 const char longFuncNameDevice[] =
"Tpetra::MultiVector::unpackAndCombine[Device]";
1924 const char * longFuncName = unpackOnHost ? longFuncNameHost : longFuncNameDevice;
1925 const char tfecfFuncName[] =
"unpackAndCombine: ";
1926 ProfilingRegion regionUAC (longFuncName);
1934 const bool debugCheckIndices = Behavior::debug ();
1936 const bool printDebugOutput = Behavior::verbose ();
1937 std::unique_ptr<std::string> prefix;
1938 if (printDebugOutput) {
1939 auto map = this->getMap ();
1940 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
1941 const int myRank = comm.is_null () ? -1 : comm->getRank ();
1942 std::ostringstream os;
1943 os <<
"Proc " << myRank <<
": " << longFuncName <<
": ";
1944 prefix = std::unique_ptr<std::string> (
new std::string (os.str ()));
1945 os <<
"Start" << endl;
1946 std::cerr << os.str ();
1950 if (importLIDs.extent (0) == 0) {
1951 if (printDebugOutput) {
1952 std::ostringstream os;
1953 os << *prefix <<
"No imports. Done!" << endl;
1954 std::cerr << os.str ();
1960 if (importsAreAliased()) {
1961 if (printDebugOutput) {
1962 std::ostringstream os;
1963 os << *prefix <<
"Skipping unpack, since imports_ is aliased to MV.view_. Done!" << endl;
1964 std::cerr << os.str ();
1970 const size_t numVecs = getNumVectors ();
1971 if (debugCheckIndices) {
1972 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1973 (static_cast<size_t> (imports.extent (0)) !=
1974 numVecs * importLIDs.extent (0),
1976 "imports.extent(0) = " << imports.extent (0)
1977 <<
" != getNumVectors() * importLIDs.extent(0) = " << numVecs
1978 <<
" * " << importLIDs.extent (0) <<
" = "
1979 << numVecs * importLIDs.extent (0) <<
".");
1981 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1982 (constantNumPackets == static_cast<size_t> (0), std::runtime_error,
1983 "constantNumPackets input argument must be nonzero.");
1985 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1986 (static_cast<size_t> (numVecs) !=
1987 static_cast<size_t> (constantNumPackets),
1988 std::runtime_error,
"constantNumPackets must equal numVecs.");
1997 if (this->imports_.need_sync_host()) this->imports_.sync_host();
2000 if (this->imports_.need_sync_device()) this->imports_.sync_device();
2003 if (printDebugOutput) {
2004 std::ostringstream os;
2005 os << *prefix <<
"unpackOnHost=" << (unpackOnHost ?
"true" :
"false")
2007 std::cerr << os.str ();
2012 auto imports_d = imports.view_device ();
2013 auto imports_h = imports.view_host ();
2014 auto importLIDs_d = importLIDs.view_device ();
2015 auto importLIDs_h = importLIDs.view_host ();
2017 Kokkos::DualView<size_t*, device_type> whichVecs;
2018 if (! isConstantStride ()) {
2019 Kokkos::View<
const size_t*, Kokkos::HostSpace,
2020 Kokkos::MemoryUnmanaged> whichVecsIn (whichVectors_.getRawPtr (),
2022 whichVecs = Kokkos::DualView<size_t*, device_type> (
"whichVecs", numVecs);
2024 whichVecs.modify_host ();
2029 whichVecs.modify_device ();
2034 auto whichVecs_d = whichVecs.view_device ();
2035 auto whichVecs_h = whichVecs.view_host ();
2045 if (numVecs > 0 && importLIDs.extent (0) > 0) {
2046 using dev_exec_space =
typename dual_view_type::t_dev::execution_space;
2047 using host_exec_space =
typename dual_view_type::t_host::execution_space;
2050 const bool use_atomic_updates = unpackOnHost ?
2051 host_exec_space().concurrency () != 1 :
2052 dev_exec_space().concurrency () != 1;
2054 if (printDebugOutput) {
2055 std::ostringstream os;
2057 std::cerr << os.str ();
2064 using op_type = KokkosRefactor::Details::InsertOp;
2065 if (isConstantStride ()) {
2067 auto X_h = this->getLocalViewHost(Access::ReadWrite);
2068 unpack_array_multi_column (host_exec_space (),
2069 X_h, imports_h, importLIDs_h,
2070 op_type (), numVecs,
2076 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
2077 unpack_array_multi_column (space,
2078 X_d, imports_d, importLIDs_d,
2079 op_type (), numVecs,
2086 auto X_h = this->getLocalViewHost(Access::ReadWrite);
2087 unpack_array_multi_column_variable_stride (host_exec_space (),
2097 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
2098 unpack_array_multi_column_variable_stride (space,
2110 using op_type = KokkosRefactor::Details::AddOp;
2111 if (isConstantStride ()) {
2113 auto X_h = this->getLocalViewHost(Access::ReadWrite);
2114 unpack_array_multi_column (host_exec_space (),
2115 X_h, imports_h, importLIDs_h,
2116 op_type (), numVecs,
2121 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
2122 unpack_array_multi_column (space,
2123 X_d, imports_d, importLIDs_d,
2124 op_type (), numVecs,
2131 auto X_h = this->getLocalViewHost(Access::ReadWrite);
2132 unpack_array_multi_column_variable_stride (host_exec_space (),
2142 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
2143 unpack_array_multi_column_variable_stride (space,
2155 using op_type = KokkosRefactor::Details::AbsMaxOp;
2156 if (isConstantStride ()) {
2158 auto X_h = this->getLocalViewHost(Access::ReadWrite);
2159 unpack_array_multi_column (host_exec_space (),
2160 X_h, imports_h, importLIDs_h,
2161 op_type (), numVecs,
2166 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
2167 unpack_array_multi_column (space,
2168 X_d, imports_d, importLIDs_d,
2169 op_type (), numVecs,
2176 auto X_h = this->getLocalViewHost(Access::ReadWrite);
2177 unpack_array_multi_column_variable_stride (host_exec_space (),
2187 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
2188 unpack_array_multi_column_variable_stride (space,
2200 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2201 (
true, std::logic_error,
"Invalid CombineMode");
2205 if (printDebugOutput) {
2206 std::ostringstream os;
2207 os << *prefix <<
"Nothing to unpack" << endl;
2208 std::cerr << os.str ();
2212 if (printDebugOutput) {
2213 std::ostringstream os;
2214 os << *prefix <<
"Done!" << endl;
2215 std::cerr << os.str ();
2220 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2224 (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& importLIDs,
2225 Kokkos::DualView<impl_scalar_type*, buffer_device_type> imports,
2226 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
2227 const size_t constantNumPackets,
2229 unpackAndCombine(importLIDs, imports, numPacketsPerLID, constantNumPackets, CM, execution_space());
2233 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2238 if (isConstantStride ()) {
2239 return static_cast<size_t> (view_.extent (1));
2241 return static_cast<size_t> (whichVectors_.size ());
2249 gblDotImpl (
const RV& dotsOut,
2250 const Teuchos::RCP<
const Teuchos::Comm<int> >& comm,
2251 const bool distributed)
2253 using Teuchos::REDUCE_MAX;
2254 using Teuchos::REDUCE_SUM;
2255 using Teuchos::reduceAll;
2256 typedef typename RV::non_const_value_type dot_type;
2258 const size_t numVecs = dotsOut.extent (0);
2273 if (distributed && ! comm.is_null ()) {
2276 const int nv =
static_cast<int> (numVecs);
2277 const bool commIsInterComm = ::Tpetra::Details::isInterComm (*comm);
2279 if (commIsInterComm) {
2283 typename RV::non_const_type lclDots (Kokkos::ViewAllocateWithoutInitializing (
"tmp"), numVecs);
2286 const dot_type*
const lclSum = lclDots.data ();
2287 dot_type*
const gblSum = dotsOut.data ();
2288 reduceAll<int, dot_type> (*comm, REDUCE_SUM, nv, lclSum, gblSum);
2291 dot_type*
const inout = dotsOut.data ();
2292 reduceAll<int, dot_type> (*comm, REDUCE_SUM, nv, inout, inout);
2298 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2302 const Kokkos::View<dot_type*, Kokkos::HostSpace>& dots)
const
2304 using ::Tpetra::Details::Behavior;
2305 using Kokkos::subview;
2306 using Teuchos::Comm;
2307 using Teuchos::null;
2310 typedef Kokkos::View<dot_type*, Kokkos::HostSpace> RV;
2311 typedef typename dual_view_type::t_dev_const XMV;
2312 const char tfecfFuncName[] =
"Tpetra::MultiVector::dot: ";
2316 const size_t numVecs = this->getNumVectors ();
2320 const size_t lclNumRows = this->getLocalLength ();
2321 const size_t numDots =
static_cast<size_t> (dots.extent (0));
2322 const bool debug = Behavior::debug ();
2325 const bool compat = this->getMap ()->isCompatible (* (A.
getMap ()));
2326 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2327 (! compat, std::invalid_argument,
"'*this' MultiVector is not "
2328 "compatible with the input MultiVector A. We only test for this "
2339 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2341 "MultiVectors do not have the same local length. "
2342 "this->getLocalLength() = " << lclNumRows <<
" != "
2344 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2346 "MultiVectors must have the same number of columns (vectors). "
2347 "this->getNumVectors() = " << numVecs <<
" != "
2349 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2350 numDots != numVecs, std::runtime_error,
2351 "The output array 'dots' must have the same number of entries as the "
2352 "number of columns (vectors) in *this and A. dots.extent(0) = " <<
2353 numDots <<
" != this->getNumVectors() = " << numVecs <<
".");
2355 const std::pair<size_t, size_t> colRng (0, numVecs);
2356 RV dotsOut = subview (dots, colRng);
2357 RCP<const Comm<int> > comm = this->getMap ().is_null () ? null :
2358 this->getMap ()->getComm ();
2360 auto thisView = this->getLocalViewDevice(Access::ReadOnly);
2363 ::Tpetra::Details::lclDot<RV, XMV> (dotsOut, thisView, A_view, lclNumRows, numVecs,
2364 this->whichVectors_.getRawPtr (),
2375 exec_space_instance.fence();
2377 gblDotImpl (dotsOut, comm, this->isDistributed ());
2381 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2386 using ::Tpetra::Details::ProfilingRegion;
2388 using dot_type =
typename MV::dot_type;
2389 ProfilingRegion region (
"Tpetra::multiVectorSingleColumnDot");
2392 Teuchos::RCP<const Teuchos::Comm<int> > comm =
2393 map.is_null () ? Teuchos::null : map->getComm ();
2394 if (comm.is_null ()) {
2395 return Kokkos::ArithTraits<dot_type>::zero ();
2398 using LO = LocalOrdinal;
2402 const LO lclNumRows =
static_cast<LO
> (std::min (x.
getLocalLength (),
2404 const Kokkos::pair<LO, LO> rowRng (0, lclNumRows);
2405 dot_type lclDot = Kokkos::ArithTraits<dot_type>::zero ();
2406 dot_type gblDot = Kokkos::ArithTraits<dot_type>::zero ();
2413 auto x_1d = Kokkos::subview (x_2d, rowRng, 0);
2415 auto y_1d = Kokkos::subview (y_2d, rowRng, 0);
2416 lclDot = KokkosBlas::dot (x_1d, y_1d);
2419 using Teuchos::outArg;
2420 using Teuchos::REDUCE_SUM;
2421 using Teuchos::reduceAll;
2422 reduceAll<int, dot_type> (*comm, REDUCE_SUM, lclDot, outArg (gblDot));
2434 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2438 const Teuchos::ArrayView<dot_type>& dots)
const
2440 const char tfecfFuncName[] =
"dot: ";
2443 const size_t numVecs = this->getNumVectors ();
2444 const size_t lclNumRows = this->getLocalLength ();
2445 const size_t numDots =
static_cast<size_t> (dots.size ());
2455 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2457 "MultiVectors do not have the same local length. "
2458 "this->getLocalLength() = " << lclNumRows <<
" != "
2460 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2462 "MultiVectors must have the same number of columns (vectors). "
2463 "this->getNumVectors() = " << numVecs <<
" != "
2465 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2466 (numDots != numVecs, std::runtime_error,
2467 "The output array 'dots' must have the same number of entries as the "
2468 "number of columns (vectors) in *this and A. dots.extent(0) = " <<
2469 numDots <<
" != this->getNumVectors() = " << numVecs <<
".");
2472 const dot_type gblDot = multiVectorSingleColumnDot (*
this, A);
2476 this->dot (A, Kokkos::View<dot_type*, Kokkos::HostSpace>(dots.getRawPtr (), numDots));
2480 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2483 norm2 (
const Teuchos::ArrayView<mag_type>& norms)
const
2486 using ::Tpetra::Details::NORM_TWO;
2487 using ::Tpetra::Details::ProfilingRegion;
2488 ProfilingRegion region (
"Tpetra::MV::norm2 (host output)");
2491 MV& X =
const_cast<MV&
> (*this);
2492 multiVectorNormImpl (norms.getRawPtr (), X, NORM_TWO);
2495 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2498 norm2 (
const Kokkos::View<mag_type*, Kokkos::HostSpace>& norms)
const
2500 Teuchos::ArrayView<mag_type> norms_av (norms.data (), norms.extent (0));
2501 this->norm2 (norms_av);
2505 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2508 norm1 (
const Teuchos::ArrayView<mag_type>& norms)
const
2511 using ::Tpetra::Details::NORM_ONE;
2512 using ::Tpetra::Details::ProfilingRegion;
2513 ProfilingRegion region (
"Tpetra::MV::norm1 (host output)");
2516 MV& X =
const_cast<MV&
> (*this);
2517 multiVectorNormImpl (norms.data (), X, NORM_ONE);
2520 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2523 norm1 (
const Kokkos::View<mag_type*, Kokkos::HostSpace>& norms)
const
2525 Teuchos::ArrayView<mag_type> norms_av (norms.data (), norms.extent (0));
2526 this->norm1 (norms_av);
2529 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2532 normInf (
const Teuchos::ArrayView<mag_type>& norms)
const
2535 using ::Tpetra::Details::NORM_INF;
2536 using ::Tpetra::Details::ProfilingRegion;
2537 ProfilingRegion region (
"Tpetra::MV::normInf (host output)");
2540 MV& X =
const_cast<MV&
> (*this);
2541 multiVectorNormImpl (norms.getRawPtr (), X, NORM_INF);
2544 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2547 normInf (
const Kokkos::View<mag_type*, Kokkos::HostSpace>& norms)
const
2549 Teuchos::ArrayView<mag_type> norms_av (norms.data (), norms.extent (0));
2550 this->normInf (norms_av);
2553 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2556 meanValue (
const Teuchos::ArrayView<impl_scalar_type>& means)
const
2560 using Kokkos::subview;
2561 using Teuchos::Comm;
2563 using Teuchos::reduceAll;
2564 using Teuchos::REDUCE_SUM;
2565 typedef Kokkos::ArithTraits<impl_scalar_type> ATS;
2567 const size_t lclNumRows = this->getLocalLength ();
2568 const size_t numVecs = this->getNumVectors ();
2569 const size_t numMeans =
static_cast<size_t> (means.size ());
2571 TEUCHOS_TEST_FOR_EXCEPTION(
2572 numMeans != numVecs, std::runtime_error,
2573 "Tpetra::MultiVector::meanValue: means.size() = " << numMeans
2574 <<
" != this->getNumVectors() = " << numVecs <<
".");
2576 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2577 const std::pair<size_t, size_t> colRng (0, numVecs);
2582 typedef Kokkos::View<impl_scalar_type*, device_type> local_view_type;
2584 typename local_view_type::HostMirror::array_layout,
2586 Kokkos::MemoryTraits<Kokkos::Unmanaged> > host_local_view_type;
2587 host_local_view_type meansOut (means.getRawPtr (), numMeans);
2589 RCP<const Comm<int> > comm = this->getMap ().is_null () ? Teuchos::null :
2590 this->getMap ()->getComm ();
2593 const bool useHostVersion = this->need_sync_device ();
2594 if (useHostVersion) {
2596 auto X_lcl = subview (getLocalViewHost(Access::ReadOnly),
2597 rowRng, Kokkos::ALL ());
2599 Kokkos::View<impl_scalar_type*, Kokkos::HostSpace> lclSums (
"MV::meanValue tmp", numVecs);
2600 if (isConstantStride ()) {
2601 KokkosBlas::sum (lclSums, X_lcl);
2604 for (
size_t j = 0; j < numVecs; ++j) {
2605 const size_t col = whichVectors_[j];
2606 KokkosBlas::sum (subview (lclSums, j), subview (X_lcl, ALL (), col));
2613 if (! comm.is_null () && this->isDistributed ()) {
2614 reduceAll (*comm, REDUCE_SUM, static_cast<int> (numVecs),
2615 lclSums.data (), meansOut.data ());
2624 auto X_lcl = subview (this->getLocalViewDevice(Access::ReadOnly),
2625 rowRng, Kokkos::ALL ());
2628 Kokkos::View<impl_scalar_type*, Kokkos::HostSpace> lclSums (
"MV::meanValue tmp", numVecs);
2629 if (isConstantStride ()) {
2630 KokkosBlas::sum (lclSums, X_lcl);
2633 for (
size_t j = 0; j < numVecs; ++j) {
2634 const size_t col = whichVectors_[j];
2635 KokkosBlas::sum (subview (lclSums, j), subview (X_lcl, ALL (), col));
2645 exec_space_instance.fence();
2651 if (! comm.is_null () && this->isDistributed ()) {
2652 reduceAll (*comm, REDUCE_SUM, static_cast<int> (numVecs),
2653 lclSums.data (), meansOut.data ());
2664 const impl_scalar_type OneOverN =
2665 ATS::one () /
static_cast<mag_type> (this->getGlobalLength ());
2666 for (
size_t k = 0; k < numMeans; ++k) {
2667 meansOut(k) = meansOut(k) * OneOverN;
2672 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2678 typedef Kokkos::ArithTraits<IST> ATS;
2679 typedef Kokkos::Random_XorShift64_Pool<typename device_type::execution_space> pool_type;
2680 typedef typename pool_type::generator_type generator_type;
2682 const IST max = Kokkos::rand<generator_type, IST>::max ();
2683 const IST min = ATS::is_signed ? IST (-max) : ATS::zero ();
2685 this->randomize (static_cast<Scalar> (min), static_cast<Scalar> (max));
2689 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2695 typedef Tpetra::Details::Static_Random_XorShift64_Pool<typename device_type::execution_space> tpetra_pool_type;
2696 typedef Kokkos::Random_XorShift64_Pool<typename device_type::execution_space> pool_type;
2699 if(!tpetra_pool_type::isSet())
2700 tpetra_pool_type::resetPool(this->getMap()->getComm()->getRank());
2702 pool_type & rand_pool = tpetra_pool_type::getPool();
2703 const IST max =
static_cast<IST
> (maxVal);
2704 const IST min =
static_cast<IST
> (minVal);
2706 auto thisView = this->getLocalViewDevice(Access::OverwriteAll);
2708 if (isConstantStride ()) {
2709 Kokkos::fill_random (thisView, rand_pool, min, max);
2712 const size_t numVecs = getNumVectors ();
2713 for (
size_t k = 0; k < numVecs; ++k) {
2714 const size_t col = whichVectors_[k];
2715 auto X_k = Kokkos::subview (thisView, Kokkos::ALL (), col);
2716 Kokkos::fill_random (X_k, rand_pool, min, max);
2721 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2726 using ::Tpetra::Details::ProfilingRegion;
2727 using ::Tpetra::Details::Blas::fill;
2728 using DES =
typename dual_view_type::t_dev::execution_space;
2729 using HES =
typename dual_view_type::t_host::execution_space;
2730 using LO = LocalOrdinal;
2731 ProfilingRegion region (
"Tpetra::MultiVector::putScalar");
2736 const LO lclNumRows =
static_cast<LO
> (this->getLocalLength ());
2737 const LO numVecs =
static_cast<LO
> (this->getNumVectors ());
2743 const bool runOnHost = runKernelOnHost(*
this);
2745 auto X = this->getLocalViewDevice(Access::OverwriteAll);
2746 if (this->isConstantStride ()) {
2747 fill (DES (), X, theAlpha, lclNumRows, numVecs);
2750 fill (DES (), X, theAlpha, lclNumRows, numVecs,
2751 this->whichVectors_.getRawPtr ());
2755 auto X = this->getLocalViewHost(Access::OverwriteAll);
2756 if (this->isConstantStride ()) {
2757 fill (HES (), X, theAlpha, lclNumRows, numVecs);
2760 fill (HES (), X, theAlpha, lclNumRows, numVecs,
2761 this->whichVectors_.getRawPtr ());
2767 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2772 using Teuchos::ArrayRCP;
2773 using Teuchos::Comm;
2776 using LO = LocalOrdinal;
2777 using GO = GlobalOrdinal;
2783 TEUCHOS_TEST_FOR_EXCEPTION(
2784 ! this->isConstantStride (), std::logic_error,
2785 "Tpetra::MultiVector::replaceMap: This method does not currently work "
2786 "if the MultiVector is a column view of another MultiVector (that is, if "
2787 "isConstantStride() == false).");
2817 if (this->getMap ().is_null ()) {
2822 TEUCHOS_TEST_FOR_EXCEPTION(
2823 newMap.is_null (), std::invalid_argument,
2824 "Tpetra::MultiVector::replaceMap: both current and new Maps are null. "
2825 "This probably means that the input Map is incorrect.");
2829 const size_t newNumRows = newMap->getLocalNumElements ();
2830 const size_t origNumRows = view_.extent (0);
2831 const size_t numCols = this->getNumVectors ();
2833 if (origNumRows != newNumRows || view_.extent (1) != numCols) {
2834 view_ = allocDualView<ST, LO, GO, Node> (newNumRows, numCols);
2837 else if (newMap.is_null ()) {
2840 const size_t newNumRows =
static_cast<size_t> (0);
2841 const size_t numCols = this->getNumVectors ();
2842 view_ = allocDualView<ST, LO, GO, Node> (newNumRows, numCols);
2845 this->map_ = newMap;
2848 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2856 const IST theAlpha =
static_cast<IST
> (alpha);
2857 if (theAlpha == Kokkos::ArithTraits<IST>::one ()) {
2860 const size_t lclNumRows = getLocalLength ();
2861 const size_t numVecs = getNumVectors ();
2862 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2863 const std::pair<size_t, size_t> colRng (0, numVecs);
2871 const bool useHostVersion = need_sync_device ();
2872 if (useHostVersion) {
2873 auto Y_lcl = Kokkos::subview (getLocalViewHost(Access::ReadWrite), rowRng, ALL ());
2874 if (isConstantStride ()) {
2875 KokkosBlas::scal (Y_lcl, theAlpha, Y_lcl);
2878 for (
size_t k = 0; k < numVecs; ++k) {
2879 const size_t Y_col = whichVectors_[k];
2880 auto Y_k = Kokkos::subview (Y_lcl, ALL (), Y_col);
2881 KokkosBlas::scal (Y_k, theAlpha, Y_k);
2886 auto Y_lcl = Kokkos::subview (getLocalViewDevice(Access::ReadWrite), rowRng, ALL ());
2887 if (isConstantStride ()) {
2888 KokkosBlas::scal (Y_lcl, theAlpha, Y_lcl);
2891 for (
size_t k = 0; k < numVecs; ++k) {
2892 const size_t Y_col = isConstantStride () ? k : whichVectors_[k];
2893 auto Y_k = Kokkos::subview (Y_lcl, ALL (), Y_col);
2894 KokkosBlas::scal (Y_k, theAlpha, Y_k);
2901 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2904 scale (
const Teuchos::ArrayView<const Scalar>& alphas)
2906 const size_t numVecs = this->getNumVectors ();
2907 const size_t numAlphas =
static_cast<size_t> (alphas.size ());
2908 TEUCHOS_TEST_FOR_EXCEPTION(
2909 numAlphas != numVecs, std::invalid_argument,
"Tpetra::MultiVector::"
2910 "scale: alphas.size() = " << numAlphas <<
" != this->getNumVectors() = "
2914 using k_alphas_type = Kokkos::DualView<impl_scalar_type*, device_type>;
2915 k_alphas_type k_alphas (
"alphas::tmp", numAlphas);
2916 k_alphas.modify_host ();
2917 for (
size_t i = 0; i < numAlphas; ++i) {
2920 k_alphas.sync_device ();
2922 this->scale (k_alphas.view_device ());
2925 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2928 scale (
const Kokkos::View<const impl_scalar_type*, device_type>& alphas)
2931 using Kokkos::subview;
2933 const size_t lclNumRows = this->getLocalLength ();
2934 const size_t numVecs = this->getNumVectors ();
2935 TEUCHOS_TEST_FOR_EXCEPTION(
2936 static_cast<size_t> (alphas.extent (0)) != numVecs,
2937 std::invalid_argument,
"Tpetra::MultiVector::scale(alphas): "
2938 "alphas.extent(0) = " << alphas.extent (0)
2939 <<
" != this->getNumVectors () = " << numVecs <<
".");
2940 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2941 const std::pair<size_t, size_t> colRng (0, numVecs);
2951 const bool useHostVersion = this->need_sync_device ();
2952 if (useHostVersion) {
2955 auto alphas_h = Kokkos::create_mirror_view (alphas);
2959 auto Y_lcl = subview (this->getLocalViewHost(Access::ReadWrite), rowRng, ALL ());
2960 if (isConstantStride ()) {
2961 KokkosBlas::scal (Y_lcl, alphas_h, Y_lcl);
2964 for (
size_t k = 0; k < numVecs; ++k) {
2965 const size_t Y_col = this->isConstantStride () ? k :
2966 this->whichVectors_[k];
2967 auto Y_k = subview (Y_lcl, ALL (), Y_col);
2970 KokkosBlas::scal (Y_k, alphas_h(k), Y_k);
2975 auto Y_lcl = subview (this->getLocalViewDevice(Access::ReadWrite), rowRng, ALL ());
2976 if (isConstantStride ()) {
2977 KokkosBlas::scal (Y_lcl, alphas, Y_lcl);
2984 auto alphas_h = Kokkos::create_mirror_view (alphas);
2988 for (
size_t k = 0; k < numVecs; ++k) {
2989 const size_t Y_col = this->isConstantStride () ? k :
2990 this->whichVectors_[k];
2991 auto Y_k = subview (Y_lcl, ALL (), Y_col);
2992 KokkosBlas::scal (Y_k, alphas_h(k), Y_k);
2998 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3005 using Kokkos::subview;
3006 const char tfecfFuncName[] =
"scale: ";
3008 const size_t lclNumRows = getLocalLength ();
3009 const size_t numVecs = getNumVectors ();
3011 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3013 "this->getLocalLength() = " << lclNumRows <<
" != A.getLocalLength() = "
3015 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3017 "this->getNumVectors() = " << numVecs <<
" != A.getNumVectors() = "
3021 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
3022 const std::pair<size_t, size_t> colRng (0, numVecs);
3024 auto Y_lcl_orig = this->getLocalViewDevice(Access::ReadWrite);
3026 auto Y_lcl = subview (Y_lcl_orig, rowRng, ALL ());
3027 auto X_lcl = subview (X_lcl_orig, rowRng, ALL ());
3030 KokkosBlas::scal (Y_lcl, theAlpha, X_lcl);
3034 for (
size_t k = 0; k < numVecs; ++k) {
3035 const size_t Y_col = this->isConstantStride () ? k : this->whichVectors_[k];
3037 auto Y_k = subview (Y_lcl, ALL (), Y_col);
3038 auto X_k = subview (X_lcl, ALL (), X_col);
3040 KokkosBlas::scal (Y_k, theAlpha, X_k);
3047 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3052 const char tfecfFuncName[] =
"reciprocal: ";
3054 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3056 "MultiVectors do not have the same local length. "
3057 "this->getLocalLength() = " << getLocalLength ()
3059 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3060 A.
getNumVectors () != this->getNumVectors (), std::runtime_error,
3061 ": MultiVectors do not have the same number of columns (vectors). "
3062 "this->getNumVectors() = " << getNumVectors ()
3063 <<
" != A.getNumVectors() = " << A.
getNumVectors () <<
".");
3065 const size_t numVecs = getNumVectors ();
3067 auto this_view_dev = this->getLocalViewDevice(Access::ReadWrite);
3071 KokkosBlas::reciprocal (this_view_dev, A_view_dev);
3075 using Kokkos::subview;
3076 for (
size_t k = 0; k < numVecs; ++k) {
3077 const size_t this_col = isConstantStride () ? k : whichVectors_[k];
3078 auto vector_k = subview (this_view_dev, ALL (), this_col);
3079 const size_t A_col = isConstantStride () ? k : A.
whichVectors_[k];
3080 auto vector_Ak = subview (A_view_dev, ALL (), A_col);
3081 KokkosBlas::reciprocal (vector_k, vector_Ak);
3086 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3091 const char tfecfFuncName[] =
"abs";
3093 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3095 ": MultiVectors do not have the same local length. "
3096 "this->getLocalLength() = " << getLocalLength ()
3098 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3099 A.
getNumVectors () != this->getNumVectors (), std::runtime_error,
3100 ": MultiVectors do not have the same number of columns (vectors). "
3101 "this->getNumVectors() = " << getNumVectors ()
3102 <<
" != A.getNumVectors() = " << A.
getNumVectors () <<
".");
3103 const size_t numVecs = getNumVectors ();
3105 auto this_view_dev = this->getLocalViewDevice(Access::ReadWrite);
3109 KokkosBlas::abs (this_view_dev, A_view_dev);
3113 using Kokkos::subview;
3115 for (
size_t k=0; k < numVecs; ++k) {
3116 const size_t this_col = isConstantStride () ? k : whichVectors_[k];
3117 auto vector_k = subview (this_view_dev, ALL (), this_col);
3118 const size_t A_col = isConstantStride () ? k : A.
whichVectors_[k];
3119 auto vector_Ak = subview (A_view_dev, ALL (), A_col);
3120 KokkosBlas::abs (vector_k, vector_Ak);
3125 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3132 const char tfecfFuncName[] =
"update: ";
3133 using Kokkos::subview;
3138 const size_t lclNumRows = getLocalLength ();
3139 const size_t numVecs = getNumVectors ();
3142 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3144 "this->getLocalLength() = " << lclNumRows <<
" != A.getLocalLength() = "
3146 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3148 "this->getNumVectors() = " << numVecs <<
" != A.getNumVectors() = "
3154 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
3155 const std::pair<size_t, size_t> colRng (0, numVecs);
3157 auto Y_lcl_orig = this->getLocalViewDevice(Access::ReadWrite);
3158 auto Y_lcl = subview (Y_lcl_orig, rowRng, Kokkos::ALL ());
3160 auto X_lcl = subview (X_lcl_orig, rowRng, Kokkos::ALL ());
3164 KokkosBlas::axpby (theAlpha, X_lcl, theBeta, Y_lcl);
3168 for (
size_t k = 0; k < numVecs; ++k) {
3169 const size_t Y_col = this->isConstantStride () ? k : this->whichVectors_[k];
3171 auto Y_k = subview (Y_lcl, ALL (), Y_col);
3172 auto X_k = subview (X_lcl, ALL (), X_col);
3174 KokkosBlas::axpby (theAlpha, X_k, theBeta, Y_k);
3179 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3186 const Scalar& gamma)
3189 using Kokkos::subview;
3191 const char tfecfFuncName[] =
"update(alpha,A,beta,B,gamma): ";
3195 const size_t lclNumRows = this->getLocalLength ();
3196 const size_t numVecs = getNumVectors ();
3199 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3201 "The input MultiVector A has " << A.
getLocalLength () <<
" local "
3202 "row(s), but this MultiVector has " << lclNumRows <<
" local "
3204 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3206 "The input MultiVector B has " << B.
getLocalLength () <<
" local "
3207 "row(s), but this MultiVector has " << lclNumRows <<
" local "
3209 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3211 "The input MultiVector A has " << A.
getNumVectors () <<
" column(s), "
3212 "but this MultiVector has " << numVecs <<
" column(s).");
3213 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3215 "The input MultiVector B has " << B.
getNumVectors () <<
" column(s), "
3216 "but this MultiVector has " << numVecs <<
" column(s).");
3223 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
3224 const std::pair<size_t, size_t> colRng (0, numVecs);
3229 auto C_lcl = subview (this->getLocalViewDevice(Access::ReadWrite), rowRng, ALL ());
3234 KokkosBlas::update (theAlpha, A_lcl, theBeta, B_lcl, theGamma, C_lcl);
3239 for (
size_t k = 0; k < numVecs; ++k) {
3240 const size_t this_col = isConstantStride () ? k : whichVectors_[k];
3243 KokkosBlas::update (theAlpha, subview (A_lcl, rowRng, A_col),
3244 theBeta, subview (B_lcl, rowRng, B_col),
3245 theGamma, subview (C_lcl, rowRng, this_col));
3250 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3251 Teuchos::ArrayRCP<const Scalar>
3257 const char tfecfFuncName[] =
"getData: ";
3264 auto hostView = getLocalViewHost(Access::ReadOnly);
3265 const size_t col = isConstantStride () ? j : whichVectors_[j];
3266 auto hostView_j = Kokkos::subview (hostView, ALL (), col);
3267 Teuchos::ArrayRCP<const IST> dataAsArcp =
3268 Kokkos::Compat::persistingView (hostView_j, 0, getLocalLength ());
3270 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3271 (static_cast<size_t> (hostView_j.extent (0)) <
3272 static_cast<size_t> (dataAsArcp.size ()), std::logic_error,
3273 "hostView_j.extent(0) = " << hostView_j.extent (0)
3274 <<
" < dataAsArcp.size() = " << dataAsArcp.size () <<
". "
3275 "Please report this bug to the Tpetra developers.");
3277 return Teuchos::arcp_reinterpret_cast<
const Scalar> (dataAsArcp);
3280 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3281 Teuchos::ArrayRCP<Scalar>
3286 using Kokkos::subview;
3288 const char tfecfFuncName[] =
"getDataNonConst: ";
3290 auto hostView = getLocalViewHost(Access::ReadWrite);
3291 const size_t col = isConstantStride () ? j : whichVectors_[j];
3292 auto hostView_j = subview (hostView, ALL (), col);
3293 Teuchos::ArrayRCP<IST> dataAsArcp =
3294 Kokkos::Compat::persistingView (hostView_j, 0, getLocalLength ());
3296 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3297 (static_cast<size_t> (hostView_j.extent (0)) <
3298 static_cast<size_t> (dataAsArcp.size ()), std::logic_error,
3299 "hostView_j.extent(0) = " << hostView_j.extent (0)
3300 <<
" < dataAsArcp.size() = " << dataAsArcp.size () <<
". "
3301 "Please report this bug to the Tpetra developers.");
3303 return Teuchos::arcp_reinterpret_cast<Scalar> (dataAsArcp);
3306 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3307 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3309 subCopy (
const Teuchos::ArrayView<const size_t>& cols)
const
3316 bool contiguous =
true;
3317 const size_t numCopyVecs =
static_cast<size_t> (cols.size ());
3318 for (
size_t j = 1; j < numCopyVecs; ++j) {
3319 if (cols[j] != cols[j-1] + static_cast<size_t> (1)) {
3324 if (contiguous && numCopyVecs > 0) {
3325 return this->subCopy (Teuchos::Range1D (cols[0], cols[numCopyVecs-1]));
3328 RCP<const MV> X_sub = this->subView (cols);
3329 RCP<MV> Y (
new MV (this->getMap (), numCopyVecs,
false));
3335 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3336 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3343 RCP<const MV> X_sub = this->subView (colRng);
3344 RCP<MV> Y (
new MV (this->getMap (), static_cast<size_t> (colRng.size ()),
false));
3349 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3353 return view_.origExtent(0);
3356 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3360 return view_.origExtent(1);
3363 template <
class Scalar,
class LO,
class GO,
class Node>
3366 const Teuchos::RCP<const map_type>& subMap,
3367 const local_ordinal_type rowOffset) :
3371 using Kokkos::subview;
3372 using Teuchos::outArg;
3375 using Teuchos::reduceAll;
3376 using Teuchos::REDUCE_MIN;
3379 const char prefix[] =
"Tpetra::MultiVector constructor (offsetView): ";
3380 const char suffix[] =
"Please report this bug to the Tpetra developers.";
3383 std::unique_ptr<std::ostringstream> errStrm;
3390 const auto comm = subMap->getComm ();
3391 TEUCHOS_ASSERT( ! comm.is_null () );
3392 const int myRank = comm->getRank ();
3394 const LO lclNumRowsBefore =
static_cast<LO
> (X.
getLocalLength ());
3396 const LO newNumRows =
static_cast<LO
> (subMap->getLocalNumElements ());
3398 std::ostringstream os;
3399 os <<
"Proc " << myRank <<
": " << prefix
3400 <<
"X: {lclNumRows: " << lclNumRowsBefore
3402 <<
", numCols: " << numCols <<
"}, "
3403 <<
"subMap: {lclNumRows: " << newNumRows <<
"}" << endl;
3404 std::cerr << os.str ();
3409 const bool tooManyElts =
3412 errStrm = std::unique_ptr<std::ostringstream> (
new std::ostringstream);
3413 *errStrm <<
" Proc " << myRank <<
": subMap->getLocalNumElements() (="
3414 << newNumRows <<
") + rowOffset (=" << rowOffset
3418 TEUCHOS_TEST_FOR_EXCEPTION
3419 (! debug && tooManyElts, std::invalid_argument,
3420 prefix << errStrm->str () << suffix);
3424 reduceAll<int, int> (*comm, REDUCE_MIN, lclGood, outArg (gblGood));
3426 std::ostringstream gblErrStrm;
3427 const std::string myErrStr =
3428 errStrm.get () !=
nullptr ? errStrm->str () : std::string (
"");
3429 ::Tpetra::Details::gathervPrint (gblErrStrm, myErrStr, *comm);
3430 TEUCHOS_TEST_FOR_EXCEPTION
3431 (
true, std::invalid_argument, gblErrStrm.str ());
3435 using range_type = std::pair<LO, LO>;
3436 const range_type origRowRng
3437 (rowOffset, static_cast<LO> (X.
view_.origExtent (0)));
3438 const range_type rowRng
3439 (rowOffset, rowOffset + newNumRows);
3441 wrapped_dual_view_type newView = takeSubview (X.
view_, rowRng, ALL ());
3448 if (newView.extent (0) == 0 &&
3449 newView.extent (1) != X.
view_.extent (1)) {
3451 allocDualView<Scalar, LO, GO, Node> (0, X.
getNumVectors ());
3455 MV (subMap, newView) :
3459 const LO lclNumRowsRet =
static_cast<LO
> (subViewMV.getLocalLength ());
3460 const LO numColsRet =
static_cast<LO
> (subViewMV.getNumVectors ());
3461 if (newNumRows != lclNumRowsRet || numCols != numColsRet) {
3463 if (errStrm.get () ==
nullptr) {
3464 errStrm = std::unique_ptr<std::ostringstream> (
new std::ostringstream);
3466 *errStrm <<
" Proc " << myRank <<
3467 ": subMap.getLocalNumElements(): " << newNumRows <<
3468 ", subViewMV.getLocalLength(): " << lclNumRowsRet <<
3469 ", X.getNumVectors(): " << numCols <<
3470 ", subViewMV.getNumVectors(): " << numColsRet << endl;
3472 reduceAll<int, int> (*comm, REDUCE_MIN, lclGood, outArg (gblGood));
3474 std::ostringstream gblErrStrm;
3476 gblErrStrm << prefix <<
"Returned MultiVector has the wrong local "
3477 "dimensions on one or more processes:" << endl;
3479 const std::string myErrStr =
3480 errStrm.get () !=
nullptr ? errStrm->str () : std::string (
"");
3481 ::Tpetra::Details::gathervPrint (gblErrStrm, myErrStr, *comm);
3482 gblErrStrm << suffix << endl;
3483 TEUCHOS_TEST_FOR_EXCEPTION
3484 (
true, std::invalid_argument, gblErrStrm.str ());
3489 std::ostringstream os;
3490 os <<
"Proc " << myRank <<
": " << prefix <<
"Call op=" << endl;
3491 std::cerr << os.str ();
3497 std::ostringstream os;
3498 os <<
"Proc " << myRank <<
": " << prefix <<
"Done!" << endl;
3499 std::cerr << os.str ();
3503 template <
class Scalar,
class LO,
class GO,
class Node>
3506 const map_type& subMap,
3507 const size_t rowOffset) :
3508 MultiVector (X, Teuchos::RCP<const map_type> (new map_type (subMap)),
3512 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3513 Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3516 const size_t offset)
const
3519 return Teuchos::rcp (
new MV (*
this, *subMap, offset));
3522 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3523 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3526 const size_t offset)
3529 return Teuchos::rcp (
new MV (*
this, *subMap, offset));
3532 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3533 Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3535 subView (
const Teuchos::ArrayView<const size_t>& cols)
const
3537 using Teuchos::Array;
3541 const size_t numViewCols =
static_cast<size_t> (cols.size ());
3542 TEUCHOS_TEST_FOR_EXCEPTION(
3543 numViewCols < 1, std::runtime_error,
"Tpetra::MultiVector::subView"
3544 "(const Teuchos::ArrayView<const size_t>&): The input array cols must "
3545 "contain at least one entry, but cols.size() = " << cols.size ()
3550 bool contiguous =
true;
3551 for (
size_t j = 1; j < numViewCols; ++j) {
3552 if (cols[j] != cols[j-1] + static_cast<size_t> (1)) {
3558 if (numViewCols == 0) {
3560 return rcp (
new MV (this->getMap (), numViewCols));
3563 return this->subView (Teuchos::Range1D (cols[0], cols[numViewCols-1]));
3567 if (isConstantStride ()) {
3568 return rcp (
new MV (this->getMap (), view_, cols));
3571 Array<size_t> newcols (cols.size ());
3572 for (
size_t j = 0; j < numViewCols; ++j) {
3573 newcols[j] = whichVectors_[cols[j]];
3575 return rcp (
new MV (this->getMap (), view_, newcols ()));
3580 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3581 Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3585 using ::Tpetra::Details::Behavior;
3587 using Kokkos::subview;
3588 using Teuchos::Array;
3592 const char tfecfFuncName[] =
"subView(Range1D): ";
3594 const size_t lclNumRows = this->getLocalLength ();
3595 const size_t numVecs = this->getNumVectors ();
3599 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3600 static_cast<size_t> (colRng.size ()) > numVecs, std::runtime_error,
3601 "colRng.size() = " << colRng.size () <<
" > this->getNumVectors() = "
3603 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3604 numVecs != 0 && colRng.size () != 0 &&
3605 (colRng.lbound () <
static_cast<Teuchos::Ordinal
> (0) ||
3606 static_cast<size_t> (colRng.ubound ()) >= numVecs),
3607 std::invalid_argument,
"Nonempty input range [" << colRng.lbound () <<
3608 "," << colRng.ubound () <<
"] exceeds the valid range of column indices "
3609 "[0, " << numVecs <<
"].");
3611 RCP<const MV> X_ret;
3621 const std::pair<size_t, size_t> rows (0, lclNumRows);
3622 if (colRng.size () == 0) {
3623 const std::pair<size_t, size_t> cols (0, 0);
3624 wrapped_dual_view_type X_sub = takeSubview (this->view_, ALL (), cols);
3625 X_ret = rcp (
new MV (this->getMap (), X_sub));
3629 if (isConstantStride ()) {
3630 const std::pair<size_t, size_t> cols (colRng.lbound (),
3631 colRng.ubound () + 1);
3632 wrapped_dual_view_type X_sub = takeSubview (this->view_, ALL (), cols);
3633 X_ret = rcp (
new MV (this->getMap (), X_sub));
3636 if (static_cast<size_t> (colRng.size ()) == static_cast<size_t> (1)) {
3639 const std::pair<size_t, size_t> col (whichVectors_[0] + colRng.lbound (),
3640 whichVectors_[0] + colRng.ubound () + 1);
3641 wrapped_dual_view_type X_sub = takeSubview (view_, ALL (), col);
3642 X_ret = rcp (
new MV (this->getMap (), X_sub));
3645 Array<size_t> which (whichVectors_.begin () + colRng.lbound (),
3646 whichVectors_.begin () + colRng.ubound () + 1);
3647 X_ret = rcp (
new MV (this->getMap (), view_, which));
3652 const bool debug = Behavior::debug ();
3654 using Teuchos::Comm;
3655 using Teuchos::outArg;
3656 using Teuchos::REDUCE_MIN;
3657 using Teuchos::reduceAll;
3659 RCP<const Comm<int> > comm = this->getMap ().is_null () ?
3660 Teuchos::null : this->getMap ()->getComm ();
3661 if (! comm.is_null ()) {
3665 if (X_ret.is_null ()) {
3668 reduceAll<int, int> (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess));
3669 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3670 (lclSuccess != 1, std::logic_error,
"X_ret (the subview of this "
3671 "MultiVector; the return value of this method) is null on some MPI "
3672 "process in this MultiVector's communicator. This should never "
3673 "happen. Please report this bug to the Tpetra developers.");
3674 if (! X_ret.is_null () &&
3675 X_ret->getNumVectors () !=
static_cast<size_t> (colRng.size ())) {
3678 reduceAll<int, int> (*comm, REDUCE_MIN, lclSuccess,
3679 outArg (gblSuccess));
3680 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3681 (lclSuccess != 1, std::logic_error,
"X_ret->getNumVectors() != "
3682 "colRng.size(), on at least one MPI process in this MultiVector's "
3683 "communicator. This should never happen. "
3684 "Please report this bug to the Tpetra developers.");
3691 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3692 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3697 return Teuchos::rcp_const_cast<MV> (this->subView (cols));
3701 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3702 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3707 return Teuchos::rcp_const_cast<MV> (this->subView (colRng));
3711 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3717 using Kokkos::subview;
3718 typedef std::pair<size_t, size_t> range_type;
3719 const char tfecfFuncName[] =
"MultiVector(const MultiVector&, const size_t): ";
3722 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3723 (j >= numCols, std::invalid_argument,
"Input index j (== " << j
3724 <<
") exceeds valid column index range [0, " << numCols <<
" - 1].");
3726 static_cast<size_t> (j) :
3728 this->
view_ = takeSubview (X.
view_, Kokkos::ALL (), range_type (jj, jj+1));
3739 const size_t newSize = X.
imports_.extent (0) / numCols;
3740 const size_t offset = jj*newSize;
3742 newImports.d_view = subview (X.
imports_.d_view,
3743 range_type (offset, offset+newSize));
3744 newImports.h_view = subview (X.
imports_.h_view,
3745 range_type (offset, offset+newSize));
3749 const size_t newSize = X.
exports_.extent (0) / numCols;
3750 const size_t offset = jj*newSize;
3752 newExports.d_view = subview (X.
exports_.d_view,
3753 range_type (offset, offset+newSize));
3754 newExports.h_view = subview (X.
exports_.h_view,
3755 range_type (offset, offset+newSize));
3766 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3767 Teuchos::RCP<const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3772 return Teuchos::rcp (
new V (*
this, j));
3776 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3777 Teuchos::RCP<Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3782 return Teuchos::rcp (
new V (*
this, j));
3786 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3789 get1dCopy (
const Teuchos::ArrayView<Scalar>& A,
const size_t LDA)
const
3792 using input_view_type = Kokkos::View<IST**, Kokkos::LayoutLeft,
3794 Kokkos::MemoryUnmanaged>;
3795 const char tfecfFuncName[] =
"get1dCopy: ";
3797 const size_t numRows = this->getLocalLength ();
3798 const size_t numCols = this->getNumVectors ();
3800 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3801 (LDA < numRows, std::runtime_error,
3802 "LDA = " << LDA <<
" < numRows = " << numRows <<
".");
3803 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3804 (numRows >
size_t (0) && numCols >
size_t (0) &&
3805 size_t (A.size ()) < LDA * (numCols - 1) + numRows,
3807 "A.size() = " << A.size () <<
", but its size must be at least "
3808 << (LDA * (numCols - 1) + numRows) <<
" to hold all the entries.");
3810 const std::pair<size_t, size_t> rowRange (0, numRows);
3811 const std::pair<size_t, size_t> colRange (0, numCols);
3813 input_view_type A_view_orig (reinterpret_cast<IST*> (A.getRawPtr ()),
3815 auto A_view = Kokkos::subview (A_view_orig, rowRange, colRange);
3828 if (this->need_sync_host() && this->need_sync_device()) {
3831 throw std::runtime_error(
"Tpetra::MultiVector: A non-const view is alive outside and we cannot give a copy where host or device view can be modified outside");
3834 const bool useHostView = view_.host_view_use_count() >= view_.device_view_use_count();
3835 if (this->isConstantStride ()) {
3837 auto srcView_host = this->getLocalViewHost(Access::ReadOnly);
3841 auto srcView_device = this->getLocalViewDevice(Access::ReadOnly);
3847 for (
size_t j = 0; j < numCols; ++j) {
3848 const size_t srcCol = this->whichVectors_[j];
3849 auto dstColView = Kokkos::subview (A_view, rowRange, j);
3852 auto srcView_host = this->getLocalViewHost(Access::ReadOnly);
3853 auto srcColView_host = Kokkos::subview (srcView_host, rowRange, srcCol);
3857 auto srcView_device = this->getLocalViewDevice(Access::ReadOnly);
3858 auto srcColView_device = Kokkos::subview (srcView_device, rowRange, srcCol);
3868 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3871 get2dCopy (
const Teuchos::ArrayView<
const Teuchos::ArrayView<Scalar> >& ArrayOfPtrs)
const
3874 const char tfecfFuncName[] =
"get2dCopy: ";
3875 const size_t numRows = this->getLocalLength ();
3876 const size_t numCols = this->getNumVectors ();
3878 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3879 static_cast<size_t> (ArrayOfPtrs.size ()) != numCols,
3880 std::runtime_error,
"Input array of pointers must contain as many "
3881 "entries (arrays) as the MultiVector has columns. ArrayOfPtrs.size() = "
3882 << ArrayOfPtrs.size () <<
" != getNumVectors() = " << numCols <<
".");
3884 if (numRows != 0 && numCols != 0) {
3886 for (
size_t j = 0; j < numCols; ++j) {
3887 const size_t dstLen =
static_cast<size_t> (ArrayOfPtrs[j].size ());
3888 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3889 dstLen < numRows, std::invalid_argument,
"Array j = " << j <<
" of "
3890 "the input array of arrays is not long enough to fit all entries in "
3891 "that column of the MultiVector. ArrayOfPtrs[j].size() = " << dstLen
3892 <<
" < getLocalLength() = " << numRows <<
".");
3896 for (
size_t j = 0; j < numCols; ++j) {
3897 Teuchos::RCP<const V> X_j = this->getVector (j);
3898 const size_t LDA =
static_cast<size_t> (ArrayOfPtrs[j].size ());
3899 X_j->get1dCopy (ArrayOfPtrs[j], LDA);
3905 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3906 Teuchos::ArrayRCP<const Scalar>
3910 if (getLocalLength () == 0 || getNumVectors () == 0) {
3911 return Teuchos::null;
3913 TEUCHOS_TEST_FOR_EXCEPTION(
3914 ! isConstantStride (), std::runtime_error,
"Tpetra::MultiVector::"
3915 "get1dView: This MultiVector does not have constant stride, so it is "
3916 "not possible to view its data as a single array. You may check "
3917 "whether a MultiVector has constant stride by calling "
3918 "isConstantStride().");
3922 auto X_lcl = getLocalViewHost(Access::ReadOnly);
3923 Teuchos::ArrayRCP<const impl_scalar_type> dataAsArcp =
3924 Kokkos::Compat::persistingView (X_lcl);
3925 return Teuchos::arcp_reinterpret_cast<
const Scalar> (dataAsArcp);
3929 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3930 Teuchos::ArrayRCP<Scalar>
3934 if (this->getLocalLength () == 0 || this->getNumVectors () == 0) {
3935 return Teuchos::null;
3938 TEUCHOS_TEST_FOR_EXCEPTION
3939 (! isConstantStride (), std::runtime_error,
"Tpetra::MultiVector::"
3940 "get1dViewNonConst: This MultiVector does not have constant stride, "
3941 "so it is not possible to view its data as a single array. You may "
3942 "check whether a MultiVector has constant stride by calling "
3943 "isConstantStride().");
3944 auto X_lcl = getLocalViewHost(Access::ReadWrite);
3945 Teuchos::ArrayRCP<impl_scalar_type> dataAsArcp =
3946 Kokkos::Compat::persistingView (X_lcl);
3947 return Teuchos::arcp_reinterpret_cast<Scalar> (dataAsArcp);
3951 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3952 Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> >
3956 auto X_lcl = getLocalViewHost(Access::ReadWrite);
3962 const size_t myNumRows = this->getLocalLength ();
3963 const size_t numCols = this->getNumVectors ();
3964 const Kokkos::pair<size_t, size_t> rowRange (0, myNumRows);
3966 Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> > views (numCols);
3967 for (
size_t j = 0; j < numCols; ++j) {
3968 const size_t col = this->isConstantStride () ? j : this->whichVectors_[j];
3969 auto X_lcl_j = Kokkos::subview (X_lcl, rowRange, col);
3970 Teuchos::ArrayRCP<impl_scalar_type> X_lcl_j_arcp =
3971 Kokkos::Compat::persistingView (X_lcl_j);
3972 views[j] = Teuchos::arcp_reinterpret_cast<Scalar> (X_lcl_j_arcp);
3977 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3982 return view_.getHostView(s);
3985 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3990 return view_.getHostView(s);
3993 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3998 return view_.getHostView(s);
4001 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4006 return view_.getDeviceView(s);
4009 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4014 return view_.getDeviceView(s);
4017 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4022 return view_.getDeviceView(s);
4025 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4032 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4033 Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> >
4040 auto X_lcl = getLocalViewHost(Access::ReadOnly);
4046 const size_t myNumRows = this->getLocalLength ();
4047 const size_t numCols = this->getNumVectors ();
4048 const Kokkos::pair<size_t, size_t> rowRange (0, myNumRows);
4050 Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> > views (numCols);
4051 for (
size_t j = 0; j < numCols; ++j) {
4052 const size_t col = this->isConstantStride () ? j : this->whichVectors_[j];
4053 auto X_lcl_j = Kokkos::subview (X_lcl, rowRange, col);
4054 Teuchos::ArrayRCP<const impl_scalar_type> X_lcl_j_arcp =
4055 Kokkos::Compat::persistingView (X_lcl_j);
4056 views[j] = Teuchos::arcp_reinterpret_cast<
const Scalar> (X_lcl_j_arcp);
4061 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4065 Teuchos::ETransp transB,
4066 const Scalar& alpha,
4071 using ::Tpetra::Details::ProfilingRegion;
4072 using Teuchos::CONJ_TRANS;
4073 using Teuchos::NO_TRANS;
4074 using Teuchos::TRANS;
4077 using Teuchos::rcpFromRef;
4079 using ATS = Kokkos::ArithTraits<impl_scalar_type>;
4081 using STS = Teuchos::ScalarTraits<Scalar>;
4083 const char tfecfFuncName[] =
"multiply: ";
4084 ProfilingRegion region (
"Tpetra::MV::multiply");
4116 const bool C_is_local = ! this->isDistributed ();
4126 auto myMap = this->getMap ();
4127 if (! myMap.is_null () && ! myMap->getComm ().is_null ()) {
4128 using Teuchos::REDUCE_MIN;
4129 using Teuchos::reduceAll;
4130 using Teuchos::outArg;
4132 auto comm = myMap->getComm ();
4133 const size_t A_nrows =
4135 const size_t A_ncols =
4137 const size_t B_nrows =
4139 const size_t B_ncols =
4142 const bool lclBad = this->getLocalLength () != A_nrows ||
4143 this->getNumVectors () != B_ncols || A_ncols != B_nrows;
4145 const int myRank = comm->getRank ();
4146 std::ostringstream errStrm;
4147 if (this->getLocalLength () != A_nrows) {
4148 errStrm <<
"Proc " << myRank <<
": this->getLocalLength()="
4149 << this->getLocalLength () <<
" != A_nrows=" << A_nrows
4150 <<
"." << std::endl;
4152 if (this->getNumVectors () != B_ncols) {
4153 errStrm <<
"Proc " << myRank <<
": this->getNumVectors()="
4154 << this->getNumVectors () <<
" != B_ncols=" << B_ncols
4155 <<
"." << std::endl;
4157 if (A_ncols != B_nrows) {
4158 errStrm <<
"Proc " << myRank <<
": A_ncols="
4159 << A_ncols <<
" != B_nrows=" << B_nrows
4160 <<
"." << std::endl;
4163 const int lclGood = lclBad ? 0 : 1;
4165 reduceAll<int, int> (*comm, REDUCE_MIN, lclGood, outArg (gblGood));
4167 std::ostringstream os;
4168 ::Tpetra::Details::gathervPrint (os, errStrm.str (), *comm);
4170 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4171 (
true, std::runtime_error,
"Inconsistent local dimensions on at "
4172 "least one process in this object's communicator." << std::endl
4174 <<
"C(" << (C_is_local ?
"local" :
"distr") <<
") = "
4176 << (transA == Teuchos::TRANS ?
"^T" :
4177 (transA == Teuchos::CONJ_TRANS ?
"^H" :
""))
4178 <<
"(" << (A_is_local ?
"local" :
"distr") <<
") * "
4180 << (transA == Teuchos::TRANS ?
"^T" :
4181 (transA == Teuchos::CONJ_TRANS ?
"^H" :
""))
4182 <<
"(" << (B_is_local ?
"local" :
"distr") <<
")." << std::endl
4183 <<
"Global dimensions: C(" << this->getGlobalLength () <<
", "
4193 const bool Case1 = C_is_local && A_is_local && B_is_local;
4195 const bool Case2 = C_is_local && ! A_is_local && ! B_is_local &&
4196 transA != NO_TRANS &&
4199 const bool Case3 = ! C_is_local && ! A_is_local && B_is_local &&
4203 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4204 (! Case1 && ! Case2 && ! Case3, std::runtime_error,
4205 "Multiplication of op(A) and op(B) into *this is not a "
4206 "supported use case.");
4208 if (beta != STS::zero () && Case2) {
4214 const int myRank = this->getMap ()->getComm ()->getRank ();
4216 beta_local = ATS::zero ();
4225 if (! isConstantStride ()) {
4226 C_tmp = rcp (
new MV (*
this, Teuchos::Copy));
4228 C_tmp = rcp (
this,
false);
4231 RCP<const MV> A_tmp;
4233 A_tmp = rcp (
new MV (A, Teuchos::Copy));
4235 A_tmp = rcpFromRef (A);
4238 RCP<const MV> B_tmp;
4240 B_tmp = rcp (
new MV (B, Teuchos::Copy));
4242 B_tmp = rcpFromRef (B);
4245 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4246 (! C_tmp->isConstantStride () || ! B_tmp->isConstantStride () ||
4247 ! A_tmp->isConstantStride (), std::logic_error,
4248 "Failed to make temporary constant-stride copies of MultiVectors.");
4251 const LO A_lclNumRows = A_tmp->getLocalLength ();
4252 const LO A_numVecs = A_tmp->getNumVectors ();
4253 auto A_lcl = A_tmp->getLocalViewDevice(Access::ReadOnly);
4254 auto A_sub = Kokkos::subview (A_lcl,
4255 std::make_pair (LO (0), A_lclNumRows),
4256 std::make_pair (LO (0), A_numVecs));
4259 const LO B_lclNumRows = B_tmp->getLocalLength ();
4260 const LO B_numVecs = B_tmp->getNumVectors ();
4261 auto B_lcl = B_tmp->getLocalViewDevice(Access::ReadOnly);
4262 auto B_sub = Kokkos::subview (B_lcl,
4263 std::make_pair (LO (0), B_lclNumRows),
4264 std::make_pair (LO (0), B_numVecs));
4266 const LO C_lclNumRows = C_tmp->getLocalLength ();
4267 const LO C_numVecs = C_tmp->getNumVectors ();
4269 auto C_lcl = C_tmp->getLocalViewDevice(Access::ReadWrite);
4270 auto C_sub = Kokkos::subview (C_lcl,
4271 std::make_pair (LO (0), C_lclNumRows),
4272 std::make_pair (LO (0), C_numVecs));
4273 const char ctransA = (transA == Teuchos::NO_TRANS ?
'N' :
4274 (transA == Teuchos::TRANS ?
'T' :
'C'));
4275 const char ctransB = (transB == Teuchos::NO_TRANS ?
'N' :
4276 (transB == Teuchos::TRANS ?
'T' :
'C'));
4279 ProfilingRegion regionGemm (
"Tpetra::MV::multiply-call-gemm");
4281 KokkosBlas::gemm (&ctransA, &ctransB, alpha_IST, A_sub, B_sub,
4285 if (! isConstantStride ()) {
4290 A_tmp = Teuchos::null;
4291 B_tmp = Teuchos::null;
4299 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4308 using Kokkos::subview;
4309 const char tfecfFuncName[] =
"elementWiseMultiply: ";
4311 const size_t lclNumRows = this->getLocalLength ();
4312 const size_t numVecs = this->getNumVectors ();
4314 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4316 std::runtime_error,
"MultiVectors do not have the same local length.");
4317 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4318 numVecs != B.
getNumVectors (), std::runtime_error,
"this->getNumVectors"
4319 "() = " << numVecs <<
" != B.getNumVectors() = " << B.
getNumVectors ()
4322 auto this_view = this->getLocalViewDevice(Access::ReadWrite);
4332 KokkosBlas::mult (scalarThis,
4335 subview (A_view, ALL (), 0),
4339 for (
size_t j = 0; j < numVecs; ++j) {
4340 const size_t C_col = isConstantStride () ? j : whichVectors_[j];
4342 KokkosBlas::mult (scalarThis,
4343 subview (this_view, ALL (), C_col),
4345 subview (A_view, ALL (), 0),
4346 subview (B_view, ALL (), B_col));
4351 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4357 using ::Tpetra::Details::ProfilingRegion;
4358 ProfilingRegion region (
"Tpetra::MV::reduce");
4360 const auto map = this->getMap ();
4361 if (map.get () ==
nullptr) {
4364 const auto comm = map->getComm ();
4365 if (comm.get () ==
nullptr) {
4371 const bool changed_on_device = this->need_sync_host ();
4372 if (changed_on_device) {
4376 Kokkos::fence(
"MultiVector::reduce");
4377 auto X_lcl = this->getLocalViewDevice(Access::ReadWrite);
4381 auto X_lcl = this->getLocalViewHost(Access::ReadWrite);
4386 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4394 const LocalOrdinal minLocalIndex = this->getMap()->getMinLocalIndex();
4395 const LocalOrdinal maxLocalIndex = this->getMap()->getMaxLocalIndex();
4396 TEUCHOS_TEST_FOR_EXCEPTION(
4397 lclRow < minLocalIndex || lclRow > maxLocalIndex,
4399 "Tpetra::MultiVector::replaceLocalValue: row index " << lclRow
4400 <<
" is invalid. The range of valid row indices on this process "
4401 << this->getMap()->getComm()->getRank() <<
" is [" << minLocalIndex
4402 <<
", " << maxLocalIndex <<
"].");
4403 TEUCHOS_TEST_FOR_EXCEPTION(
4404 vectorIndexOutOfRange(col),
4406 "Tpetra::MultiVector::replaceLocalValue: vector index " << col
4407 <<
" of the multivector is invalid.");
4410 auto view = this->getLocalViewHost(Access::ReadWrite);
4411 const size_t colInd = isConstantStride () ? col : whichVectors_[col];
4412 view (lclRow, colInd) = ScalarValue;
4416 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4425 const LocalOrdinal minLocalIndex = this->getMap()->getMinLocalIndex();
4426 const LocalOrdinal maxLocalIndex = this->getMap()->getMaxLocalIndex();
4427 TEUCHOS_TEST_FOR_EXCEPTION(
4428 lclRow < minLocalIndex || lclRow > maxLocalIndex,
4430 "Tpetra::MultiVector::sumIntoLocalValue: row index " << lclRow
4431 <<
" is invalid. The range of valid row indices on this process "
4432 << this->getMap()->getComm()->getRank() <<
" is [" << minLocalIndex
4433 <<
", " << maxLocalIndex <<
"].");
4434 TEUCHOS_TEST_FOR_EXCEPTION(
4435 vectorIndexOutOfRange(col),
4437 "Tpetra::MultiVector::sumIntoLocalValue: vector index " << col
4438 <<
" of the multivector is invalid.");
4441 const size_t colInd = isConstantStride () ? col : whichVectors_[col];
4443 auto view = this->getLocalViewHost(Access::ReadWrite);
4445 Kokkos::atomic_add (& (view(lclRow, colInd)), value);
4448 view(lclRow, colInd) += value;
4453 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4462 const LocalOrdinal lclRow = this->map_->getLocalElement (gblRow);
4465 const char tfecfFuncName[] =
"replaceGlobalValue: ";
4466 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4467 (lclRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid (),
4469 "Global row index " << gblRow <<
" is not present on this process "
4470 << this->getMap ()->getComm ()->getRank () <<
".");
4471 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4472 (this->vectorIndexOutOfRange (col), std::runtime_error,
4473 "Vector index " << col <<
" of the MultiVector is invalid.");
4476 this->replaceLocalValue (lclRow, col, ScalarValue);
4479 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4489 const LocalOrdinal lclRow = this->map_->getLocalElement (globalRow);
4492 TEUCHOS_TEST_FOR_EXCEPTION(
4493 lclRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid (),
4495 "Tpetra::MultiVector::sumIntoGlobalValue: Global row index " << globalRow
4496 <<
" is not present on this process "
4497 << this->getMap ()->getComm ()->getRank () <<
".");
4498 TEUCHOS_TEST_FOR_EXCEPTION(
4499 vectorIndexOutOfRange(col),
4501 "Tpetra::MultiVector::sumIntoGlobalValue: Vector index " << col
4502 <<
" of the multivector is invalid.");
4505 this->sumIntoLocalValue (lclRow, col, value, atomic);
4508 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4510 Teuchos::ArrayRCP<T>
4516 typename dual_view_type::array_layout,
4518 const size_t col = isConstantStride () ? j : whichVectors_[j];
4519 col_dual_view_type X_col =
4520 Kokkos::subview (view_, Kokkos::ALL (), col);
4521 return Kokkos::Compat::persistingView (X_col.d_view);
4524 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4531 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4538 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4543 using Teuchos::TypeNameTraits;
4545 std::ostringstream out;
4546 out <<
"\"" << className <<
"\": {";
4547 out <<
"Template parameters: {Scalar: " << TypeNameTraits<Scalar>::name ()
4548 <<
", LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name ()
4549 <<
", GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name ()
4550 <<
", Node" << Node::name ()
4552 if (this->getObjectLabel () !=
"") {
4553 out <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
4555 out <<
", numRows: " << this->getGlobalLength ();
4556 if (className !=
"Tpetra::Vector") {
4557 out <<
", numCols: " << this->getNumVectors ()
4558 <<
", isConstantStride: " << this->isConstantStride ();
4560 if (this->isConstantStride ()) {
4561 out <<
", columnStride: " << this->getStride ();
4568 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4573 return this->descriptionImpl (
"Tpetra::MultiVector");
4578 template<
typename ViewType>
4579 void print_vector(Teuchos::FancyOStream & out,
const char* prefix,
const ViewType& v)
4582 static_assert(Kokkos::SpaceAccessibility<Kokkos::HostSpace, typename ViewType::memory_space>::accessible,
4583 "Tpetra::MultiVector::localDescribeToString: Details::print_vector should be given a host-accessible view.");
4584 static_assert(ViewType::rank == 2,
4585 "Tpetra::MultiVector::localDescribeToString: Details::print_vector should be given a rank-2 view.");
4588 out <<
"Values("<<prefix<<
"): " << std::endl
4590 const size_t numRows = v.extent(0);
4591 const size_t numCols = v.extent(1);
4593 for (
size_t i = 0; i < numRows; ++i) {
4595 if (i + 1 < numRows) {
4601 for (
size_t i = 0; i < numRows; ++i) {
4602 for (
size_t j = 0; j < numCols; ++j) {
4604 if (j + 1 < numCols) {
4608 if (i + 1 < numRows) {
4617 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4622 typedef LocalOrdinal LO;
4625 if (vl <= Teuchos::VERB_LOW) {
4626 return std::string ();
4628 auto map = this->getMap ();
4629 if (map.is_null ()) {
4630 return std::string ();
4632 auto outStringP = Teuchos::rcp (
new std::ostringstream ());
4633 auto outp = Teuchos::getFancyOStream (outStringP);
4634 Teuchos::FancyOStream& out = *outp;
4635 auto comm = map->getComm ();
4636 const int myRank = comm->getRank ();
4637 const int numProcs = comm->getSize ();
4639 out <<
"Process " << myRank <<
" of " << numProcs <<
":" << endl;
4640 Teuchos::OSTab tab1 (out);
4643 const LO lclNumRows =
static_cast<LO
> (this->getLocalLength ());
4644 out <<
"Local number of rows: " << lclNumRows << endl;
4646 if (vl > Teuchos::VERB_MEDIUM) {
4649 if (this->getNumVectors () !=
static_cast<size_t> (1)) {
4650 out <<
"isConstantStride: " << this->isConstantStride () << endl;
4652 if (this->isConstantStride ()) {
4653 out <<
"Column stride: " << this->getStride () << endl;
4656 if (vl > Teuchos::VERB_HIGH && lclNumRows > 0) {
4664 auto X_dev = view_.getDeviceCopy();
4665 auto X_host = view_.getHostCopy();
4667 if(X_dev.data() == X_host.data()) {
4669 Details::print_vector(out,
"unified",X_host);
4672 Details::print_vector(out,
"host",X_host);
4673 Details::print_vector(out,
"dev",X_dev);
4678 return outStringP->str ();
4681 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4685 const std::string& className,
4686 const Teuchos::EVerbosityLevel verbLevel)
const
4688 using Teuchos::TypeNameTraits;
4689 using Teuchos::VERB_DEFAULT;
4690 using Teuchos::VERB_NONE;
4691 using Teuchos::VERB_LOW;
4693 const Teuchos::EVerbosityLevel vl =
4694 (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
4696 if (vl == VERB_NONE) {
4703 auto map = this->getMap ();
4704 if (map.is_null ()) {
4707 auto comm = map->getComm ();
4708 if (comm.is_null ()) {
4712 const int myRank = comm->getRank ();
4721 Teuchos::RCP<Teuchos::OSTab> tab0, tab1;
4725 tab0 = Teuchos::rcp (
new Teuchos::OSTab (out));
4726 out <<
"\"" << className <<
"\":" << endl;
4727 tab1 = Teuchos::rcp (
new Teuchos::OSTab (out));
4729 out <<
"Template parameters:" << endl;
4730 Teuchos::OSTab tab2 (out);
4731 out <<
"Scalar: " << TypeNameTraits<Scalar>::name () << endl
4732 <<
"LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name () << endl
4733 <<
"GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name () << endl
4734 <<
"Node: " << Node::name () << endl;
4736 if (this->getObjectLabel () !=
"") {
4737 out <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
4739 out <<
"Global number of rows: " << this->getGlobalLength () << endl;
4740 if (className !=
"Tpetra::Vector") {
4741 out <<
"Number of columns: " << this->getNumVectors () << endl;
4748 if (vl > VERB_LOW) {
4749 const std::string lclStr = this->localDescribeToString (vl);
4750 ::Tpetra::Details::gathervPrint (out, lclStr, *comm);
4754 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4758 const Teuchos::EVerbosityLevel verbLevel)
const
4760 this->describeImpl (out,
"Tpetra::MultiVector", verbLevel);
4763 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4768 replaceMap (newMap);
4771 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4776 using ::Tpetra::Details::localDeepCopy;
4777 const char prefix[] =
"Tpetra::MultiVector::assign: ";
4779 TEUCHOS_TEST_FOR_EXCEPTION
4781 this->getNumVectors () != src.
getNumVectors (), std::invalid_argument,
4782 prefix <<
"Global dimensions of the two Tpetra::MultiVector "
4783 "objects do not match. src has dimensions [" << src.
getGlobalLength ()
4784 <<
"," << src.
getNumVectors () <<
"], and *this has dimensions ["
4785 << this->getGlobalLength () <<
"," << this->getNumVectors () <<
"].");
4787 TEUCHOS_TEST_FOR_EXCEPTION
4788 (this->getLocalLength () != src.
getLocalLength (), std::invalid_argument,
4789 prefix <<
"The local row counts of the two Tpetra::MultiVector "
4790 "objects do not match. src has " << src.
getLocalLength () <<
" row(s) "
4791 <<
" and *this has " << this->getLocalLength () <<
" row(s).");
4806 if (src_last_updated_on_host) {
4807 localDeepCopy (this->getLocalViewHost(Access::ReadWrite),
4809 this->isConstantStride (),
4811 this->whichVectors_ (),
4815 localDeepCopy (this->getLocalViewDevice(Access::ReadWrite),
4817 this->isConstantStride (),
4819 this->whichVectors_ (),
4824 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4826 Teuchos::RCP<MultiVector<T, LocalOrdinal, GlobalOrdinal, Node> >
4833 this->getNumVectors(),
4839 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4844 using ::Tpetra::Details::PackTraits;
4846 const size_t l1 = this->getLocalLength();
4848 if ((l1!=l2) || (this->getNumVectors() != vec.
getNumVectors())) {
4855 template <
class ST,
class LO,
class GO,
class NT>
4858 std::swap(mv.
map_, this->map_);
4859 std::swap(mv.
view_, this->view_);
4863 #ifdef HAVE_TPETRACORE_TEUCHOSNUMERICS
4864 template <
class ST,
class LO,
class GO,
class NT>
4867 const Teuchos::SerialDenseMatrix<int, ST>& src)
4869 using ::Tpetra::Details::localDeepCopy;
4871 using IST =
typename MV::impl_scalar_type;
4872 using input_view_type =
4873 Kokkos::View<
const IST**, Kokkos::LayoutLeft,
4874 Kokkos::HostSpace, Kokkos::MemoryUnmanaged>;
4875 using pair_type = std::pair<LO, LO>;
4877 const LO numRows =
static_cast<LO
> (src.numRows ());
4878 const LO numCols =
static_cast<LO
> (src.numCols ());
4880 TEUCHOS_TEST_FOR_EXCEPTION
4883 std::invalid_argument,
"Tpetra::deep_copy: On Process "
4884 << dst.
getMap ()->getComm ()->getRank () <<
", dst is "
4886 <<
", but src is " << numRows <<
" x " << numCols <<
".");
4888 const IST* src_raw =
reinterpret_cast<const IST*
> (src.values ());
4889 input_view_type src_orig (src_raw, src.stride (), numCols);
4890 input_view_type src_view =
4891 Kokkos::subview (src_orig, pair_type (0, numRows), Kokkos::ALL ());
4893 constexpr
bool src_isConstantStride =
true;
4894 Teuchos::ArrayView<const size_t> srcWhichVectors (
nullptr, 0);
4898 src_isConstantStride,
4899 getMultiVectorWhichVectors (dst),
4903 template <
class ST,
class LO,
class GO,
class NT>
4905 deep_copy (Teuchos::SerialDenseMatrix<int, ST>& dst,
4908 using ::Tpetra::Details::localDeepCopy;
4910 using IST =
typename MV::impl_scalar_type;
4911 using output_view_type =
4912 Kokkos::View<IST**, Kokkos::LayoutLeft,
4913 Kokkos::HostSpace, Kokkos::MemoryUnmanaged>;
4914 using pair_type = std::pair<LO, LO>;
4916 const LO numRows =
static_cast<LO
> (dst.numRows ());
4917 const LO numCols =
static_cast<LO
> (dst.numCols ());
4919 TEUCHOS_TEST_FOR_EXCEPTION
4922 std::invalid_argument,
"Tpetra::deep_copy: On Process "
4923 << src.
getMap ()->getComm ()->getRank () <<
", src is "
4925 <<
", but dst is " << numRows <<
" x " << numCols <<
".");
4927 IST* dst_raw =
reinterpret_cast<IST*
> (dst.values ());
4928 output_view_type dst_orig (dst_raw, dst.stride (), numCols);
4930 Kokkos::subview (dst_orig, pair_type (0, numRows), Kokkos::ALL ());
4932 constexpr
bool dst_isConstantStride =
true;
4933 Teuchos::ArrayView<const size_t> dstWhichVectors (
nullptr, 0);
4936 localDeepCopy (dst_view,
4938 dst_isConstantStride,
4941 getMultiVectorWhichVectors (src));
4943 #endif // HAVE_TPETRACORE_TEUCHOSNUMERICS
4945 template <
class Scalar,
class LO,
class GO,
class NT>
4946 Teuchos::RCP<MultiVector<Scalar, LO, GO, NT> >
4951 return Teuchos::rcp (
new MV (map, numVectors));
4954 template <
class ST,
class LO,
class GO,
class NT>
4972 #ifdef HAVE_TPETRACORE_TEUCHOSNUMERICS
4973 # define TPETRA_MULTIVECTOR_INSTANT(SCALAR,LO,GO,NODE) \
4974 template class MultiVector< SCALAR , LO , GO , NODE >; \
4975 template MultiVector< SCALAR , LO , GO , NODE > createCopy( const MultiVector< SCALAR , LO , GO , NODE >& src); \
4976 template Teuchos::RCP<MultiVector< SCALAR , LO , GO , NODE > > createMultiVector (const Teuchos::RCP<const Map<LO, GO, NODE> >& map, size_t numVectors); \
4977 template void deep_copy (MultiVector<SCALAR, LO, GO, NODE>& dst, const Teuchos::SerialDenseMatrix<int, SCALAR>& src); \
4978 template void deep_copy (Teuchos::SerialDenseMatrix<int, SCALAR>& dst, const MultiVector<SCALAR, LO, GO, NODE>& src);
4981 # define TPETRA_MULTIVECTOR_INSTANT(SCALAR,LO,GO,NODE) \
4982 template class MultiVector< SCALAR , LO , GO , NODE >; \
4983 template MultiVector< SCALAR , LO , GO , NODE > createCopy( const MultiVector< SCALAR , LO , GO , NODE >& src);
4985 #endif // HAVE_TPETRACORE_TEUCHOSNUMERICS
4988 #define TPETRA_MULTIVECTOR_CONVERT_INSTANT(SO,SI,LO,GO,NODE) \
4990 template Teuchos::RCP< MultiVector< SO , LO , GO , NODE > > \
4991 MultiVector< SI , LO , GO , NODE >::convert< SO > () const;
4994 #endif // TPETRA_MULTIVECTOR_DEF_HPP
typename map_type::local_ordinal_type local_ordinal_type
The type of local indices that this class uses.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
typename device_type::execution_space execution_space
Type of the (new) Kokkos execution space.
Kokkos::DualView< size_t *, buffer_device_type > numExportPacketsPerLID_
Number of packets to send for each send operation.
Teuchos::RCP< const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > offsetView(const Teuchos::RCP< const map_type > &subMap, const size_t offset) const
Return a const view of a subset of rows.
Declaration and definition of Tpetra::Details::Blas::fill, an implementation detail of Tpetra::MultiV...
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
typename Kokkos::ArithTraits< impl_scalar_type >::mag_type mag_type
Type of a norm result.
size_t getNumVectors() const
Number of columns in the multivector.
size_t getLocalLength() const
Local number of rows on the calling process.
Declaration of a function that prints strings from each process.
bool isConstantStride() const
Whether this multivector has constant stride between columns.
MultiVector< ST, LO, GO, NT > createCopy(const MultiVector< ST, LO, GO, NT > &src)
Return a deep copy of the given MultiVector.
One or more distributed dense vectors.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
static size_t multivectorKernelLocationThreshold()
the threshold for transitioning from device to host
bool isDistributed() const
Whether this is a globally distributed object.
void removeEmptyProcessesInPlace(Teuchos::RCP< DistObjectType > &input, const Teuchos::RCP< const Map< typename DistObjectType::local_ordinal_type, typename DistObjectType::global_ordinal_type, typename DistObjectType::node_type > > &newMap)
Remove processes which contain no elements in this object's Map.
static bool debug()
Whether Tpetra is in debug mode.
Teuchos::Array< size_t > whichVectors_
Indices of columns this multivector is viewing.
Declaration of functions for checking whether a given pointer is accessible from a given Kokkos execu...
int local_ordinal_type
Default value of Scalar template parameter.
bool need_sync_device() const
Whether this MultiVector needs synchronization to the device.
typename Kokkos::ArithTraits< Scalar >::val_type impl_scalar_type
The type used internally in place of Scalar.
size_t global_size_t
Global size_t object.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
dual_view_type::t_host::const_type getLocalViewHost(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on host. This requires that ther...
Kokkos::DualView< size_t *, buffer_device_type > numImportPacketsPerLID_
Number of packets to receive for each receive operation.
Insert new values that don't currently exist.
Kokkos::DualView< packet_type *, buffer_device_type > imports_
Buffer into which packed data are imported (received from other processes).
Kokkos::DualView< packet_type *, buffer_device_type > exports_
Buffer from which packed data are exported (sent to other processes).
Kokkos::DualView< T *, DT > getDualViewCopyFromArrayView(const Teuchos::ArrayView< const T > &x_av, const char label[], const bool leaveOnHost)
Get a 1-D Kokkos::DualView which is a deep copy of the input Teuchos::ArrayView (which views host mem...
wrapped_dual_view_type view_
The Kokkos::DualView containing the MultiVector's data.
void assign(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &src)
Copy the contents of src into *this (deep copy).
static void allReduceView(const OutputViewType &output, const InputViewType &input, const Teuchos::Comm< int > &comm)
All-reduce from input Kokkos::View to output Kokkos::View.
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > createMultiVector(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &map, const size_t numVectors)
Nonmember MultiVector "constructor": Create a MultiVector from a given Map.
Functions for initializing and finalizing Tpetra.
static bool verbose()
Whether Tpetra is in verbose mode.
CombineMode
Rule for combining data in an Import or Export.
Teuchos::RCP< const map_type > map_
The Map over which this object is distributed.
bool reallocDualViewIfNeeded(Kokkos::DualView< ValueType *, DeviceType > &dv, const size_t newSize, const char newLabel[], const size_t tooBigFactor=2, const bool needFenceBeforeRealloc=true)
Reallocate the DualView in/out argument, if needed.
Declaration of Tpetra::Details::isInterComm.
bool need_sync_host() const
Whether this MultiVector needs synchronization to the host.
Replace old value with maximum of magnitudes of old and new values.
Abstract base class for objects that can be the source of an Import or Export operation.
Declaration and definition of Tpetra::Details::reallocDualViewIfNeeded, an implementation detail of T...
Kokkos::DualView< impl_scalar_type **, Kokkos::LayoutLeft, device_type > dual_view_type
Kokkos::DualView specialization used by this class.
Replace existing values with new values.
dual_view_type::t_dev::const_type getLocalViewDevice(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on device. This requires that th...
std::string combineModeToString(const CombineMode combineMode)
Human-readable string representation of the given CombineMode.
Teuchos::RCP< const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > getVector(const size_t j) const
Return a Vector which is a const view of column j.
MultiVector()
Default constructor: makes a MultiVector with no rows or columns.
void start()
Start the deep_copy counter.
A parallel distribution of indices over processes.
A distributed dense vector.
Stand-alone utility functions and macros.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
Declaration and definition of Tpetra::Details::normImpl, which is an implementation detail of Tpetra:...
void normImpl(MagnitudeType norms[], const Kokkos::View< const ValueType **, ArrayLayout, DeviceType > &X, const EWhichNorm whichNorm, const Teuchos::ArrayView< const size_t > &whichVecs, const bool isConstantStride, const bool isDistributed, const Teuchos::Comm< int > *comm)
Implementation of MultiVector norms.
Base class for distributed Tpetra objects that support data redistribution.
global_size_t getGlobalLength() const
Global number of rows in the multivector.
EWhichNorm
Input argument for normImpl() (which see).
Accumulate new values into existing values (may not be supported in all classes)
size_t getOrigNumLocalRows() const
"Original" number of rows in the (local) data.
All-reduce a 1-D or 2-D Kokkos::View.
Declaration and definition of Tpetra::Details::lclDot, an implementation detail of Tpetra::MultiVecto...
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.