11 #ifndef TPETRA_MULTIVECTOR_DEF_HPP
12 #define TPETRA_MULTIVECTOR_DEF_HPP
24 #include "Tpetra_Vector.hpp"
36 #include "Tpetra_Details_Random.hpp"
37 #ifdef HAVE_TPETRACORE_TEUCHOSNUMERICS
38 # include "Teuchos_SerialDenseMatrix.hpp"
39 #endif // HAVE_TPETRACORE_TEUCHOSNUMERICS
40 #include "Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels.hpp"
41 #include "KokkosCompat_View.hpp"
42 #include "KokkosBlas.hpp"
43 #include "KokkosKernels_Utils.hpp"
44 #include "Kokkos_Random.hpp"
45 #include "Kokkos_ArithTraits.hpp"
49 #ifdef HAVE_TPETRA_INST_FLOAT128
52 template<
class Generator>
53 struct rand<Generator, __float128> {
54 static KOKKOS_INLINE_FUNCTION __float128 max ()
56 return static_cast<__float128
> (1.0);
58 static KOKKOS_INLINE_FUNCTION __float128
63 const __float128 scalingFactor =
64 static_cast<__float128
> (std::numeric_limits<double>::min ()) /
65 static_cast<__float128> (2.0);
66 const __float128 higherOrderTerm =
static_cast<__float128
> (gen.drand ());
67 const __float128 lowerOrderTerm =
68 static_cast<__float128
> (gen.drand ()) * scalingFactor;
69 return higherOrderTerm + lowerOrderTerm;
71 static KOKKOS_INLINE_FUNCTION __float128
72 draw (Generator& gen,
const __float128& range)
75 const __float128 scalingFactor =
76 static_cast<__float128
> (std::numeric_limits<double>::min ()) /
77 static_cast<__float128> (2.0);
78 const __float128 higherOrderTerm =
79 static_cast<__float128
> (gen.drand (range));
80 const __float128 lowerOrderTerm =
81 static_cast<__float128
> (gen.drand (range)) * scalingFactor;
82 return higherOrderTerm + lowerOrderTerm;
84 static KOKKOS_INLINE_FUNCTION __float128
85 draw (Generator& gen,
const __float128&
start,
const __float128& end)
88 const __float128 scalingFactor =
89 static_cast<__float128
> (std::numeric_limits<double>::min ()) /
90 static_cast<__float128> (2.0);
91 const __float128 higherOrderTerm =
92 static_cast<__float128
> (gen.drand (start, end));
93 const __float128 lowerOrderTerm =
94 static_cast<__float128
> (gen.drand (start, end)) * scalingFactor;
95 return higherOrderTerm + lowerOrderTerm;
99 #endif // HAVE_TPETRA_INST_FLOAT128
117 template<
class ST,
class LO,
class GO,
class NT>
119 allocDualView (
const size_t lclNumRows,
120 const size_t numCols,
121 const bool zeroOut =
true,
122 const bool allowPadding =
false)
124 using ::Tpetra::Details::Behavior;
125 using Kokkos::AllowPadding;
126 using Kokkos::view_alloc;
127 using Kokkos::WithoutInitializing;
130 typedef typename dual_view_type::t_dev dev_view_type;
136 const std::string label (
"MV::DualView");
137 const bool debug = Behavior::debug ();
147 dev_view_type d_view;
150 d_view = dev_view_type (view_alloc (label, AllowPadding),
151 lclNumRows, numCols);
154 d_view = dev_view_type (view_alloc (label),
155 lclNumRows, numCols);
160 d_view = dev_view_type (view_alloc (label,
163 lclNumRows, numCols);
166 d_view = dev_view_type (view_alloc (label, WithoutInitializing),
167 lclNumRows, numCols);
178 const ST nan = Kokkos::ArithTraits<ST>::nan ();
179 KokkosBlas::fill (d_view, nan);
183 TEUCHOS_TEST_FOR_EXCEPTION
184 (static_cast<size_t> (d_view.extent (0)) != lclNumRows ||
185 static_cast<size_t> (d_view.extent (1)) != numCols, std::logic_error,
186 "allocDualView: d_view's dimensions actual dimensions do not match "
187 "requested dimensions. d_view is " << d_view.extent (0) <<
" x " <<
188 d_view.extent (1) <<
"; requested " << lclNumRows <<
" x " << numCols
189 <<
". Please report this bug to the Tpetra developers.");
192 return wrapped_dual_view_type(d_view);
199 template<
class T,
class ExecSpace>
200 struct MakeUnmanagedView {
210 typedef typename std::conditional<
211 Kokkos::SpaceAccessibility<
212 typename ExecSpace::memory_space,
213 Kokkos::HostSpace>::accessible,
214 typename ExecSpace::device_type,
215 typename Kokkos::DualView<T*, ExecSpace>::host_mirror_space>::type host_exec_space;
216 typedef Kokkos::LayoutLeft array_layout;
217 typedef Kokkos::View<T*, array_layout, host_exec_space,
218 Kokkos::MemoryUnmanaged> view_type;
220 static view_type getView (
const Teuchos::ArrayView<T>& x_in)
222 const size_t numEnt =
static_cast<size_t> (x_in.size ());
226 return view_type (x_in.getRawPtr (), numEnt);
232 template<
class WrappedDualViewType>
234 takeSubview (
const WrappedDualViewType& X,
235 const std::pair<size_t, size_t>& rowRng,
236 const Kokkos::ALL_t& colRng)
240 return WrappedDualViewType(X,rowRng,colRng);
243 template<
class WrappedDualViewType>
245 takeSubview (
const WrappedDualViewType& X,
246 const Kokkos::ALL_t& rowRng,
247 const std::pair<size_t, size_t>& colRng)
249 using DualViewType =
typename WrappedDualViewType::DVT;
252 if (X.extent (0) == 0 && X.extent (1) != 0) {
253 return WrappedDualViewType(DualViewType (
"MV::DualView", 0, colRng.second - colRng.first));
256 return WrappedDualViewType(X,rowRng,colRng);
260 template<
class WrappedDualViewType>
262 takeSubview (
const WrappedDualViewType& X,
263 const std::pair<size_t, size_t>& rowRng,
264 const std::pair<size_t, size_t>& colRng)
266 using DualViewType =
typename WrappedDualViewType::DVT;
276 if (X.extent (0) == 0 && X.extent (1) != 0) {
277 return WrappedDualViewType(DualViewType (
"MV::DualView", 0, colRng.second - colRng.first));
280 return WrappedDualViewType(X,rowRng,colRng);
284 template<
class WrappedOrNotDualViewType>
286 getDualViewStride (
const WrappedOrNotDualViewType& dv)
292 size_t strides[WrappedOrNotDualViewType::t_dev::rank+1];
294 const size_t LDA = strides[1];
295 const size_t numRows = dv.extent (0);
298 return (numRows == 0) ? size_t (1) : numRows;
305 template<
class ViewType>
307 getViewStride (
const ViewType& view)
309 const size_t LDA = view.stride (1);
310 const size_t numRows = view.extent (0);
313 return (numRows == 0) ? size_t (1) : numRows;
320 template <
class impl_scalar_type,
class buffer_device_type>
323 Kokkos::DualView<impl_scalar_type*, buffer_device_type> imports
326 if (! imports.need_sync_device ()) {
331 size_t localLengthThreshold =
333 return imports.extent(0) <= localLengthThreshold;
338 template <
class SC,
class LO,
class GO,
class NT>
340 runKernelOnHost (const ::Tpetra::MultiVector<SC, LO, GO, NT>& X)
342 if (! X.need_sync_device ()) {
347 size_t localLengthThreshold =
349 return X.getLocalLength () <= localLengthThreshold;
353 template <
class SC,
class LO,
class GO,
class NT>
355 multiVectorNormImpl (typename ::Tpetra::MultiVector<SC, LO, GO, NT>::mag_type norms[],
359 using namespace Tpetra;
362 using val_type =
typename MV::impl_scalar_type;
363 using mag_type =
typename MV::mag_type;
364 using dual_view_type =
typename MV::dual_view_type;
367 auto comm = map.is_null () ?
nullptr : map->getComm ().getRawPtr ();
368 auto whichVecs = getMultiVectorWhichVectors (X);
372 const bool runOnHost = runKernelOnHost (X);
374 using view_type =
typename dual_view_type::t_host;
375 using array_layout =
typename view_type::array_layout;
376 using device_type =
typename view_type::device_type;
379 normImpl<val_type, array_layout, device_type,
380 mag_type> (norms, X_lcl, whichNorm, whichVecs,
381 isConstantStride, isDistributed, comm);
384 using view_type =
typename dual_view_type::t_dev;
385 using array_layout =
typename view_type::array_layout;
386 using device_type =
typename view_type::device_type;
389 normImpl<val_type, array_layout, device_type,
390 mag_type> (norms, X_lcl, whichNorm, whichVecs,
391 isConstantStride, isDistributed, comm);
400 template <
typename DstView,
typename SrcView>
401 struct AddAssignFunctor {
404 AddAssignFunctor(DstView &tgt_, SrcView &src_) : tgt(tgt_), src(src_) {}
406 KOKKOS_INLINE_FUNCTION
void
407 operator () (
const size_t k)
const { tgt(k) += src(k); }
414 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
418 return (VectorIndex < 1 && VectorIndex != 0) || VectorIndex >= getNumVectors();
421 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
427 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
430 const size_t numVecs,
431 const bool zeroOut) :
437 view_ = allocDualView<Scalar, LocalOrdinal, GlobalOrdinal, Node> (lclNumRows, numVecs, zeroOut);
440 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
443 const Teuchos::DataAccess copyOrView) :
445 view_ (source.view_),
446 whichVectors_ (source.whichVectors_)
449 const char tfecfFuncName[] =
"MultiVector(const MultiVector&, "
450 "const Teuchos::DataAccess): ";
454 if (copyOrView == Teuchos::Copy) {
458 this->
view_ = cpy.view_;
461 else if (copyOrView == Teuchos::View) {
464 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
465 true, std::invalid_argument,
"Second argument 'copyOrView' has an "
466 "invalid value " << copyOrView <<
". Valid values include "
467 "Teuchos::Copy = " << Teuchos::Copy <<
" and Teuchos::View = "
468 << Teuchos::View <<
".");
472 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
475 const dual_view_type& view) :
477 view_ (wrapped_dual_view_type(view))
479 const char tfecfFuncName[] =
"MultiVector(Map,DualView): ";
480 const size_t lclNumRows_map = map.is_null () ? size_t (0) :
481 map->getLocalNumElements ();
482 const size_t lclNumRows_view = view.extent (0);
483 const size_t LDA = getDualViewStride (
view_);
485 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
486 (LDA < lclNumRows_map || lclNumRows_map != lclNumRows_view,
487 std::invalid_argument,
"Kokkos::DualView does not match Map. "
488 "map->getLocalNumElements() = " << lclNumRows_map
489 <<
", view.extent(0) = " << lclNumRows_view
490 <<
", and getStride() = " << LDA <<
".");
492 using ::Tpetra::Details::Behavior;
493 const bool debug = Behavior::debug ();
495 using ::Tpetra::Details::checkGlobalDualViewValidity;
496 std::ostringstream gblErrStrm;
497 const bool verbose = Behavior::verbose ();
498 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
499 const bool gblValid =
500 checkGlobalDualViewValidity (&gblErrStrm, view, verbose,
502 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
503 (! gblValid, std::runtime_error, gblErrStrm.str ());
508 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
511 const wrapped_dual_view_type& view) :
515 const char tfecfFuncName[] =
"MultiVector(Map,DualView): ";
516 const size_t lclNumRows_map = map.is_null () ? size_t (0) :
517 map->getLocalNumElements ();
518 const size_t lclNumRows_view = view.extent (0);
519 const size_t LDA = getDualViewStride (view);
521 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
522 (LDA < lclNumRows_map || lclNumRows_map != lclNumRows_view,
523 std::invalid_argument,
"Kokkos::DualView does not match Map. "
524 "map->getLocalNumElements() = " << lclNumRows_map
525 <<
", view.extent(0) = " << lclNumRows_view
526 <<
", and getStride() = " << LDA <<
".");
528 using ::Tpetra::Details::Behavior;
529 const bool debug = Behavior::debug ();
531 using ::Tpetra::Details::checkGlobalWrappedDualViewValidity;
532 std::ostringstream gblErrStrm;
533 const bool verbose = Behavior::verbose ();
534 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
535 const bool gblValid =
536 checkGlobalWrappedDualViewValidity (&gblErrStrm, view, verbose,
538 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
539 (! gblValid, std::runtime_error, gblErrStrm.str ());
545 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
548 const typename dual_view_type::t_dev& d_view) :
551 using Teuchos::ArrayRCP;
553 const char tfecfFuncName[] =
"MultiVector(map,d_view): ";
557 const size_t LDA = getViewStride (d_view);
558 const size_t lclNumRows = map->getLocalNumElements ();
559 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
560 (LDA < lclNumRows, std::invalid_argument,
"Map does not match "
561 "Kokkos::View. map->getLocalNumElements() = " << lclNumRows
562 <<
", View's column stride = " << LDA
563 <<
", and View's extent(0) = " << d_view.extent (0) <<
".");
565 auto h_view = Kokkos::create_mirror_view (d_view);
567 view_ = wrapped_dual_view_type(dual_view);
569 using ::Tpetra::Details::Behavior;
570 const bool debug = Behavior::debug ();
572 using ::Tpetra::Details::checkGlobalWrappedDualViewValidity;
573 std::ostringstream gblErrStrm;
574 const bool verbose = Behavior::verbose ();
575 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
576 const bool gblValid =
577 checkGlobalWrappedDualViewValidity (&gblErrStrm,
view_, verbose,
579 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
580 (! gblValid, std::runtime_error, gblErrStrm.str ());
585 dual_view.modify_device ();
588 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
591 const dual_view_type& view,
592 const dual_view_type& origView) :
594 view_ (wrapped_dual_view_type(view,origView))
596 const char tfecfFuncName[] =
"MultiVector(map,view,origView): ";
598 const size_t LDA = getDualViewStride (origView);
600 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
601 LDA < lclNumRows, std::invalid_argument,
"The input Kokkos::DualView's "
602 "column stride LDA = " << LDA <<
" < getLocalLength() = " << lclNumRows
603 <<
". This may also mean that the input origView's first dimension (number "
604 "of rows = " << origView.extent (0) <<
") does not not match the number "
605 "of entries in the Map on the calling process.");
607 using ::Tpetra::Details::Behavior;
608 const bool debug = Behavior::debug ();
610 using ::Tpetra::Details::checkGlobalDualViewValidity;
611 std::ostringstream gblErrStrm;
612 const bool verbose = Behavior::verbose ();
613 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
614 const bool gblValid_0 =
615 checkGlobalDualViewValidity (&gblErrStrm, view, verbose,
617 const bool gblValid_1 =
618 checkGlobalDualViewValidity (&gblErrStrm, origView, verbose,
620 const bool gblValid = gblValid_0 && gblValid_1;
621 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
622 (! gblValid, std::runtime_error, gblErrStrm.str ());
627 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
630 const dual_view_type& view,
631 const Teuchos::ArrayView<const size_t>& whichVectors) :
634 whichVectors_ (whichVectors.begin (), whichVectors.end ())
637 using Kokkos::subview;
638 const char tfecfFuncName[] =
"MultiVector(map,view,whichVectors): ";
640 using ::Tpetra::Details::Behavior;
641 const bool debug = Behavior::debug ();
643 using ::Tpetra::Details::checkGlobalDualViewValidity;
644 std::ostringstream gblErrStrm;
645 const bool verbose = Behavior::verbose ();
646 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
647 const bool gblValid =
648 checkGlobalDualViewValidity (&gblErrStrm, view, verbose,
650 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
651 (! gblValid, std::runtime_error, gblErrStrm.str ());
654 const size_t lclNumRows = map.is_null () ? size_t (0) :
655 map->getLocalNumElements ();
662 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
663 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (0)) < lclNumRows,
664 std::invalid_argument,
"view.extent(0) = " << view.extent (0)
665 <<
" < map->getLocalNumElements() = " << lclNumRows <<
".");
666 if (whichVectors.size () != 0) {
667 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
668 view.extent (1) != 0 && view.extent (1) == 0,
669 std::invalid_argument,
"view.extent(1) = 0, but whichVectors.size()"
670 " = " << whichVectors.size () <<
" > 0.");
671 size_t maxColInd = 0;
672 typedef Teuchos::ArrayView<const size_t>::size_type size_type;
673 for (size_type k = 0; k < whichVectors.size (); ++k) {
674 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
675 whichVectors[k] == Teuchos::OrdinalTraits<size_t>::invalid (),
676 std::invalid_argument,
"whichVectors[" << k <<
"] = "
677 "Teuchos::OrdinalTraits<size_t>::invalid().");
678 maxColInd = std::max (maxColInd, whichVectors[k]);
680 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
681 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (1)) <= maxColInd,
682 std::invalid_argument,
"view.extent(1) = " << view.extent (1)
683 <<
" <= max(whichVectors) = " << maxColInd <<
".");
688 const size_t LDA = getDualViewStride (view);
689 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
690 (LDA < lclNumRows, std::invalid_argument,
691 "LDA = " << LDA <<
" < this->getLocalLength() = " << lclNumRows <<
".");
693 if (whichVectors.size () == 1) {
703 const std::pair<size_t, size_t> colRng (whichVectors[0],
704 whichVectors[0] + 1);
711 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
714 const wrapped_dual_view_type& view,
715 const Teuchos::ArrayView<const size_t>& whichVectors) :
718 whichVectors_ (whichVectors.begin (), whichVectors.end ())
721 using Kokkos::subview;
722 const char tfecfFuncName[] =
"MultiVector(map,view,whichVectors): ";
724 using ::Tpetra::Details::Behavior;
725 const bool debug = Behavior::debug ();
727 using ::Tpetra::Details::checkGlobalWrappedDualViewValidity;
728 std::ostringstream gblErrStrm;
729 const bool verbose = Behavior::verbose ();
730 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
731 const bool gblValid =
732 checkGlobalWrappedDualViewValidity (&gblErrStrm, view, verbose,
734 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
735 (! gblValid, std::runtime_error, gblErrStrm.str ());
738 const size_t lclNumRows = map.is_null () ? size_t (0) :
739 map->getLocalNumElements ();
746 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
747 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (0)) < lclNumRows,
748 std::invalid_argument,
"view.extent(0) = " << view.extent (0)
749 <<
" < map->getLocalNumElements() = " << lclNumRows <<
".");
750 if (whichVectors.size () != 0) {
751 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
752 view.extent (1) != 0 && view.extent (1) == 0,
753 std::invalid_argument,
"view.extent(1) = 0, but whichVectors.size()"
754 " = " << whichVectors.size () <<
" > 0.");
755 size_t maxColInd = 0;
756 typedef Teuchos::ArrayView<const size_t>::size_type size_type;
757 for (size_type k = 0; k < whichVectors.size (); ++k) {
758 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
759 whichVectors[k] == Teuchos::OrdinalTraits<size_t>::invalid (),
760 std::invalid_argument,
"whichVectors[" << k <<
"] = "
761 "Teuchos::OrdinalTraits<size_t>::invalid().");
762 maxColInd = std::max (maxColInd, whichVectors[k]);
764 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
765 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (1)) <= maxColInd,
766 std::invalid_argument,
"view.extent(1) = " << view.extent (1)
767 <<
" <= max(whichVectors) = " << maxColInd <<
".");
772 const size_t LDA = getDualViewStride (view);
773 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
774 (LDA < lclNumRows, std::invalid_argument,
775 "LDA = " << LDA <<
" < this->getLocalLength() = " << lclNumRows <<
".");
777 if (whichVectors.size () == 1) {
787 const std::pair<size_t, size_t> colRng (whichVectors[0],
788 whichVectors[0] + 1);
796 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
799 const dual_view_type& view,
800 const dual_view_type& origView,
801 const Teuchos::ArrayView<const size_t>& whichVectors) :
803 view_ (wrapped_dual_view_type(view,origView)),
804 whichVectors_ (whichVectors.begin (), whichVectors.end ())
807 using Kokkos::subview;
808 const char tfecfFuncName[] =
"MultiVector(map,view,origView,whichVectors): ";
810 using ::Tpetra::Details::Behavior;
811 const bool debug = Behavior::debug ();
813 using ::Tpetra::Details::checkGlobalDualViewValidity;
814 std::ostringstream gblErrStrm;
815 const bool verbose = Behavior::verbose ();
816 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
817 const bool gblValid_0 =
818 checkGlobalDualViewValidity (&gblErrStrm, view, verbose,
820 const bool gblValid_1 =
821 checkGlobalDualViewValidity (&gblErrStrm, origView, verbose,
823 const bool gblValid = gblValid_0 && gblValid_1;
824 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
825 (! gblValid, std::runtime_error, gblErrStrm.str ());
835 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
836 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (0)) < lclNumRows,
837 std::invalid_argument,
"view.extent(0) = " << view.extent (0)
838 <<
" < map->getLocalNumElements() = " << lclNumRows <<
".");
839 if (whichVectors.size () != 0) {
840 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
841 view.extent (1) != 0 && view.extent (1) == 0,
842 std::invalid_argument,
"view.extent(1) = 0, but whichVectors.size()"
843 " = " << whichVectors.size () <<
" > 0.");
844 size_t maxColInd = 0;
845 typedef Teuchos::ArrayView<const size_t>::size_type size_type;
846 for (size_type k = 0; k < whichVectors.size (); ++k) {
847 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
848 whichVectors[k] == Teuchos::OrdinalTraits<size_t>::invalid (),
849 std::invalid_argument,
"whichVectors[" << k <<
"] = "
850 "Teuchos::OrdinalTraits<size_t>::invalid().");
851 maxColInd = std::max (maxColInd, whichVectors[k]);
853 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
854 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (1)) <= maxColInd,
855 std::invalid_argument,
"view.extent(1) = " << view.extent (1)
856 <<
" <= max(whichVectors) = " << maxColInd <<
".");
861 const size_t LDA = getDualViewStride (origView);
862 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
863 (LDA < lclNumRows, std::invalid_argument,
"Map and DualView origView "
864 "do not match. LDA = " << LDA <<
" < this->getLocalLength() = " <<
865 lclNumRows <<
". origView.extent(0) = " << origView.extent(0)
866 <<
", origView.stride(1) = " << origView.d_view.stride(1) <<
".");
868 if (whichVectors.size () == 1) {
877 const std::pair<size_t, size_t> colRng (whichVectors[0],
878 whichVectors[0] + 1);
886 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
889 const Teuchos::ArrayView<const Scalar>& data,
891 const size_t numVecs) :
894 typedef LocalOrdinal LO;
895 typedef GlobalOrdinal GO;
896 const char tfecfFuncName[] =
"MultiVector(map,data,LDA,numVecs): ";
902 const size_t lclNumRows =
903 map.is_null () ? size_t (0) : map->getLocalNumElements ();
904 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
905 (LDA < lclNumRows, std::invalid_argument,
"LDA = " << LDA <<
" < "
906 "map->getLocalNumElements() = " << lclNumRows <<
".");
908 const size_t minNumEntries = LDA * (numVecs - 1) + lclNumRows;
909 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
910 (static_cast<size_t> (data.size ()) < minNumEntries,
911 std::invalid_argument,
"Input Teuchos::ArrayView does not have enough "
912 "entries, given the input Map and number of vectors in the MultiVector."
913 " data.size() = " << data.size () <<
" < (LDA*(numVecs-1)) + "
914 "map->getLocalNumElements () = " << minNumEntries <<
".");
917 this->
view_ = allocDualView<Scalar, LO, GO, Node> (lclNumRows, numVecs);
929 Kokkos::MemoryUnmanaged> X_in_orig (X_in_raw, LDA, numVecs);
930 const Kokkos::pair<size_t, size_t> rowRng (0, lclNumRows);
931 auto X_in = Kokkos::subview (X_in_orig, rowRng, Kokkos::ALL ());
936 const size_t outStride =
937 X_out.extent (1) == 0 ? size_t (1) : X_out.stride (1);
938 if (LDA == outStride) {
945 typedef decltype (Kokkos::subview (X_out, Kokkos::ALL (), 0))
947 typedef decltype (Kokkos::subview (X_in, Kokkos::ALL (), 0))
949 for (
size_t j = 0; j < numVecs; ++j) {
950 out_col_view_type X_out_j = Kokkos::subview (X_out, Kokkos::ALL (), j);
951 in_col_view_type X_in_j = Kokkos::subview (X_in, Kokkos::ALL (), j);
958 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
961 const Teuchos::ArrayView<
const Teuchos::ArrayView<const Scalar> >& ArrayOfPtrs,
962 const size_t numVecs) :
966 typedef LocalOrdinal LO;
967 typedef GlobalOrdinal GO;
968 const char tfecfFuncName[] =
"MultiVector(map,ArrayOfPtrs,numVecs): ";
971 const size_t lclNumRows =
972 map.is_null () ? size_t (0) : map->getLocalNumElements ();
973 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
974 (numVecs < 1 || numVecs != static_cast<size_t> (ArrayOfPtrs.size ()),
975 std::runtime_error,
"Either numVecs (= " << numVecs <<
") < 1, or "
976 "ArrayOfPtrs.size() (= " << ArrayOfPtrs.size () <<
") != numVecs.");
977 for (
size_t j = 0; j < numVecs; ++j) {
978 Teuchos::ArrayView<const Scalar> X_j_av = ArrayOfPtrs[j];
979 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
980 static_cast<size_t> (X_j_av.size ()) < lclNumRows,
981 std::invalid_argument,
"ArrayOfPtrs[" << j <<
"].size() = "
982 << X_j_av.size () <<
" < map->getLocalNumElements() = " << lclNumRows
986 view_ = allocDualView<Scalar, LO, GO, Node> (lclNumRows, numVecs);
991 using array_layout =
typename decltype (X_out)::array_layout;
992 using input_col_view_type =
typename Kokkos::View<
const IST*,
995 Kokkos::MemoryUnmanaged>;
997 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
998 for (
size_t j = 0; j < numVecs; ++j) {
999 Teuchos::ArrayView<const IST> X_j_av =
1000 Teuchos::av_reinterpret_cast<
const IST> (ArrayOfPtrs[j]);
1001 input_col_view_type X_j_in (X_j_av.getRawPtr (), lclNumRows);
1002 auto X_j_out = Kokkos::subview (X_out, rowRng, j);
1008 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1011 return whichVectors_.empty ();
1014 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1019 if (this->getMap ().is_null ()) {
1020 return static_cast<size_t> (0);
1022 return this->getMap ()->getLocalNumElements ();
1026 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1031 if (this->getMap ().is_null ()) {
1032 return static_cast<size_t> (0);
1034 return this->getMap ()->getGlobalNumElements ();
1038 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1043 return isConstantStride () ? getDualViewStride (view_) : size_t (0);
1046 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1052 auto thisData = view_.getDualView().h_view.data();
1053 auto otherData = other.
view_.getDualView().h_view.data();
1054 size_t thisSpan = view_.getDualView().h_view.span();
1055 size_t otherSpan = other.
view_.getDualView().h_view.span();
1056 return (otherData <= thisData && thisData < otherData + otherSpan)
1057 || (thisData <= otherData && otherData < thisData + thisSpan);
1060 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1069 const MV* src =
dynamic_cast<const MV*
> (&sourceObj);
1070 if (src ==
nullptr) {
1084 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1088 return this->getNumVectors ();
1091 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1096 const size_t numSameIDs,
1097 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs,
1098 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs,
1100 const execution_space &space)
1102 using ::Tpetra::Details::Behavior;
1104 using ::Tpetra::Details::ProfilingRegion;
1106 using KokkosRefactor::Details::permute_array_multi_column;
1107 using KokkosRefactor::Details::permute_array_multi_column_variable_stride;
1108 using Kokkos::Compat::create_const_view;
1112 MV& sourceMV =
const_cast<MV &
>(
dynamic_cast<const MV&
> (sourceObj));
1113 const bool copyOnHost = runKernelOnHost(sourceMV);
1114 const char longFuncNameHost[] =
"Tpetra::MultiVector::copyAndPermute[Host]";
1115 const char longFuncNameDevice[] =
"Tpetra::MultiVector::copyAndPermute[Device]";
1116 const char tfecfFuncName[] =
"copyAndPermute: ";
1117 ProfilingRegion regionCAP (copyOnHost ? longFuncNameHost : longFuncNameDevice);
1119 const bool verbose = Behavior::verbose ();
1120 std::unique_ptr<std::string> prefix;
1122 auto map = this->getMap ();
1123 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
1124 const int myRank = comm.is_null () ? -1 : comm->getRank ();
1125 std::ostringstream os;
1126 os <<
"Proc " << myRank <<
": MV::copyAndPermute: ";
1127 prefix = std::unique_ptr<std::string> (
new std::string (os.str ()));
1128 os <<
"Start" << endl;
1129 std::cerr << os.str ();
1132 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1133 (permuteToLIDs.extent (0) != permuteFromLIDs.extent (0),
1134 std::logic_error,
"permuteToLIDs.extent(0) = "
1135 << permuteToLIDs.extent (0) <<
" != permuteFromLIDs.extent(0) = "
1136 << permuteFromLIDs.extent (0) <<
".");
1137 const size_t numCols = this->getNumVectors ();
1141 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1142 (sourceMV.need_sync_device () && sourceMV.need_sync_host (),
1143 std::logic_error,
"Input MultiVector needs sync to both host "
1146 std::ostringstream os;
1147 os << *prefix <<
"copyOnHost=" << (copyOnHost ?
"true" :
"false") << endl;
1148 std::cerr << os.str ();
1152 std::ostringstream os;
1153 os << *prefix <<
"Copy" << endl;
1154 std::cerr << os.str ();
1179 if (numSameIDs > 0) {
1180 const std::pair<size_t, size_t> rows (0, numSameIDs);
1182 auto tgt_h = this->getLocalViewHost(Access::ReadWrite);
1183 auto src_h = sourceMV.getLocalViewHost(Access::ReadOnly);
1185 for (
size_t j = 0; j < numCols; ++j) {
1186 const size_t tgtCol = isConstantStride () ? j : whichVectors_[j];
1187 const size_t srcCol =
1188 sourceMV.isConstantStride () ? j : sourceMV.whichVectors_[j];
1190 auto tgt_j = Kokkos::subview (tgt_h, rows, tgtCol);
1191 auto src_j = Kokkos::subview (src_h, rows, srcCol);
1195 Kokkos::RangePolicy<execution_space, size_t>;
1196 range_t rp(space, 0,numSameIDs);
1197 Tpetra::Details::AddAssignFunctor<decltype(tgt_j), decltype(src_j)>
1199 Kokkos::parallel_for(rp, aaf);
1210 auto tgt_d = this->getLocalViewDevice(Access::ReadWrite);
1211 auto src_d = sourceMV.getLocalViewDevice(Access::ReadOnly);
1213 for (
size_t j = 0; j < numCols; ++j) {
1214 const size_t tgtCol = isConstantStride () ? j : whichVectors_[j];
1215 const size_t srcCol =
1216 sourceMV.isConstantStride () ? j : sourceMV.whichVectors_[j];
1218 auto tgt_j = Kokkos::subview (tgt_d, rows, tgtCol);
1219 auto src_j = Kokkos::subview (src_d, rows, srcCol);
1223 Kokkos::RangePolicy<execution_space, size_t>;
1224 range_t rp(space, 0,numSameIDs);
1225 Tpetra::Details::AddAssignFunctor<decltype(tgt_j), decltype(src_j)>
1227 Kokkos::parallel_for(rp, aaf);
1250 if (permuteFromLIDs.extent (0) == 0 ||
1251 permuteToLIDs.extent (0) == 0) {
1253 std::ostringstream os;
1254 os << *prefix <<
"No permutations. Done!" << endl;
1255 std::cerr << os.str ();
1261 std::ostringstream os;
1262 os << *prefix <<
"Permute" << endl;
1263 std::cerr << os.str ();
1268 const bool nonConstStride =
1269 ! this->isConstantStride () || ! sourceMV.isConstantStride ();
1272 std::ostringstream os;
1273 os << *prefix <<
"nonConstStride="
1274 << (nonConstStride ?
"true" :
"false") << endl;
1275 std::cerr << os.str ();
1282 Kokkos::DualView<const size_t*, device_type> srcWhichVecs;
1283 Kokkos::DualView<const size_t*, device_type> tgtWhichVecs;
1284 if (nonConstStride) {
1285 if (this->whichVectors_.size () == 0) {
1286 Kokkos::DualView<size_t*, device_type> tmpTgt (
"tgtWhichVecs", numCols);
1287 tmpTgt.modify_host ();
1288 for (
size_t j = 0; j < numCols; ++j) {
1289 tmpTgt.h_view(j) = j;
1292 tmpTgt.sync_device ();
1294 tgtWhichVecs = tmpTgt;
1297 Teuchos::ArrayView<const size_t> tgtWhichVecsT = this->whichVectors_ ();
1299 getDualViewCopyFromArrayView<size_t, device_type> (tgtWhichVecsT,
1303 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1304 (static_cast<size_t> (tgtWhichVecs.extent (0)) !=
1305 this->getNumVectors (),
1306 std::logic_error,
"tgtWhichVecs.extent(0) = " <<
1307 tgtWhichVecs.extent (0) <<
" != this->getNumVectors() = " <<
1308 this->getNumVectors () <<
".");
1310 if (sourceMV.whichVectors_.size () == 0) {
1311 Kokkos::DualView<size_t*, device_type> tmpSrc (
"srcWhichVecs", numCols);
1312 tmpSrc.modify_host ();
1313 for (
size_t j = 0; j < numCols; ++j) {
1314 tmpSrc.h_view(j) = j;
1317 tmpSrc.sync_device ();
1319 srcWhichVecs = tmpSrc;
1322 Teuchos::ArrayView<const size_t> srcWhichVecsT =
1323 sourceMV.whichVectors_ ();
1325 getDualViewCopyFromArrayView<size_t, device_type> (srcWhichVecsT,
1329 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1330 (static_cast<size_t> (srcWhichVecs.extent (0)) !=
1331 sourceMV.getNumVectors (), std::logic_error,
1332 "srcWhichVecs.extent(0) = " << srcWhichVecs.extent (0)
1333 <<
" != sourceMV.getNumVectors() = " << sourceMV.getNumVectors ()
1339 std::ostringstream os;
1340 os << *prefix <<
"Get permute LIDs on host" << std::endl;
1341 std::cerr << os.str ();
1343 auto tgt_h = this->getLocalViewHost(Access::ReadWrite);
1344 auto src_h = sourceMV.getLocalViewHost(Access::ReadOnly);
1346 TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_host () );
1347 auto permuteToLIDs_h = create_const_view (permuteToLIDs.view_host ());
1348 TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_host () );
1349 auto permuteFromLIDs_h =
1350 create_const_view (permuteFromLIDs.view_host ());
1353 std::ostringstream os;
1354 os << *prefix <<
"Permute on host" << endl;
1355 std::cerr << os.str ();
1357 if (nonConstStride) {
1360 auto tgtWhichVecs_h =
1361 create_const_view (tgtWhichVecs.view_host ());
1362 auto srcWhichVecs_h =
1363 create_const_view (srcWhichVecs.view_host ());
1365 using op_type = KokkosRefactor::Details::AddOp;
1366 permute_array_multi_column_variable_stride (tgt_h, src_h,
1370 srcWhichVecs_h, numCols,
1374 using op_type = KokkosRefactor::Details::InsertOp;
1375 permute_array_multi_column_variable_stride (tgt_h, src_h,
1379 srcWhichVecs_h, numCols,
1385 using op_type = KokkosRefactor::Details::AddOp;
1386 permute_array_multi_column (tgt_h, src_h, permuteToLIDs_h,
1387 permuteFromLIDs_h, numCols, op_type());
1390 using op_type = KokkosRefactor::Details::InsertOp;
1391 permute_array_multi_column (tgt_h, src_h, permuteToLIDs_h,
1392 permuteFromLIDs_h, numCols, op_type());
1398 std::ostringstream os;
1399 os << *prefix <<
"Get permute LIDs on device" << endl;
1400 std::cerr << os.str ();
1402 auto tgt_d = this->getLocalViewDevice(Access::ReadWrite);
1403 auto src_d = sourceMV.getLocalViewDevice(Access::ReadOnly);
1405 TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_device () );
1406 auto permuteToLIDs_d = create_const_view (permuteToLIDs.view_device ());
1407 TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_device () );
1408 auto permuteFromLIDs_d =
1409 create_const_view (permuteFromLIDs.view_device ());
1412 std::ostringstream os;
1413 os << *prefix <<
"Permute on device" << endl;
1414 std::cerr << os.str ();
1416 if (nonConstStride) {
1419 auto tgtWhichVecs_d = create_const_view (tgtWhichVecs.view_device ());
1420 auto srcWhichVecs_d = create_const_view (srcWhichVecs.view_device ());
1422 using op_type = KokkosRefactor::Details::AddOp;
1423 permute_array_multi_column_variable_stride (space, tgt_d, src_d,
1427 srcWhichVecs_d, numCols,
1431 using op_type = KokkosRefactor::Details::InsertOp;
1432 permute_array_multi_column_variable_stride (space, tgt_d, src_d,
1436 srcWhichVecs_d, numCols,
1442 using op_type = KokkosRefactor::Details::AddOp;
1443 permute_array_multi_column (space, tgt_d, src_d, permuteToLIDs_d,
1444 permuteFromLIDs_d, numCols, op_type());
1447 using op_type = KokkosRefactor::Details::InsertOp;
1448 permute_array_multi_column (space, tgt_d, src_d, permuteToLIDs_d,
1449 permuteFromLIDs_d, numCols, op_type());
1455 std::ostringstream os;
1456 os << *prefix <<
"Done!" << endl;
1457 std::cerr << os.str ();
1462 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1465 const Kokkos::DualView<const local_ordinal_type *, buffer_device_type>
1467 const Kokkos::DualView<const local_ordinal_type *, buffer_device_type>
1470 copyAndPermute(sourceObj, numSameIDs, permuteToLIDs, permuteFromLIDs, CM,
1475 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1480 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
1481 Kokkos::DualView<impl_scalar_type*, buffer_device_type>& exports,
1482 Kokkos::DualView<size_t*, buffer_device_type> ,
1483 size_t& constantNumPackets,
1484 const execution_space &space)
1486 using ::Tpetra::Details::Behavior;
1487 using ::Tpetra::Details::ProfilingRegion;
1489 using Kokkos::Compat::create_const_view;
1490 using Kokkos::Compat::getKokkosViewDeepCopy;
1495 MV& sourceMV =
const_cast<MV&
>(
dynamic_cast<const MV&
> (sourceObj));
1497 const bool packOnHost = runKernelOnHost(sourceMV);
1498 const char longFuncNameHost[] =
"Tpetra::MultiVector::packAndPrepare[Host]";
1499 const char longFuncNameDevice[] =
"Tpetra::MultiVector::packAndPrepare[Device]";
1500 const char tfecfFuncName[] =
"packAndPrepare: ";
1501 ProfilingRegion regionPAP (packOnHost ? longFuncNameHost : longFuncNameDevice);
1509 const bool debugCheckIndices = Behavior::debug ();
1514 const bool printDebugOutput = Behavior::verbose ();
1516 std::unique_ptr<std::string> prefix;
1517 if (printDebugOutput) {
1518 auto map = this->getMap ();
1519 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
1520 const int myRank = comm.is_null () ? -1 : comm->getRank ();
1521 std::ostringstream os;
1522 os <<
"Proc " << myRank <<
": MV::packAndPrepare: ";
1523 prefix = std::unique_ptr<std::string> (
new std::string (os.str ()));
1524 os <<
"Start" << endl;
1525 std::cerr << os.str ();
1529 const size_t numCols = sourceMV.getNumVectors ();
1534 constantNumPackets = numCols;
1538 if (exportLIDs.extent (0) == 0) {
1539 if (printDebugOutput) {
1540 std::ostringstream os;
1541 os << *prefix <<
"No exports on this proc, DONE" << std::endl;
1542 std::cerr << os.str ();
1562 const size_t numExportLIDs = exportLIDs.extent (0);
1563 const size_t newExportsSize = numCols * numExportLIDs;
1564 if (printDebugOutput) {
1565 std::ostringstream os;
1566 os << *prefix <<
"realloc: "
1567 <<
"numExportLIDs: " << numExportLIDs
1568 <<
", exports.extent(0): " << exports.extent (0)
1569 <<
", newExportsSize: " << newExportsSize << std::endl;
1570 std::cerr << os.str ();
1576 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1577 (sourceMV.need_sync_device () && sourceMV.need_sync_host (),
1578 std::logic_error,
"Input MultiVector needs sync to both host "
1580 if (printDebugOutput) {
1581 std::ostringstream os;
1582 os << *prefix <<
"packOnHost=" << (packOnHost ?
"true" :
"false") << endl;
1583 std::cerr << os.str ();
1595 exports.clear_sync_state ();
1596 exports.modify_host ();
1604 exports.clear_sync_state ();
1605 exports.modify_device ();
1621 if (sourceMV.isConstantStride ()) {
1622 using KokkosRefactor::Details::pack_array_single_column;
1623 if (printDebugOutput) {
1624 std::ostringstream os;
1625 os << *prefix <<
"Pack numCols=1 const stride" << endl;
1626 std::cerr << os.str ();
1629 auto src_host = sourceMV.getLocalViewHost(Access::ReadOnly);
1630 pack_array_single_column (exports.view_host (),
1632 exportLIDs.view_host (),
1637 auto src_dev = sourceMV.getLocalViewDevice(Access::ReadOnly);
1638 pack_array_single_column (exports.view_device (),
1640 exportLIDs.view_device (),
1647 using KokkosRefactor::Details::pack_array_single_column;
1648 if (printDebugOutput) {
1649 std::ostringstream os;
1650 os << *prefix <<
"Pack numCols=1 nonconst stride" << endl;
1651 std::cerr << os.str ();
1654 auto src_host = sourceMV.getLocalViewHost(Access::ReadOnly);
1655 pack_array_single_column (exports.view_host (),
1657 exportLIDs.view_host (),
1658 sourceMV.whichVectors_[0],
1662 auto src_dev = sourceMV.getLocalViewDevice(Access::ReadOnly);
1663 pack_array_single_column (exports.view_device (),
1665 exportLIDs.view_device (),
1666 sourceMV.whichVectors_[0],
1673 if (sourceMV.isConstantStride ()) {
1674 using KokkosRefactor::Details::pack_array_multi_column;
1675 if (printDebugOutput) {
1676 std::ostringstream os;
1677 os << *prefix <<
"Pack numCols=" << numCols <<
" const stride" << endl;
1678 std::cerr << os.str ();
1681 auto src_host = sourceMV.getLocalViewHost(Access::ReadOnly);
1682 pack_array_multi_column (exports.view_host (),
1684 exportLIDs.view_host (),
1689 auto src_dev = sourceMV.getLocalViewDevice(Access::ReadOnly);
1690 pack_array_multi_column (exports.view_device (),
1692 exportLIDs.view_device (),
1699 using KokkosRefactor::Details::pack_array_multi_column_variable_stride;
1700 if (printDebugOutput) {
1701 std::ostringstream os;
1702 os << *prefix <<
"Pack numCols=" << numCols <<
" nonconst stride"
1704 std::cerr << os.str ();
1709 using IST = impl_scalar_type;
1710 using DV = Kokkos::DualView<IST*, device_type>;
1711 using HES =
typename DV::t_host::execution_space;
1712 using DES =
typename DV::t_dev::execution_space;
1713 Teuchos::ArrayView<const size_t> whichVecs = sourceMV.whichVectors_ ();
1715 auto src_host = sourceMV.getLocalViewHost(Access::ReadOnly);
1716 pack_array_multi_column_variable_stride
1717 (exports.view_host (),
1719 exportLIDs.view_host (),
1720 getKokkosViewDeepCopy<HES> (whichVecs),
1725 auto src_dev = sourceMV.getLocalViewDevice(Access::ReadOnly);
1726 pack_array_multi_column_variable_stride
1727 (exports.view_device (),
1729 exportLIDs.view_device (),
1730 getKokkosViewDeepCopy<DES> (whichVecs),
1732 debugCheckIndices, space);
1737 if (printDebugOutput) {
1738 std::ostringstream os;
1739 os << *prefix <<
"Done!" << endl;
1740 std::cerr << os.str ();
1746 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1751 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
1752 Kokkos::DualView<impl_scalar_type*, buffer_device_type>& exports,
1753 Kokkos::DualView<size_t*, buffer_device_type> numExportPacketsPerLID,
1754 size_t& constantNumPackets) {
1755 packAndPrepare(sourceObj, exportLIDs, exports, numExportPacketsPerLID, constantNumPackets, execution_space());
1759 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1761 typename std::enable_if<std::is_same<typename Tpetra::Details::DefaultTypes::CommBufferMemorySpace<typename NO::execution_space>::type,
1762 typename NO::device_type::memory_space>::value,
1767 const std::string* prefix,
1768 const bool areRemoteLIDsContiguous,
1777 std::ostringstream os;
1778 os << *prefix <<
"Realloc (if needed) imports_ from "
1779 << this->imports_.extent (0) <<
" to " << newSize << std::endl;
1780 std::cerr << os.str ();
1783 bool reallocated =
false;
1785 using IST = impl_scalar_type;
1786 using DV = Kokkos::DualView<IST*, Kokkos::LayoutLeft, buffer_device_type>;
1798 if ((std::is_same_v<typename dual_view_type::t_dev::device_type, typename dual_view_type::t_host::device_type> ||
1799 (Details::Behavior::assumeMpiIsGPUAware () && !this->need_sync_device()) ||
1800 (!Details::Behavior::assumeMpiIsGPUAware () && !this->need_sync_host())) &&
1801 areRemoteLIDsContiguous &&
1803 (getNumVectors() == 1) &&
1806 size_t offset = getLocalLength () - newSize;
1807 reallocated = this->imports_.d_view.data() != view_.getDualView().d_view.data() + offset;
1809 typedef std::pair<size_t, size_t> range_type;
1810 this->imports_ = DV(view_.getDualView(),
1811 range_type (offset, getLocalLength () ),
1815 std::ostringstream os;
1816 os << *prefix <<
"Aliased imports_ to MV.view_" << std::endl;
1817 std::cerr << os.str ();
1827 std::ostringstream os;
1828 os << *prefix <<
"Finished realloc'ing unaliased_imports_" << std::endl;
1829 std::cerr << os.str ();
1831 this->imports_ = this->unaliased_imports_;
1836 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1838 typename std::enable_if<!std::is_same<typename Tpetra::Details::DefaultTypes::CommBufferMemorySpace<typename NO::execution_space>::type,
1839 typename NO::device_type::memory_space>::value,
1844 const std::string* prefix,
1845 const bool areRemoteLIDsContiguous,
1851 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1856 const std::string* prefix,
1857 const bool areRemoteLIDsContiguous,
1860 return reallocImportsIfNeededImpl<Node>(newSize, verbose, prefix, areRemoteLIDsContiguous, CM);
1863 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1867 return (this->imports_.d_view.data() + this->imports_.d_view.extent(0) ==
1868 view_.getDualView().d_view.data() + view_.getDualView().d_view.extent(0));
1872 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1876 (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& importLIDs,
1877 Kokkos::DualView<impl_scalar_type*, buffer_device_type> imports,
1878 Kokkos::DualView<size_t*, buffer_device_type> ,
1879 const size_t constantNumPackets,
1881 const execution_space &space)
1883 using ::Tpetra::Details::Behavior;
1884 using ::Tpetra::Details::ProfilingRegion;
1885 using KokkosRefactor::Details::unpack_array_multi_column;
1886 using KokkosRefactor::Details::unpack_array_multi_column_variable_stride;
1887 using Kokkos::Compat::getKokkosViewDeepCopy;
1890 const bool unpackOnHost = runKernelOnHost(imports);
1892 const char longFuncNameHost[] =
"Tpetra::MultiVector::unpackAndCombine[Host]";
1893 const char longFuncNameDevice[] =
"Tpetra::MultiVector::unpackAndCombine[Device]";
1894 const char * longFuncName = unpackOnHost ? longFuncNameHost : longFuncNameDevice;
1895 const char tfecfFuncName[] =
"unpackAndCombine: ";
1896 ProfilingRegion regionUAC (longFuncName);
1904 const bool debugCheckIndices = Behavior::debug ();
1906 const bool printDebugOutput = Behavior::verbose ();
1907 std::unique_ptr<std::string> prefix;
1908 if (printDebugOutput) {
1909 auto map = this->getMap ();
1910 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
1911 const int myRank = comm.is_null () ? -1 : comm->getRank ();
1912 std::ostringstream os;
1913 os <<
"Proc " << myRank <<
": " << longFuncName <<
": ";
1914 prefix = std::unique_ptr<std::string> (
new std::string (os.str ()));
1915 os <<
"Start" << endl;
1916 std::cerr << os.str ();
1920 if (importLIDs.extent (0) == 0) {
1921 if (printDebugOutput) {
1922 std::ostringstream os;
1923 os << *prefix <<
"No imports. Done!" << endl;
1924 std::cerr << os.str ();
1930 if (importsAreAliased()) {
1931 if (printDebugOutput) {
1932 std::ostringstream os;
1933 os << *prefix <<
"Skipping unpack, since imports_ is aliased to MV.view_. Done!" << endl;
1934 std::cerr << os.str ();
1940 const size_t numVecs = getNumVectors ();
1941 if (debugCheckIndices) {
1942 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1943 (static_cast<size_t> (imports.extent (0)) !=
1944 numVecs * importLIDs.extent (0),
1946 "imports.extent(0) = " << imports.extent (0)
1947 <<
" != getNumVectors() * importLIDs.extent(0) = " << numVecs
1948 <<
" * " << importLIDs.extent (0) <<
" = "
1949 << numVecs * importLIDs.extent (0) <<
".");
1951 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1952 (constantNumPackets == static_cast<size_t> (0), std::runtime_error,
1953 "constantNumPackets input argument must be nonzero.");
1955 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1956 (static_cast<size_t> (numVecs) !=
1957 static_cast<size_t> (constantNumPackets),
1958 std::runtime_error,
"constantNumPackets must equal numVecs.");
1967 if (this->imports_.need_sync_host()) this->imports_.sync_host();
1970 if (this->imports_.need_sync_device()) this->imports_.sync_device();
1973 if (printDebugOutput) {
1974 std::ostringstream os;
1975 os << *prefix <<
"unpackOnHost=" << (unpackOnHost ?
"true" :
"false")
1977 std::cerr << os.str ();
1982 auto imports_d = imports.view_device ();
1983 auto imports_h = imports.view_host ();
1984 auto importLIDs_d = importLIDs.view_device ();
1985 auto importLIDs_h = importLIDs.view_host ();
1987 Kokkos::DualView<size_t*, device_type> whichVecs;
1988 if (! isConstantStride ()) {
1989 Kokkos::View<
const size_t*, Kokkos::HostSpace,
1990 Kokkos::MemoryUnmanaged> whichVecsIn (whichVectors_.getRawPtr (),
1992 whichVecs = Kokkos::DualView<size_t*, device_type> (
"whichVecs", numVecs);
1994 whichVecs.modify_host ();
1999 whichVecs.modify_device ();
2004 auto whichVecs_d = whichVecs.view_device ();
2005 auto whichVecs_h = whichVecs.view_host ();
2015 if (numVecs > 0 && importLIDs.extent (0) > 0) {
2016 using dev_exec_space =
typename dual_view_type::t_dev::execution_space;
2017 using host_exec_space =
typename dual_view_type::t_host::execution_space;
2020 const bool use_atomic_updates = unpackOnHost ?
2021 host_exec_space().concurrency () != 1 :
2022 dev_exec_space().concurrency () != 1;
2024 if (printDebugOutput) {
2025 std::ostringstream os;
2027 std::cerr << os.str ();
2034 using op_type = KokkosRefactor::Details::InsertOp;
2035 if (isConstantStride ()) {
2037 auto X_h = this->getLocalViewHost(Access::ReadWrite);
2038 unpack_array_multi_column (host_exec_space (),
2039 X_h, imports_h, importLIDs_h,
2040 op_type (), numVecs,
2046 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
2047 unpack_array_multi_column (space,
2048 X_d, imports_d, importLIDs_d,
2049 op_type (), numVecs,
2056 auto X_h = this->getLocalViewHost(Access::ReadWrite);
2057 unpack_array_multi_column_variable_stride (host_exec_space (),
2067 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
2068 unpack_array_multi_column_variable_stride (space,
2080 using op_type = KokkosRefactor::Details::AddOp;
2081 if (isConstantStride ()) {
2083 auto X_h = this->getLocalViewHost(Access::ReadWrite);
2084 unpack_array_multi_column (host_exec_space (),
2085 X_h, imports_h, importLIDs_h,
2086 op_type (), numVecs,
2091 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
2092 unpack_array_multi_column (space,
2093 X_d, imports_d, importLIDs_d,
2094 op_type (), numVecs,
2101 auto X_h = this->getLocalViewHost(Access::ReadWrite);
2102 unpack_array_multi_column_variable_stride (host_exec_space (),
2112 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
2113 unpack_array_multi_column_variable_stride (space,
2125 using op_type = KokkosRefactor::Details::AbsMaxOp;
2126 if (isConstantStride ()) {
2128 auto X_h = this->getLocalViewHost(Access::ReadWrite);
2129 unpack_array_multi_column (host_exec_space (),
2130 X_h, imports_h, importLIDs_h,
2131 op_type (), numVecs,
2136 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
2137 unpack_array_multi_column (space,
2138 X_d, imports_d, importLIDs_d,
2139 op_type (), numVecs,
2146 auto X_h = this->getLocalViewHost(Access::ReadWrite);
2147 unpack_array_multi_column_variable_stride (host_exec_space (),
2157 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
2158 unpack_array_multi_column_variable_stride (space,
2170 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2171 (
true, std::logic_error,
"Invalid CombineMode");
2175 if (printDebugOutput) {
2176 std::ostringstream os;
2177 os << *prefix <<
"Nothing to unpack" << endl;
2178 std::cerr << os.str ();
2182 if (printDebugOutput) {
2183 std::ostringstream os;
2184 os << *prefix <<
"Done!" << endl;
2185 std::cerr << os.str ();
2190 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2194 (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& importLIDs,
2195 Kokkos::DualView<impl_scalar_type*, buffer_device_type> imports,
2196 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
2197 const size_t constantNumPackets,
2199 unpackAndCombine(importLIDs, imports, numPacketsPerLID, constantNumPackets, CM, execution_space());
2203 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2208 if (isConstantStride ()) {
2209 return static_cast<size_t> (view_.extent (1));
2211 return static_cast<size_t> (whichVectors_.size ());
2219 gblDotImpl (
const RV& dotsOut,
2220 const Teuchos::RCP<
const Teuchos::Comm<int> >& comm,
2221 const bool distributed)
2223 using Teuchos::REDUCE_MAX;
2224 using Teuchos::REDUCE_SUM;
2225 using Teuchos::reduceAll;
2226 typedef typename RV::non_const_value_type dot_type;
2228 const size_t numVecs = dotsOut.extent (0);
2243 if (distributed && ! comm.is_null ()) {
2246 const int nv =
static_cast<int> (numVecs);
2247 const bool commIsInterComm = ::Tpetra::Details::isInterComm (*comm);
2249 if (commIsInterComm) {
2253 typename RV::non_const_type lclDots (Kokkos::ViewAllocateWithoutInitializing (
"tmp"), numVecs);
2256 const dot_type*
const lclSum = lclDots.data ();
2257 dot_type*
const gblSum = dotsOut.data ();
2258 reduceAll<int, dot_type> (*comm, REDUCE_SUM, nv, lclSum, gblSum);
2261 dot_type*
const inout = dotsOut.data ();
2262 reduceAll<int, dot_type> (*comm, REDUCE_SUM, nv, inout, inout);
2268 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2272 const Kokkos::View<dot_type*, Kokkos::HostSpace>& dots)
const
2274 using ::Tpetra::Details::Behavior;
2275 using Kokkos::subview;
2276 using Teuchos::Comm;
2277 using Teuchos::null;
2280 typedef Kokkos::View<dot_type*, Kokkos::HostSpace> RV;
2281 typedef typename dual_view_type::t_dev_const XMV;
2282 const char tfecfFuncName[] =
"Tpetra::MultiVector::dot: ";
2286 const size_t numVecs = this->getNumVectors ();
2290 const size_t lclNumRows = this->getLocalLength ();
2291 const size_t numDots =
static_cast<size_t> (dots.extent (0));
2292 const bool debug = Behavior::debug ();
2295 const bool compat = this->getMap ()->isCompatible (* (A.
getMap ()));
2296 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2297 (! compat, std::invalid_argument,
"'*this' MultiVector is not "
2298 "compatible with the input MultiVector A. We only test for this "
2309 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2311 "MultiVectors do not have the same local length. "
2312 "this->getLocalLength() = " << lclNumRows <<
" != "
2314 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2316 "MultiVectors must have the same number of columns (vectors). "
2317 "this->getNumVectors() = " << numVecs <<
" != "
2319 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2320 numDots != numVecs, std::runtime_error,
2321 "The output array 'dots' must have the same number of entries as the "
2322 "number of columns (vectors) in *this and A. dots.extent(0) = " <<
2323 numDots <<
" != this->getNumVectors() = " << numVecs <<
".");
2325 const std::pair<size_t, size_t> colRng (0, numVecs);
2326 RV dotsOut = subview (dots, colRng);
2327 RCP<const Comm<int> > comm = this->getMap ().is_null () ? null :
2328 this->getMap ()->getComm ();
2330 auto thisView = this->getLocalViewDevice(Access::ReadOnly);
2333 ::Tpetra::Details::lclDot<RV, XMV> (dotsOut, thisView, A_view, lclNumRows, numVecs,
2334 this->whichVectors_.getRawPtr (),
2345 exec_space_instance.fence();
2347 gblDotImpl (dotsOut, comm, this->isDistributed ());
2351 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2356 using ::Tpetra::Details::ProfilingRegion;
2358 using dot_type =
typename MV::dot_type;
2359 ProfilingRegion region (
"Tpetra::multiVectorSingleColumnDot");
2362 Teuchos::RCP<const Teuchos::Comm<int> > comm =
2363 map.is_null () ? Teuchos::null : map->getComm ();
2364 if (comm.is_null ()) {
2365 return Kokkos::ArithTraits<dot_type>::zero ();
2368 using LO = LocalOrdinal;
2372 const LO lclNumRows =
static_cast<LO
> (std::min (x.
getLocalLength (),
2374 const Kokkos::pair<LO, LO> rowRng (0, lclNumRows);
2375 dot_type lclDot = Kokkos::ArithTraits<dot_type>::zero ();
2376 dot_type gblDot = Kokkos::ArithTraits<dot_type>::zero ();
2383 auto x_1d = Kokkos::subview (x_2d, rowRng, 0);
2385 auto y_1d = Kokkos::subview (y_2d, rowRng, 0);
2386 lclDot = KokkosBlas::dot (x_1d, y_1d);
2389 using Teuchos::outArg;
2390 using Teuchos::REDUCE_SUM;
2391 using Teuchos::reduceAll;
2392 reduceAll<int, dot_type> (*comm, REDUCE_SUM, lclDot, outArg (gblDot));
2404 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2408 const Teuchos::ArrayView<dot_type>& dots)
const
2410 const char tfecfFuncName[] =
"dot: ";
2413 const size_t numVecs = this->getNumVectors ();
2414 const size_t lclNumRows = this->getLocalLength ();
2415 const size_t numDots =
static_cast<size_t> (dots.size ());
2425 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2427 "MultiVectors do not have the same local length. "
2428 "this->getLocalLength() = " << lclNumRows <<
" != "
2430 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2432 "MultiVectors must have the same number of columns (vectors). "
2433 "this->getNumVectors() = " << numVecs <<
" != "
2435 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2436 (numDots != numVecs, std::runtime_error,
2437 "The output array 'dots' must have the same number of entries as the "
2438 "number of columns (vectors) in *this and A. dots.extent(0) = " <<
2439 numDots <<
" != this->getNumVectors() = " << numVecs <<
".");
2442 const dot_type gblDot = multiVectorSingleColumnDot (*
this, A);
2446 this->dot (A, Kokkos::View<dot_type*, Kokkos::HostSpace>(dots.getRawPtr (), numDots));
2450 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2453 norm2 (
const Teuchos::ArrayView<mag_type>& norms)
const
2456 using ::Tpetra::Details::NORM_TWO;
2457 using ::Tpetra::Details::ProfilingRegion;
2458 ProfilingRegion region (
"Tpetra::MV::norm2 (host output)");
2461 MV& X =
const_cast<MV&
> (*this);
2462 multiVectorNormImpl (norms.getRawPtr (), X, NORM_TWO);
2465 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2468 norm2 (
const Kokkos::View<mag_type*, Kokkos::HostSpace>& norms)
const
2470 Teuchos::ArrayView<mag_type> norms_av (norms.data (), norms.extent (0));
2471 this->norm2 (norms_av);
2475 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2478 norm1 (
const Teuchos::ArrayView<mag_type>& norms)
const
2481 using ::Tpetra::Details::NORM_ONE;
2482 using ::Tpetra::Details::ProfilingRegion;
2483 ProfilingRegion region (
"Tpetra::MV::norm1 (host output)");
2486 MV& X =
const_cast<MV&
> (*this);
2487 multiVectorNormImpl (norms.data (), X, NORM_ONE);
2490 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2493 norm1 (
const Kokkos::View<mag_type*, Kokkos::HostSpace>& norms)
const
2495 Teuchos::ArrayView<mag_type> norms_av (norms.data (), norms.extent (0));
2496 this->norm1 (norms_av);
2499 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2502 normInf (
const Teuchos::ArrayView<mag_type>& norms)
const
2505 using ::Tpetra::Details::NORM_INF;
2506 using ::Tpetra::Details::ProfilingRegion;
2507 ProfilingRegion region (
"Tpetra::MV::normInf (host output)");
2510 MV& X =
const_cast<MV&
> (*this);
2511 multiVectorNormImpl (norms.getRawPtr (), X, NORM_INF);
2514 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2517 normInf (
const Kokkos::View<mag_type*, Kokkos::HostSpace>& norms)
const
2519 Teuchos::ArrayView<mag_type> norms_av (norms.data (), norms.extent (0));
2520 this->normInf (norms_av);
2523 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2526 meanValue (
const Teuchos::ArrayView<impl_scalar_type>& means)
const
2530 using Kokkos::subview;
2531 using Teuchos::Comm;
2533 using Teuchos::reduceAll;
2534 using Teuchos::REDUCE_SUM;
2535 typedef Kokkos::ArithTraits<impl_scalar_type> ATS;
2537 const size_t lclNumRows = this->getLocalLength ();
2538 const size_t numVecs = this->getNumVectors ();
2539 const size_t numMeans =
static_cast<size_t> (means.size ());
2541 TEUCHOS_TEST_FOR_EXCEPTION(
2542 numMeans != numVecs, std::runtime_error,
2543 "Tpetra::MultiVector::meanValue: means.size() = " << numMeans
2544 <<
" != this->getNumVectors() = " << numVecs <<
".");
2546 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2547 const std::pair<size_t, size_t> colRng (0, numVecs);
2552 typedef Kokkos::View<impl_scalar_type*, device_type> local_view_type;
2554 typename local_view_type::HostMirror::array_layout,
2556 Kokkos::MemoryTraits<Kokkos::Unmanaged> > host_local_view_type;
2557 host_local_view_type meansOut (means.getRawPtr (), numMeans);
2559 RCP<const Comm<int> > comm = this->getMap ().is_null () ? Teuchos::null :
2560 this->getMap ()->getComm ();
2563 const bool useHostVersion = this->need_sync_device ();
2564 if (useHostVersion) {
2566 auto X_lcl = subview (getLocalViewHost(Access::ReadOnly),
2567 rowRng, Kokkos::ALL ());
2569 Kokkos::View<impl_scalar_type*, Kokkos::HostSpace> lclSums (
"MV::meanValue tmp", numVecs);
2570 if (isConstantStride ()) {
2571 KokkosBlas::sum (lclSums, X_lcl);
2574 for (
size_t j = 0; j < numVecs; ++j) {
2575 const size_t col = whichVectors_[j];
2576 KokkosBlas::sum (subview (lclSums, j), subview (X_lcl, ALL (), col));
2583 if (! comm.is_null () && this->isDistributed ()) {
2584 reduceAll (*comm, REDUCE_SUM, static_cast<int> (numVecs),
2585 lclSums.data (), meansOut.data ());
2594 auto X_lcl = subview (this->getLocalViewDevice(Access::ReadOnly),
2595 rowRng, Kokkos::ALL ());
2598 Kokkos::View<impl_scalar_type*, Kokkos::HostSpace> lclSums (
"MV::meanValue tmp", numVecs);
2599 if (isConstantStride ()) {
2600 KokkosBlas::sum (lclSums, X_lcl);
2603 for (
size_t j = 0; j < numVecs; ++j) {
2604 const size_t col = whichVectors_[j];
2605 KokkosBlas::sum (subview (lclSums, j), subview (X_lcl, ALL (), col));
2615 exec_space_instance.fence();
2621 if (! comm.is_null () && this->isDistributed ()) {
2622 reduceAll (*comm, REDUCE_SUM, static_cast<int> (numVecs),
2623 lclSums.data (), meansOut.data ());
2634 const impl_scalar_type OneOverN =
2635 ATS::one () /
static_cast<mag_type> (this->getGlobalLength ());
2636 for (
size_t k = 0; k < numMeans; ++k) {
2637 meansOut(k) = meansOut(k) * OneOverN;
2642 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2648 typedef Kokkos::ArithTraits<IST> ATS;
2649 typedef Kokkos::Random_XorShift64_Pool<typename device_type::execution_space> pool_type;
2650 typedef typename pool_type::generator_type generator_type;
2652 const IST max = Kokkos::rand<generator_type, IST>::max ();
2653 const IST min = ATS::is_signed ? IST (-max) : ATS::zero ();
2655 this->randomize (static_cast<Scalar> (min), static_cast<Scalar> (max));
2659 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2665 typedef Tpetra::Details::Static_Random_XorShift64_Pool<typename device_type::execution_space> tpetra_pool_type;
2666 typedef Kokkos::Random_XorShift64_Pool<typename device_type::execution_space> pool_type;
2669 if(!tpetra_pool_type::isSet())
2670 tpetra_pool_type::resetPool(this->getMap()->getComm()->getRank());
2672 pool_type & rand_pool = tpetra_pool_type::getPool();
2673 const IST max =
static_cast<IST
> (maxVal);
2674 const IST min =
static_cast<IST
> (minVal);
2676 auto thisView = this->getLocalViewDevice(Access::OverwriteAll);
2678 if (isConstantStride ()) {
2679 Kokkos::fill_random (thisView, rand_pool, min, max);
2682 const size_t numVecs = getNumVectors ();
2683 for (
size_t k = 0; k < numVecs; ++k) {
2684 const size_t col = whichVectors_[k];
2685 auto X_k = Kokkos::subview (thisView, Kokkos::ALL (), col);
2686 Kokkos::fill_random (X_k, rand_pool, min, max);
2691 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2696 using ::Tpetra::Details::ProfilingRegion;
2697 using ::Tpetra::Details::Blas::fill;
2698 using DES =
typename dual_view_type::t_dev::execution_space;
2699 using HES =
typename dual_view_type::t_host::execution_space;
2700 using LO = LocalOrdinal;
2701 ProfilingRegion region (
"Tpetra::MultiVector::putScalar");
2706 const LO lclNumRows =
static_cast<LO
> (this->getLocalLength ());
2707 const LO numVecs =
static_cast<LO
> (this->getNumVectors ());
2713 const bool runOnHost = runKernelOnHost(*
this);
2715 auto X = this->getLocalViewDevice(Access::OverwriteAll);
2716 if (this->isConstantStride ()) {
2717 fill (DES (), X, theAlpha, lclNumRows, numVecs);
2720 fill (DES (), X, theAlpha, lclNumRows, numVecs,
2721 this->whichVectors_.getRawPtr ());
2725 auto X = this->getLocalViewHost(Access::OverwriteAll);
2726 if (this->isConstantStride ()) {
2727 fill (HES (), X, theAlpha, lclNumRows, numVecs);
2730 fill (HES (), X, theAlpha, lclNumRows, numVecs,
2731 this->whichVectors_.getRawPtr ());
2737 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2742 using Teuchos::ArrayRCP;
2743 using Teuchos::Comm;
2746 using LO = LocalOrdinal;
2747 using GO = GlobalOrdinal;
2753 TEUCHOS_TEST_FOR_EXCEPTION(
2754 ! this->isConstantStride (), std::logic_error,
2755 "Tpetra::MultiVector::replaceMap: This method does not currently work "
2756 "if the MultiVector is a column view of another MultiVector (that is, if "
2757 "isConstantStride() == false).");
2787 if (this->getMap ().is_null ()) {
2792 TEUCHOS_TEST_FOR_EXCEPTION(
2793 newMap.is_null (), std::invalid_argument,
2794 "Tpetra::MultiVector::replaceMap: both current and new Maps are null. "
2795 "This probably means that the input Map is incorrect.");
2799 const size_t newNumRows = newMap->getLocalNumElements ();
2800 const size_t origNumRows = view_.extent (0);
2801 const size_t numCols = this->getNumVectors ();
2803 if (origNumRows != newNumRows || view_.extent (1) != numCols) {
2804 view_ = allocDualView<ST, LO, GO, Node> (newNumRows, numCols);
2807 else if (newMap.is_null ()) {
2810 const size_t newNumRows =
static_cast<size_t> (0);
2811 const size_t numCols = this->getNumVectors ();
2812 view_ = allocDualView<ST, LO, GO, Node> (newNumRows, numCols);
2815 this->map_ = newMap;
2818 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2826 const IST theAlpha =
static_cast<IST
> (alpha);
2827 if (theAlpha == Kokkos::ArithTraits<IST>::one ()) {
2830 const size_t lclNumRows = getLocalLength ();
2831 const size_t numVecs = getNumVectors ();
2832 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2833 const std::pair<size_t, size_t> colRng (0, numVecs);
2841 const bool useHostVersion = need_sync_device ();
2842 if (useHostVersion) {
2843 auto Y_lcl = Kokkos::subview (getLocalViewHost(Access::ReadWrite), rowRng, ALL ());
2844 if (isConstantStride ()) {
2845 KokkosBlas::scal (Y_lcl, theAlpha, Y_lcl);
2848 for (
size_t k = 0; k < numVecs; ++k) {
2849 const size_t Y_col = whichVectors_[k];
2850 auto Y_k = Kokkos::subview (Y_lcl, ALL (), Y_col);
2851 KokkosBlas::scal (Y_k, theAlpha, Y_k);
2856 auto Y_lcl = Kokkos::subview (getLocalViewDevice(Access::ReadWrite), rowRng, ALL ());
2857 if (isConstantStride ()) {
2858 KokkosBlas::scal (Y_lcl, theAlpha, Y_lcl);
2861 for (
size_t k = 0; k < numVecs; ++k) {
2862 const size_t Y_col = isConstantStride () ? k : whichVectors_[k];
2863 auto Y_k = Kokkos::subview (Y_lcl, ALL (), Y_col);
2864 KokkosBlas::scal (Y_k, theAlpha, Y_k);
2871 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2874 scale (
const Teuchos::ArrayView<const Scalar>& alphas)
2876 const size_t numVecs = this->getNumVectors ();
2877 const size_t numAlphas =
static_cast<size_t> (alphas.size ());
2878 TEUCHOS_TEST_FOR_EXCEPTION(
2879 numAlphas != numVecs, std::invalid_argument,
"Tpetra::MultiVector::"
2880 "scale: alphas.size() = " << numAlphas <<
" != this->getNumVectors() = "
2884 using k_alphas_type = Kokkos::DualView<impl_scalar_type*, device_type>;
2885 k_alphas_type k_alphas (
"alphas::tmp", numAlphas);
2886 k_alphas.modify_host ();
2887 for (
size_t i = 0; i < numAlphas; ++i) {
2890 k_alphas.sync_device ();
2892 this->scale (k_alphas.view_device ());
2895 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2898 scale (
const Kokkos::View<const impl_scalar_type*, device_type>& alphas)
2901 using Kokkos::subview;
2903 const size_t lclNumRows = this->getLocalLength ();
2904 const size_t numVecs = this->getNumVectors ();
2905 TEUCHOS_TEST_FOR_EXCEPTION(
2906 static_cast<size_t> (alphas.extent (0)) != numVecs,
2907 std::invalid_argument,
"Tpetra::MultiVector::scale(alphas): "
2908 "alphas.extent(0) = " << alphas.extent (0)
2909 <<
" != this->getNumVectors () = " << numVecs <<
".");
2910 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2911 const std::pair<size_t, size_t> colRng (0, numVecs);
2921 const bool useHostVersion = this->need_sync_device ();
2922 if (useHostVersion) {
2925 auto alphas_h = Kokkos::create_mirror_view (alphas);
2929 auto Y_lcl = subview (this->getLocalViewHost(Access::ReadWrite), rowRng, ALL ());
2930 if (isConstantStride ()) {
2931 KokkosBlas::scal (Y_lcl, alphas_h, Y_lcl);
2934 for (
size_t k = 0; k < numVecs; ++k) {
2935 const size_t Y_col = this->isConstantStride () ? k :
2936 this->whichVectors_[k];
2937 auto Y_k = subview (Y_lcl, ALL (), Y_col);
2940 KokkosBlas::scal (Y_k, alphas_h(k), Y_k);
2945 auto Y_lcl = subview (this->getLocalViewDevice(Access::ReadWrite), rowRng, ALL ());
2946 if (isConstantStride ()) {
2947 KokkosBlas::scal (Y_lcl, alphas, Y_lcl);
2954 auto alphas_h = Kokkos::create_mirror_view (alphas);
2958 for (
size_t k = 0; k < numVecs; ++k) {
2959 const size_t Y_col = this->isConstantStride () ? k :
2960 this->whichVectors_[k];
2961 auto Y_k = subview (Y_lcl, ALL (), Y_col);
2962 KokkosBlas::scal (Y_k, alphas_h(k), Y_k);
2968 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2975 using Kokkos::subview;
2976 const char tfecfFuncName[] =
"scale: ";
2978 const size_t lclNumRows = getLocalLength ();
2979 const size_t numVecs = getNumVectors ();
2981 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2983 "this->getLocalLength() = " << lclNumRows <<
" != A.getLocalLength() = "
2985 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2987 "this->getNumVectors() = " << numVecs <<
" != A.getNumVectors() = "
2991 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2992 const std::pair<size_t, size_t> colRng (0, numVecs);
2994 auto Y_lcl_orig = this->getLocalViewDevice(Access::ReadWrite);
2996 auto Y_lcl = subview (Y_lcl_orig, rowRng, ALL ());
2997 auto X_lcl = subview (X_lcl_orig, rowRng, ALL ());
3000 KokkosBlas::scal (Y_lcl, theAlpha, X_lcl);
3004 for (
size_t k = 0; k < numVecs; ++k) {
3005 const size_t Y_col = this->isConstantStride () ? k : this->whichVectors_[k];
3007 auto Y_k = subview (Y_lcl, ALL (), Y_col);
3008 auto X_k = subview (X_lcl, ALL (), X_col);
3010 KokkosBlas::scal (Y_k, theAlpha, X_k);
3017 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3022 const char tfecfFuncName[] =
"reciprocal: ";
3024 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3026 "MultiVectors do not have the same local length. "
3027 "this->getLocalLength() = " << getLocalLength ()
3029 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3030 A.
getNumVectors () != this->getNumVectors (), std::runtime_error,
3031 ": MultiVectors do not have the same number of columns (vectors). "
3032 "this->getNumVectors() = " << getNumVectors ()
3033 <<
" != A.getNumVectors() = " << A.
getNumVectors () <<
".");
3035 const size_t numVecs = getNumVectors ();
3037 auto this_view_dev = this->getLocalViewDevice(Access::ReadWrite);
3041 KokkosBlas::reciprocal (this_view_dev, A_view_dev);
3045 using Kokkos::subview;
3046 for (
size_t k = 0; k < numVecs; ++k) {
3047 const size_t this_col = isConstantStride () ? k : whichVectors_[k];
3048 auto vector_k = subview (this_view_dev, ALL (), this_col);
3049 const size_t A_col = isConstantStride () ? k : A.
whichVectors_[k];
3050 auto vector_Ak = subview (A_view_dev, ALL (), A_col);
3051 KokkosBlas::reciprocal (vector_k, vector_Ak);
3056 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3061 const char tfecfFuncName[] =
"abs";
3063 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3065 ": MultiVectors do not have the same local length. "
3066 "this->getLocalLength() = " << getLocalLength ()
3068 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3069 A.
getNumVectors () != this->getNumVectors (), std::runtime_error,
3070 ": MultiVectors do not have the same number of columns (vectors). "
3071 "this->getNumVectors() = " << getNumVectors ()
3072 <<
" != A.getNumVectors() = " << A.
getNumVectors () <<
".");
3073 const size_t numVecs = getNumVectors ();
3075 auto this_view_dev = this->getLocalViewDevice(Access::ReadWrite);
3079 KokkosBlas::abs (this_view_dev, A_view_dev);
3083 using Kokkos::subview;
3085 for (
size_t k=0; k < numVecs; ++k) {
3086 const size_t this_col = isConstantStride () ? k : whichVectors_[k];
3087 auto vector_k = subview (this_view_dev, ALL (), this_col);
3088 const size_t A_col = isConstantStride () ? k : A.
whichVectors_[k];
3089 auto vector_Ak = subview (A_view_dev, ALL (), A_col);
3090 KokkosBlas::abs (vector_k, vector_Ak);
3095 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3102 const char tfecfFuncName[] =
"update: ";
3103 using Kokkos::subview;
3108 const size_t lclNumRows = getLocalLength ();
3109 const size_t numVecs = getNumVectors ();
3112 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3114 "this->getLocalLength() = " << lclNumRows <<
" != A.getLocalLength() = "
3116 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3118 "this->getNumVectors() = " << numVecs <<
" != A.getNumVectors() = "
3124 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
3125 const std::pair<size_t, size_t> colRng (0, numVecs);
3127 auto Y_lcl_orig = this->getLocalViewDevice(Access::ReadWrite);
3128 auto Y_lcl = subview (Y_lcl_orig, rowRng, Kokkos::ALL ());
3130 auto X_lcl = subview (X_lcl_orig, rowRng, Kokkos::ALL ());
3134 KokkosBlas::axpby (theAlpha, X_lcl, theBeta, Y_lcl);
3138 for (
size_t k = 0; k < numVecs; ++k) {
3139 const size_t Y_col = this->isConstantStride () ? k : this->whichVectors_[k];
3141 auto Y_k = subview (Y_lcl, ALL (), Y_col);
3142 auto X_k = subview (X_lcl, ALL (), X_col);
3144 KokkosBlas::axpby (theAlpha, X_k, theBeta, Y_k);
3149 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3156 const Scalar& gamma)
3159 using Kokkos::subview;
3161 const char tfecfFuncName[] =
"update(alpha,A,beta,B,gamma): ";
3165 const size_t lclNumRows = this->getLocalLength ();
3166 const size_t numVecs = getNumVectors ();
3169 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3171 "The input MultiVector A has " << A.
getLocalLength () <<
" local "
3172 "row(s), but this MultiVector has " << lclNumRows <<
" local "
3174 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3176 "The input MultiVector B has " << B.
getLocalLength () <<
" local "
3177 "row(s), but this MultiVector has " << lclNumRows <<
" local "
3179 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3181 "The input MultiVector A has " << A.
getNumVectors () <<
" column(s), "
3182 "but this MultiVector has " << numVecs <<
" column(s).");
3183 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3185 "The input MultiVector B has " << B.
getNumVectors () <<
" column(s), "
3186 "but this MultiVector has " << numVecs <<
" column(s).");
3193 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
3194 const std::pair<size_t, size_t> colRng (0, numVecs);
3199 auto C_lcl = subview (this->getLocalViewDevice(Access::ReadWrite), rowRng, ALL ());
3204 KokkosBlas::update (theAlpha, A_lcl, theBeta, B_lcl, theGamma, C_lcl);
3209 for (
size_t k = 0; k < numVecs; ++k) {
3210 const size_t this_col = isConstantStride () ? k : whichVectors_[k];
3213 KokkosBlas::update (theAlpha, subview (A_lcl, rowRng, A_col),
3214 theBeta, subview (B_lcl, rowRng, B_col),
3215 theGamma, subview (C_lcl, rowRng, this_col));
3220 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3221 Teuchos::ArrayRCP<const Scalar>
3227 const char tfecfFuncName[] =
"getData: ";
3234 auto hostView = getLocalViewHost(Access::ReadOnly);
3235 const size_t col = isConstantStride () ? j : whichVectors_[j];
3236 auto hostView_j = Kokkos::subview (hostView, ALL (), col);
3237 Teuchos::ArrayRCP<const IST> dataAsArcp =
3238 Kokkos::Compat::persistingView (hostView_j, 0, getLocalLength ());
3240 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3241 (static_cast<size_t> (hostView_j.extent (0)) <
3242 static_cast<size_t> (dataAsArcp.size ()), std::logic_error,
3243 "hostView_j.extent(0) = " << hostView_j.extent (0)
3244 <<
" < dataAsArcp.size() = " << dataAsArcp.size () <<
". "
3245 "Please report this bug to the Tpetra developers.");
3247 return Teuchos::arcp_reinterpret_cast<
const Scalar> (dataAsArcp);
3250 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3251 Teuchos::ArrayRCP<Scalar>
3256 using Kokkos::subview;
3258 const char tfecfFuncName[] =
"getDataNonConst: ";
3260 auto hostView = getLocalViewHost(Access::ReadWrite);
3261 const size_t col = isConstantStride () ? j : whichVectors_[j];
3262 auto hostView_j = subview (hostView, ALL (), col);
3263 Teuchos::ArrayRCP<IST> dataAsArcp =
3264 Kokkos::Compat::persistingView (hostView_j, 0, getLocalLength ());
3266 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3267 (static_cast<size_t> (hostView_j.extent (0)) <
3268 static_cast<size_t> (dataAsArcp.size ()), std::logic_error,
3269 "hostView_j.extent(0) = " << hostView_j.extent (0)
3270 <<
" < dataAsArcp.size() = " << dataAsArcp.size () <<
". "
3271 "Please report this bug to the Tpetra developers.");
3273 return Teuchos::arcp_reinterpret_cast<Scalar> (dataAsArcp);
3276 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3277 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3279 subCopy (
const Teuchos::ArrayView<const size_t>& cols)
const
3286 bool contiguous =
true;
3287 const size_t numCopyVecs =
static_cast<size_t> (cols.size ());
3288 for (
size_t j = 1; j < numCopyVecs; ++j) {
3289 if (cols[j] != cols[j-1] + static_cast<size_t> (1)) {
3294 if (contiguous && numCopyVecs > 0) {
3295 return this->subCopy (Teuchos::Range1D (cols[0], cols[numCopyVecs-1]));
3298 RCP<const MV> X_sub = this->subView (cols);
3299 RCP<MV> Y (
new MV (this->getMap (), numCopyVecs,
false));
3305 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3306 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3313 RCP<const MV> X_sub = this->subView (colRng);
3314 RCP<MV> Y (
new MV (this->getMap (), static_cast<size_t> (colRng.size ()),
false));
3319 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3323 return view_.origExtent(0);
3326 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3330 return view_.origExtent(1);
3333 template <
class Scalar,
class LO,
class GO,
class Node>
3336 const Teuchos::RCP<const map_type>& subMap,
3337 const local_ordinal_type rowOffset) :
3341 using Kokkos::subview;
3342 using Teuchos::outArg;
3345 using Teuchos::reduceAll;
3346 using Teuchos::REDUCE_MIN;
3349 const char prefix[] =
"Tpetra::MultiVector constructor (offsetView): ";
3350 const char suffix[] =
"Please report this bug to the Tpetra developers.";
3353 std::unique_ptr<std::ostringstream> errStrm;
3360 const auto comm = subMap->getComm ();
3361 TEUCHOS_ASSERT( ! comm.is_null () );
3362 const int myRank = comm->getRank ();
3364 const LO lclNumRowsBefore =
static_cast<LO
> (X.
getLocalLength ());
3366 const LO newNumRows =
static_cast<LO
> (subMap->getLocalNumElements ());
3368 std::ostringstream os;
3369 os <<
"Proc " << myRank <<
": " << prefix
3370 <<
"X: {lclNumRows: " << lclNumRowsBefore
3372 <<
", numCols: " << numCols <<
"}, "
3373 <<
"subMap: {lclNumRows: " << newNumRows <<
"}" << endl;
3374 std::cerr << os.str ();
3379 const bool tooManyElts =
3382 errStrm = std::unique_ptr<std::ostringstream> (
new std::ostringstream);
3383 *errStrm <<
" Proc " << myRank <<
": subMap->getLocalNumElements() (="
3384 << newNumRows <<
") + rowOffset (=" << rowOffset
3388 TEUCHOS_TEST_FOR_EXCEPTION
3389 (! debug && tooManyElts, std::invalid_argument,
3390 prefix << errStrm->str () << suffix);
3394 reduceAll<int, int> (*comm, REDUCE_MIN, lclGood, outArg (gblGood));
3396 std::ostringstream gblErrStrm;
3397 const std::string myErrStr =
3398 errStrm.get () !=
nullptr ? errStrm->str () : std::string (
"");
3399 ::Tpetra::Details::gathervPrint (gblErrStrm, myErrStr, *comm);
3400 TEUCHOS_TEST_FOR_EXCEPTION
3401 (
true, std::invalid_argument, gblErrStrm.str ());
3405 using range_type = std::pair<LO, LO>;
3406 const range_type origRowRng
3407 (rowOffset, static_cast<LO> (X.
view_.origExtent (0)));
3408 const range_type rowRng
3409 (rowOffset, rowOffset + newNumRows);
3411 wrapped_dual_view_type newView = takeSubview (X.
view_, rowRng, ALL ());
3418 if (newView.extent (0) == 0 &&
3419 newView.extent (1) != X.
view_.extent (1)) {
3421 allocDualView<Scalar, LO, GO, Node> (0, X.
getNumVectors ());
3425 MV (subMap, newView) :
3429 const LO lclNumRowsRet =
static_cast<LO
> (subViewMV.getLocalLength ());
3430 const LO numColsRet =
static_cast<LO
> (subViewMV.getNumVectors ());
3431 if (newNumRows != lclNumRowsRet || numCols != numColsRet) {
3433 if (errStrm.get () ==
nullptr) {
3434 errStrm = std::unique_ptr<std::ostringstream> (
new std::ostringstream);
3436 *errStrm <<
" Proc " << myRank <<
3437 ": subMap.getLocalNumElements(): " << newNumRows <<
3438 ", subViewMV.getLocalLength(): " << lclNumRowsRet <<
3439 ", X.getNumVectors(): " << numCols <<
3440 ", subViewMV.getNumVectors(): " << numColsRet << endl;
3442 reduceAll<int, int> (*comm, REDUCE_MIN, lclGood, outArg (gblGood));
3444 std::ostringstream gblErrStrm;
3446 gblErrStrm << prefix <<
"Returned MultiVector has the wrong local "
3447 "dimensions on one or more processes:" << endl;
3449 const std::string myErrStr =
3450 errStrm.get () !=
nullptr ? errStrm->str () : std::string (
"");
3451 ::Tpetra::Details::gathervPrint (gblErrStrm, myErrStr, *comm);
3452 gblErrStrm << suffix << endl;
3453 TEUCHOS_TEST_FOR_EXCEPTION
3454 (
true, std::invalid_argument, gblErrStrm.str ());
3459 std::ostringstream os;
3460 os <<
"Proc " << myRank <<
": " << prefix <<
"Call op=" << endl;
3461 std::cerr << os.str ();
3467 std::ostringstream os;
3468 os <<
"Proc " << myRank <<
": " << prefix <<
"Done!" << endl;
3469 std::cerr << os.str ();
3473 template <
class Scalar,
class LO,
class GO,
class Node>
3476 const map_type& subMap,
3477 const size_t rowOffset) :
3478 MultiVector (X, Teuchos::RCP<const map_type> (new map_type (subMap)),
3482 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3483 Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3486 const size_t offset)
const
3489 return Teuchos::rcp (
new MV (*
this, *subMap, offset));
3492 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3493 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3496 const size_t offset)
3499 return Teuchos::rcp (
new MV (*
this, *subMap, offset));
3502 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3503 Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3505 subView (
const Teuchos::ArrayView<const size_t>& cols)
const
3507 using Teuchos::Array;
3511 const size_t numViewCols =
static_cast<size_t> (cols.size ());
3512 TEUCHOS_TEST_FOR_EXCEPTION(
3513 numViewCols < 1, std::runtime_error,
"Tpetra::MultiVector::subView"
3514 "(const Teuchos::ArrayView<const size_t>&): The input array cols must "
3515 "contain at least one entry, but cols.size() = " << cols.size ()
3520 bool contiguous =
true;
3521 for (
size_t j = 1; j < numViewCols; ++j) {
3522 if (cols[j] != cols[j-1] + static_cast<size_t> (1)) {
3528 if (numViewCols == 0) {
3530 return rcp (
new MV (this->getMap (), numViewCols));
3533 return this->subView (Teuchos::Range1D (cols[0], cols[numViewCols-1]));
3537 if (isConstantStride ()) {
3538 return rcp (
new MV (this->getMap (), view_, cols));
3541 Array<size_t> newcols (cols.size ());
3542 for (
size_t j = 0; j < numViewCols; ++j) {
3543 newcols[j] = whichVectors_[cols[j]];
3545 return rcp (
new MV (this->getMap (), view_, newcols ()));
3550 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3551 Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3555 using ::Tpetra::Details::Behavior;
3557 using Kokkos::subview;
3558 using Teuchos::Array;
3562 const char tfecfFuncName[] =
"subView(Range1D): ";
3564 const size_t lclNumRows = this->getLocalLength ();
3565 const size_t numVecs = this->getNumVectors ();
3569 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3570 static_cast<size_t> (colRng.size ()) > numVecs, std::runtime_error,
3571 "colRng.size() = " << colRng.size () <<
" > this->getNumVectors() = "
3573 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3574 numVecs != 0 && colRng.size () != 0 &&
3575 (colRng.lbound () <
static_cast<Teuchos::Ordinal
> (0) ||
3576 static_cast<size_t> (colRng.ubound ()) >= numVecs),
3577 std::invalid_argument,
"Nonempty input range [" << colRng.lbound () <<
3578 "," << colRng.ubound () <<
"] exceeds the valid range of column indices "
3579 "[0, " << numVecs <<
"].");
3581 RCP<const MV> X_ret;
3591 const std::pair<size_t, size_t> rows (0, lclNumRows);
3592 if (colRng.size () == 0) {
3593 const std::pair<size_t, size_t> cols (0, 0);
3594 wrapped_dual_view_type X_sub = takeSubview (this->view_, ALL (), cols);
3595 X_ret = rcp (
new MV (this->getMap (), X_sub));
3599 if (isConstantStride ()) {
3600 const std::pair<size_t, size_t> cols (colRng.lbound (),
3601 colRng.ubound () + 1);
3602 wrapped_dual_view_type X_sub = takeSubview (this->view_, ALL (), cols);
3603 X_ret = rcp (
new MV (this->getMap (), X_sub));
3606 if (static_cast<size_t> (colRng.size ()) == static_cast<size_t> (1)) {
3609 const std::pair<size_t, size_t> col (whichVectors_[0] + colRng.lbound (),
3610 whichVectors_[0] + colRng.ubound () + 1);
3611 wrapped_dual_view_type X_sub = takeSubview (view_, ALL (), col);
3612 X_ret = rcp (
new MV (this->getMap (), X_sub));
3615 Array<size_t> which (whichVectors_.begin () + colRng.lbound (),
3616 whichVectors_.begin () + colRng.ubound () + 1);
3617 X_ret = rcp (
new MV (this->getMap (), view_, which));
3622 const bool debug = Behavior::debug ();
3624 using Teuchos::Comm;
3625 using Teuchos::outArg;
3626 using Teuchos::REDUCE_MIN;
3627 using Teuchos::reduceAll;
3629 RCP<const Comm<int> > comm = this->getMap ().is_null () ?
3630 Teuchos::null : this->getMap ()->getComm ();
3631 if (! comm.is_null ()) {
3635 if (X_ret.is_null ()) {
3638 reduceAll<int, int> (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess));
3639 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3640 (lclSuccess != 1, std::logic_error,
"X_ret (the subview of this "
3641 "MultiVector; the return value of this method) is null on some MPI "
3642 "process in this MultiVector's communicator. This should never "
3643 "happen. Please report this bug to the Tpetra developers.");
3644 if (! X_ret.is_null () &&
3645 X_ret->getNumVectors () !=
static_cast<size_t> (colRng.size ())) {
3648 reduceAll<int, int> (*comm, REDUCE_MIN, lclSuccess,
3649 outArg (gblSuccess));
3650 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3651 (lclSuccess != 1, std::logic_error,
"X_ret->getNumVectors() != "
3652 "colRng.size(), on at least one MPI process in this MultiVector's "
3653 "communicator. This should never happen. "
3654 "Please report this bug to the Tpetra developers.");
3661 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3662 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3667 return Teuchos::rcp_const_cast<MV> (this->subView (cols));
3671 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3672 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3677 return Teuchos::rcp_const_cast<MV> (this->subView (colRng));
3681 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3687 using Kokkos::subview;
3688 typedef std::pair<size_t, size_t> range_type;
3689 const char tfecfFuncName[] =
"MultiVector(const MultiVector&, const size_t): ";
3692 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3693 (j >= numCols, std::invalid_argument,
"Input index j (== " << j
3694 <<
") exceeds valid column index range [0, " << numCols <<
" - 1].");
3696 static_cast<size_t> (j) :
3698 this->
view_ = takeSubview (X.
view_, Kokkos::ALL (), range_type (jj, jj+1));
3709 const size_t newSize = X.
imports_.extent (0) / numCols;
3710 const size_t offset = jj*newSize;
3712 newImports.d_view = subview (X.
imports_.d_view,
3713 range_type (offset, offset+newSize));
3714 newImports.h_view = subview (X.
imports_.h_view,
3715 range_type (offset, offset+newSize));
3719 const size_t newSize = X.
exports_.extent (0) / numCols;
3720 const size_t offset = jj*newSize;
3722 newExports.d_view = subview (X.
exports_.d_view,
3723 range_type (offset, offset+newSize));
3724 newExports.h_view = subview (X.
exports_.h_view,
3725 range_type (offset, offset+newSize));
3736 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3737 Teuchos::RCP<const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3742 return Teuchos::rcp (
new V (*
this, j));
3746 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3747 Teuchos::RCP<Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3752 return Teuchos::rcp (
new V (*
this, j));
3756 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3759 get1dCopy (
const Teuchos::ArrayView<Scalar>& A,
const size_t LDA)
const
3762 using input_view_type = Kokkos::View<IST**, Kokkos::LayoutLeft,
3764 Kokkos::MemoryUnmanaged>;
3765 const char tfecfFuncName[] =
"get1dCopy: ";
3767 const size_t numRows = this->getLocalLength ();
3768 const size_t numCols = this->getNumVectors ();
3770 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3771 (LDA < numRows, std::runtime_error,
3772 "LDA = " << LDA <<
" < numRows = " << numRows <<
".");
3773 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3774 (numRows >
size_t (0) && numCols >
size_t (0) &&
3775 size_t (A.size ()) < LDA * (numCols - 1) + numRows,
3777 "A.size() = " << A.size () <<
", but its size must be at least "
3778 << (LDA * (numCols - 1) + numRows) <<
" to hold all the entries.");
3780 const std::pair<size_t, size_t> rowRange (0, numRows);
3781 const std::pair<size_t, size_t> colRange (0, numCols);
3783 input_view_type A_view_orig (reinterpret_cast<IST*> (A.getRawPtr ()),
3785 auto A_view = Kokkos::subview (A_view_orig, rowRange, colRange);
3798 if (this->need_sync_host() && this->need_sync_device()) {
3801 throw std::runtime_error(
"Tpetra::MultiVector: A non-const view is alive outside and we cannot give a copy where host or device view can be modified outside");
3804 const bool useHostView = view_.host_view_use_count() >= view_.device_view_use_count();
3805 if (this->isConstantStride ()) {
3807 auto srcView_host = this->getLocalViewHost(Access::ReadOnly);
3811 auto srcView_device = this->getLocalViewDevice(Access::ReadOnly);
3817 for (
size_t j = 0; j < numCols; ++j) {
3818 const size_t srcCol = this->whichVectors_[j];
3819 auto dstColView = Kokkos::subview (A_view, rowRange, j);
3822 auto srcView_host = this->getLocalViewHost(Access::ReadOnly);
3823 auto srcColView_host = Kokkos::subview (srcView_host, rowRange, srcCol);
3827 auto srcView_device = this->getLocalViewDevice(Access::ReadOnly);
3828 auto srcColView_device = Kokkos::subview (srcView_device, rowRange, srcCol);
3838 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3841 get2dCopy (
const Teuchos::ArrayView<
const Teuchos::ArrayView<Scalar> >& ArrayOfPtrs)
const
3844 const char tfecfFuncName[] =
"get2dCopy: ";
3845 const size_t numRows = this->getLocalLength ();
3846 const size_t numCols = this->getNumVectors ();
3848 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3849 static_cast<size_t> (ArrayOfPtrs.size ()) != numCols,
3850 std::runtime_error,
"Input array of pointers must contain as many "
3851 "entries (arrays) as the MultiVector has columns. ArrayOfPtrs.size() = "
3852 << ArrayOfPtrs.size () <<
" != getNumVectors() = " << numCols <<
".");
3854 if (numRows != 0 && numCols != 0) {
3856 for (
size_t j = 0; j < numCols; ++j) {
3857 const size_t dstLen =
static_cast<size_t> (ArrayOfPtrs[j].size ());
3858 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3859 dstLen < numRows, std::invalid_argument,
"Array j = " << j <<
" of "
3860 "the input array of arrays is not long enough to fit all entries in "
3861 "that column of the MultiVector. ArrayOfPtrs[j].size() = " << dstLen
3862 <<
" < getLocalLength() = " << numRows <<
".");
3866 for (
size_t j = 0; j < numCols; ++j) {
3867 Teuchos::RCP<const V> X_j = this->getVector (j);
3868 const size_t LDA =
static_cast<size_t> (ArrayOfPtrs[j].size ());
3869 X_j->get1dCopy (ArrayOfPtrs[j], LDA);
3875 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3876 Teuchos::ArrayRCP<const Scalar>
3880 if (getLocalLength () == 0 || getNumVectors () == 0) {
3881 return Teuchos::null;
3883 TEUCHOS_TEST_FOR_EXCEPTION(
3884 ! isConstantStride (), std::runtime_error,
"Tpetra::MultiVector::"
3885 "get1dView: This MultiVector does not have constant stride, so it is "
3886 "not possible to view its data as a single array. You may check "
3887 "whether a MultiVector has constant stride by calling "
3888 "isConstantStride().");
3892 auto X_lcl = getLocalViewHost(Access::ReadOnly);
3893 Teuchos::ArrayRCP<const impl_scalar_type> dataAsArcp =
3894 Kokkos::Compat::persistingView (X_lcl);
3895 return Teuchos::arcp_reinterpret_cast<
const Scalar> (dataAsArcp);
3899 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3900 Teuchos::ArrayRCP<Scalar>
3904 if (this->getLocalLength () == 0 || this->getNumVectors () == 0) {
3905 return Teuchos::null;
3908 TEUCHOS_TEST_FOR_EXCEPTION
3909 (! isConstantStride (), std::runtime_error,
"Tpetra::MultiVector::"
3910 "get1dViewNonConst: This MultiVector does not have constant stride, "
3911 "so it is not possible to view its data as a single array. You may "
3912 "check whether a MultiVector has constant stride by calling "
3913 "isConstantStride().");
3914 auto X_lcl = getLocalViewHost(Access::ReadWrite);
3915 Teuchos::ArrayRCP<impl_scalar_type> dataAsArcp =
3916 Kokkos::Compat::persistingView (X_lcl);
3917 return Teuchos::arcp_reinterpret_cast<Scalar> (dataAsArcp);
3921 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3922 Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> >
3926 auto X_lcl = getLocalViewHost(Access::ReadWrite);
3932 const size_t myNumRows = this->getLocalLength ();
3933 const size_t numCols = this->getNumVectors ();
3934 const Kokkos::pair<size_t, size_t> rowRange (0, myNumRows);
3936 Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> > views (numCols);
3937 for (
size_t j = 0; j < numCols; ++j) {
3938 const size_t col = this->isConstantStride () ? j : this->whichVectors_[j];
3939 auto X_lcl_j = Kokkos::subview (X_lcl, rowRange, col);
3940 Teuchos::ArrayRCP<impl_scalar_type> X_lcl_j_arcp =
3941 Kokkos::Compat::persistingView (X_lcl_j);
3942 views[j] = Teuchos::arcp_reinterpret_cast<Scalar> (X_lcl_j_arcp);
3947 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3952 return view_.getHostView(s);
3955 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3960 return view_.getHostView(s);
3963 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3968 return view_.getHostView(s);
3971 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3976 return view_.getDeviceView(s);
3979 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3984 return view_.getDeviceView(s);
3987 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3992 return view_.getDeviceView(s);
3995 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4002 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4003 Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> >
4010 auto X_lcl = getLocalViewHost(Access::ReadOnly);
4016 const size_t myNumRows = this->getLocalLength ();
4017 const size_t numCols = this->getNumVectors ();
4018 const Kokkos::pair<size_t, size_t> rowRange (0, myNumRows);
4020 Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> > views (numCols);
4021 for (
size_t j = 0; j < numCols; ++j) {
4022 const size_t col = this->isConstantStride () ? j : this->whichVectors_[j];
4023 auto X_lcl_j = Kokkos::subview (X_lcl, rowRange, col);
4024 Teuchos::ArrayRCP<const impl_scalar_type> X_lcl_j_arcp =
4025 Kokkos::Compat::persistingView (X_lcl_j);
4026 views[j] = Teuchos::arcp_reinterpret_cast<
const Scalar> (X_lcl_j_arcp);
4031 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4035 Teuchos::ETransp transB,
4036 const Scalar& alpha,
4041 using ::Tpetra::Details::ProfilingRegion;
4042 using Teuchos::CONJ_TRANS;
4043 using Teuchos::NO_TRANS;
4044 using Teuchos::TRANS;
4047 using Teuchos::rcpFromRef;
4049 using ATS = Kokkos::ArithTraits<impl_scalar_type>;
4051 using STS = Teuchos::ScalarTraits<Scalar>;
4053 const char tfecfFuncName[] =
"multiply: ";
4054 ProfilingRegion region (
"Tpetra::MV::multiply");
4086 const bool C_is_local = ! this->isDistributed ();
4096 auto myMap = this->getMap ();
4097 if (! myMap.is_null () && ! myMap->getComm ().is_null ()) {
4098 using Teuchos::REDUCE_MIN;
4099 using Teuchos::reduceAll;
4100 using Teuchos::outArg;
4102 auto comm = myMap->getComm ();
4103 const size_t A_nrows =
4105 const size_t A_ncols =
4107 const size_t B_nrows =
4109 const size_t B_ncols =
4112 const bool lclBad = this->getLocalLength () != A_nrows ||
4113 this->getNumVectors () != B_ncols || A_ncols != B_nrows;
4115 const int myRank = comm->getRank ();
4116 std::ostringstream errStrm;
4117 if (this->getLocalLength () != A_nrows) {
4118 errStrm <<
"Proc " << myRank <<
": this->getLocalLength()="
4119 << this->getLocalLength () <<
" != A_nrows=" << A_nrows
4120 <<
"." << std::endl;
4122 if (this->getNumVectors () != B_ncols) {
4123 errStrm <<
"Proc " << myRank <<
": this->getNumVectors()="
4124 << this->getNumVectors () <<
" != B_ncols=" << B_ncols
4125 <<
"." << std::endl;
4127 if (A_ncols != B_nrows) {
4128 errStrm <<
"Proc " << myRank <<
": A_ncols="
4129 << A_ncols <<
" != B_nrows=" << B_nrows
4130 <<
"." << std::endl;
4133 const int lclGood = lclBad ? 0 : 1;
4135 reduceAll<int, int> (*comm, REDUCE_MIN, lclGood, outArg (gblGood));
4137 std::ostringstream os;
4138 ::Tpetra::Details::gathervPrint (os, errStrm.str (), *comm);
4140 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4141 (
true, std::runtime_error,
"Inconsistent local dimensions on at "
4142 "least one process in this object's communicator." << std::endl
4144 <<
"C(" << (C_is_local ?
"local" :
"distr") <<
") = "
4146 << (transA == Teuchos::TRANS ?
"^T" :
4147 (transA == Teuchos::CONJ_TRANS ?
"^H" :
""))
4148 <<
"(" << (A_is_local ?
"local" :
"distr") <<
") * "
4150 << (transA == Teuchos::TRANS ?
"^T" :
4151 (transA == Teuchos::CONJ_TRANS ?
"^H" :
""))
4152 <<
"(" << (B_is_local ?
"local" :
"distr") <<
")." << std::endl
4153 <<
"Global dimensions: C(" << this->getGlobalLength () <<
", "
4163 const bool Case1 = C_is_local && A_is_local && B_is_local;
4165 const bool Case2 = C_is_local && ! A_is_local && ! B_is_local &&
4166 transA != NO_TRANS &&
4169 const bool Case3 = ! C_is_local && ! A_is_local && B_is_local &&
4173 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4174 (! Case1 && ! Case2 && ! Case3, std::runtime_error,
4175 "Multiplication of op(A) and op(B) into *this is not a "
4176 "supported use case.");
4178 if (beta != STS::zero () && Case2) {
4184 const int myRank = this->getMap ()->getComm ()->getRank ();
4186 beta_local = ATS::zero ();
4195 if (! isConstantStride ()) {
4196 C_tmp = rcp (
new MV (*
this, Teuchos::Copy));
4198 C_tmp = rcp (
this,
false);
4201 RCP<const MV> A_tmp;
4203 A_tmp = rcp (
new MV (A, Teuchos::Copy));
4205 A_tmp = rcpFromRef (A);
4208 RCP<const MV> B_tmp;
4210 B_tmp = rcp (
new MV (B, Teuchos::Copy));
4212 B_tmp = rcpFromRef (B);
4215 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4216 (! C_tmp->isConstantStride () || ! B_tmp->isConstantStride () ||
4217 ! A_tmp->isConstantStride (), std::logic_error,
4218 "Failed to make temporary constant-stride copies of MultiVectors.");
4221 const LO A_lclNumRows = A_tmp->getLocalLength ();
4222 const LO A_numVecs = A_tmp->getNumVectors ();
4223 auto A_lcl = A_tmp->getLocalViewDevice(Access::ReadOnly);
4224 auto A_sub = Kokkos::subview (A_lcl,
4225 std::make_pair (LO (0), A_lclNumRows),
4226 std::make_pair (LO (0), A_numVecs));
4229 const LO B_lclNumRows = B_tmp->getLocalLength ();
4230 const LO B_numVecs = B_tmp->getNumVectors ();
4231 auto B_lcl = B_tmp->getLocalViewDevice(Access::ReadOnly);
4232 auto B_sub = Kokkos::subview (B_lcl,
4233 std::make_pair (LO (0), B_lclNumRows),
4234 std::make_pair (LO (0), B_numVecs));
4236 const LO C_lclNumRows = C_tmp->getLocalLength ();
4237 const LO C_numVecs = C_tmp->getNumVectors ();
4239 auto C_lcl = C_tmp->getLocalViewDevice(Access::ReadWrite);
4240 auto C_sub = Kokkos::subview (C_lcl,
4241 std::make_pair (LO (0), C_lclNumRows),
4242 std::make_pair (LO (0), C_numVecs));
4243 const char ctransA = (transA == Teuchos::NO_TRANS ?
'N' :
4244 (transA == Teuchos::TRANS ?
'T' :
'C'));
4245 const char ctransB = (transB == Teuchos::NO_TRANS ?
'N' :
4246 (transB == Teuchos::TRANS ?
'T' :
'C'));
4249 ProfilingRegion regionGemm (
"Tpetra::MV::multiply-call-gemm");
4251 KokkosBlas::gemm (&ctransA, &ctransB, alpha_IST, A_sub, B_sub,
4255 if (! isConstantStride ()) {
4260 A_tmp = Teuchos::null;
4261 B_tmp = Teuchos::null;
4269 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4278 using Kokkos::subview;
4279 const char tfecfFuncName[] =
"elementWiseMultiply: ";
4281 const size_t lclNumRows = this->getLocalLength ();
4282 const size_t numVecs = this->getNumVectors ();
4284 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4286 std::runtime_error,
"MultiVectors do not have the same local length.");
4287 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4288 numVecs != B.
getNumVectors (), std::runtime_error,
"this->getNumVectors"
4289 "() = " << numVecs <<
" != B.getNumVectors() = " << B.
getNumVectors ()
4292 auto this_view = this->getLocalViewDevice(Access::ReadWrite);
4302 KokkosBlas::mult (scalarThis,
4305 subview (A_view, ALL (), 0),
4309 for (
size_t j = 0; j < numVecs; ++j) {
4310 const size_t C_col = isConstantStride () ? j : whichVectors_[j];
4312 KokkosBlas::mult (scalarThis,
4313 subview (this_view, ALL (), C_col),
4315 subview (A_view, ALL (), 0),
4316 subview (B_view, ALL (), B_col));
4321 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4327 using ::Tpetra::Details::ProfilingRegion;
4328 ProfilingRegion region (
"Tpetra::MV::reduce");
4330 const auto map = this->getMap ();
4331 if (map.get () ==
nullptr) {
4334 const auto comm = map->getComm ();
4335 if (comm.get () ==
nullptr) {
4341 const bool changed_on_device = this->need_sync_host ();
4342 if (changed_on_device) {
4346 Kokkos::fence(
"MultiVector::reduce");
4347 auto X_lcl = this->getLocalViewDevice(Access::ReadWrite);
4351 auto X_lcl = this->getLocalViewHost(Access::ReadWrite);
4356 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4364 const LocalOrdinal minLocalIndex = this->getMap()->getMinLocalIndex();
4365 const LocalOrdinal maxLocalIndex = this->getMap()->getMaxLocalIndex();
4366 TEUCHOS_TEST_FOR_EXCEPTION(
4367 lclRow < minLocalIndex || lclRow > maxLocalIndex,
4369 "Tpetra::MultiVector::replaceLocalValue: row index " << lclRow
4370 <<
" is invalid. The range of valid row indices on this process "
4371 << this->getMap()->getComm()->getRank() <<
" is [" << minLocalIndex
4372 <<
", " << maxLocalIndex <<
"].");
4373 TEUCHOS_TEST_FOR_EXCEPTION(
4374 vectorIndexOutOfRange(col),
4376 "Tpetra::MultiVector::replaceLocalValue: vector index " << col
4377 <<
" of the multivector is invalid.");
4380 auto view = this->getLocalViewHost(Access::ReadWrite);
4381 const size_t colInd = isConstantStride () ? col : whichVectors_[col];
4382 view (lclRow, colInd) = ScalarValue;
4386 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4395 const LocalOrdinal minLocalIndex = this->getMap()->getMinLocalIndex();
4396 const LocalOrdinal maxLocalIndex = this->getMap()->getMaxLocalIndex();
4397 TEUCHOS_TEST_FOR_EXCEPTION(
4398 lclRow < minLocalIndex || lclRow > maxLocalIndex,
4400 "Tpetra::MultiVector::sumIntoLocalValue: row index " << lclRow
4401 <<
" is invalid. The range of valid row indices on this process "
4402 << this->getMap()->getComm()->getRank() <<
" is [" << minLocalIndex
4403 <<
", " << maxLocalIndex <<
"].");
4404 TEUCHOS_TEST_FOR_EXCEPTION(
4405 vectorIndexOutOfRange(col),
4407 "Tpetra::MultiVector::sumIntoLocalValue: vector index " << col
4408 <<
" of the multivector is invalid.");
4411 const size_t colInd = isConstantStride () ? col : whichVectors_[col];
4413 auto view = this->getLocalViewHost(Access::ReadWrite);
4415 Kokkos::atomic_add (& (view(lclRow, colInd)), value);
4418 view(lclRow, colInd) += value;
4423 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4432 const LocalOrdinal lclRow = this->map_->getLocalElement (gblRow);
4435 const char tfecfFuncName[] =
"replaceGlobalValue: ";
4436 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4437 (lclRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid (),
4439 "Global row index " << gblRow <<
" is not present on this process "
4440 << this->getMap ()->getComm ()->getRank () <<
".");
4441 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4442 (this->vectorIndexOutOfRange (col), std::runtime_error,
4443 "Vector index " << col <<
" of the MultiVector is invalid.");
4446 this->replaceLocalValue (lclRow, col, ScalarValue);
4449 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4459 const LocalOrdinal lclRow = this->map_->getLocalElement (globalRow);
4462 TEUCHOS_TEST_FOR_EXCEPTION(
4463 lclRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid (),
4465 "Tpetra::MultiVector::sumIntoGlobalValue: Global row index " << globalRow
4466 <<
" is not present on this process "
4467 << this->getMap ()->getComm ()->getRank () <<
".");
4468 TEUCHOS_TEST_FOR_EXCEPTION(
4469 vectorIndexOutOfRange(col),
4471 "Tpetra::MultiVector::sumIntoGlobalValue: Vector index " << col
4472 <<
" of the multivector is invalid.");
4475 this->sumIntoLocalValue (lclRow, col, value, atomic);
4478 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4480 Teuchos::ArrayRCP<T>
4486 typename dual_view_type::array_layout,
4488 const size_t col = isConstantStride () ? j : whichVectors_[j];
4489 col_dual_view_type X_col =
4490 Kokkos::subview (view_, Kokkos::ALL (), col);
4491 return Kokkos::Compat::persistingView (X_col.d_view);
4494 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4501 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4508 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4513 using Teuchos::TypeNameTraits;
4515 std::ostringstream out;
4516 out <<
"\"" << className <<
"\": {";
4517 out <<
"Template parameters: {Scalar: " << TypeNameTraits<Scalar>::name ()
4518 <<
", LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name ()
4519 <<
", GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name ()
4520 <<
", Node" << Node::name ()
4522 if (this->getObjectLabel () !=
"") {
4523 out <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
4525 out <<
", numRows: " << this->getGlobalLength ();
4526 if (className !=
"Tpetra::Vector") {
4527 out <<
", numCols: " << this->getNumVectors ()
4528 <<
", isConstantStride: " << this->isConstantStride ();
4530 if (this->isConstantStride ()) {
4531 out <<
", columnStride: " << this->getStride ();
4538 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4543 return this->descriptionImpl (
"Tpetra::MultiVector");
4548 template<
typename ViewType>
4549 void print_vector(Teuchos::FancyOStream & out,
const char* prefix,
const ViewType& v)
4552 static_assert(Kokkos::SpaceAccessibility<Kokkos::HostSpace, typename ViewType::memory_space>::accessible,
4553 "Tpetra::MultiVector::localDescribeToString: Details::print_vector should be given a host-accessible view.");
4554 static_assert(ViewType::rank == 2,
4555 "Tpetra::MultiVector::localDescribeToString: Details::print_vector should be given a rank-2 view.");
4558 out <<
"Values("<<prefix<<
"): " << std::endl
4560 const size_t numRows = v.extent(0);
4561 const size_t numCols = v.extent(1);
4563 for (
size_t i = 0; i < numRows; ++i) {
4565 if (i + 1 < numRows) {
4571 for (
size_t i = 0; i < numRows; ++i) {
4572 for (
size_t j = 0; j < numCols; ++j) {
4574 if (j + 1 < numCols) {
4578 if (i + 1 < numRows) {
4587 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4592 typedef LocalOrdinal LO;
4595 if (vl <= Teuchos::VERB_LOW) {
4596 return std::string ();
4598 auto map = this->getMap ();
4599 if (map.is_null ()) {
4600 return std::string ();
4602 auto outStringP = Teuchos::rcp (
new std::ostringstream ());
4603 auto outp = Teuchos::getFancyOStream (outStringP);
4604 Teuchos::FancyOStream& out = *outp;
4605 auto comm = map->getComm ();
4606 const int myRank = comm->getRank ();
4607 const int numProcs = comm->getSize ();
4609 out <<
"Process " << myRank <<
" of " << numProcs <<
":" << endl;
4610 Teuchos::OSTab tab1 (out);
4613 const LO lclNumRows =
static_cast<LO
> (this->getLocalLength ());
4614 out <<
"Local number of rows: " << lclNumRows << endl;
4616 if (vl > Teuchos::VERB_MEDIUM) {
4619 if (this->getNumVectors () !=
static_cast<size_t> (1)) {
4620 out <<
"isConstantStride: " << this->isConstantStride () << endl;
4622 if (this->isConstantStride ()) {
4623 out <<
"Column stride: " << this->getStride () << endl;
4626 if (vl > Teuchos::VERB_HIGH && lclNumRows > 0) {
4634 auto X_dev = view_.getDeviceCopy();
4635 auto X_host = view_.getHostCopy();
4637 if(X_dev.data() == X_host.data()) {
4639 Details::print_vector(out,
"unified",X_host);
4642 Details::print_vector(out,
"host",X_host);
4643 Details::print_vector(out,
"dev",X_dev);
4648 return outStringP->str ();
4651 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4655 const std::string& className,
4656 const Teuchos::EVerbosityLevel verbLevel)
const
4658 using Teuchos::TypeNameTraits;
4659 using Teuchos::VERB_DEFAULT;
4660 using Teuchos::VERB_NONE;
4661 using Teuchos::VERB_LOW;
4663 const Teuchos::EVerbosityLevel vl =
4664 (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
4666 if (vl == VERB_NONE) {
4673 auto map = this->getMap ();
4674 if (map.is_null ()) {
4677 auto comm = map->getComm ();
4678 if (comm.is_null ()) {
4682 const int myRank = comm->getRank ();
4691 Teuchos::RCP<Teuchos::OSTab> tab0, tab1;
4695 tab0 = Teuchos::rcp (
new Teuchos::OSTab (out));
4696 out <<
"\"" << className <<
"\":" << endl;
4697 tab1 = Teuchos::rcp (
new Teuchos::OSTab (out));
4699 out <<
"Template parameters:" << endl;
4700 Teuchos::OSTab tab2 (out);
4701 out <<
"Scalar: " << TypeNameTraits<Scalar>::name () << endl
4702 <<
"LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name () << endl
4703 <<
"GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name () << endl
4704 <<
"Node: " << Node::name () << endl;
4706 if (this->getObjectLabel () !=
"") {
4707 out <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
4709 out <<
"Global number of rows: " << this->getGlobalLength () << endl;
4710 if (className !=
"Tpetra::Vector") {
4711 out <<
"Number of columns: " << this->getNumVectors () << endl;
4718 if (vl > VERB_LOW) {
4719 const std::string lclStr = this->localDescribeToString (vl);
4720 ::Tpetra::Details::gathervPrint (out, lclStr, *comm);
4724 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4728 const Teuchos::EVerbosityLevel verbLevel)
const
4730 this->describeImpl (out,
"Tpetra::MultiVector", verbLevel);
4733 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4738 replaceMap (newMap);
4741 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4746 using ::Tpetra::Details::localDeepCopy;
4747 const char prefix[] =
"Tpetra::MultiVector::assign: ";
4749 TEUCHOS_TEST_FOR_EXCEPTION
4751 this->getNumVectors () != src.
getNumVectors (), std::invalid_argument,
4752 prefix <<
"Global dimensions of the two Tpetra::MultiVector "
4753 "objects do not match. src has dimensions [" << src.
getGlobalLength ()
4754 <<
"," << src.
getNumVectors () <<
"], and *this has dimensions ["
4755 << this->getGlobalLength () <<
"," << this->getNumVectors () <<
"].");
4757 TEUCHOS_TEST_FOR_EXCEPTION
4758 (this->getLocalLength () != src.
getLocalLength (), std::invalid_argument,
4759 prefix <<
"The local row counts of the two Tpetra::MultiVector "
4760 "objects do not match. src has " << src.
getLocalLength () <<
" row(s) "
4761 <<
" and *this has " << this->getLocalLength () <<
" row(s).");
4776 if (src_last_updated_on_host) {
4777 localDeepCopy (this->getLocalViewHost(Access::ReadWrite),
4779 this->isConstantStride (),
4781 this->whichVectors_ (),
4785 localDeepCopy (this->getLocalViewDevice(Access::ReadWrite),
4787 this->isConstantStride (),
4789 this->whichVectors_ (),
4794 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4796 Teuchos::RCP<MultiVector<T, LocalOrdinal, GlobalOrdinal, Node> >
4803 this->getNumVectors(),
4809 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4814 using ::Tpetra::Details::PackTraits;
4816 const size_t l1 = this->getLocalLength();
4818 if ((l1!=l2) || (this->getNumVectors() != vec.
getNumVectors())) {
4825 template <
class ST,
class LO,
class GO,
class NT>
4828 std::swap(mv.
map_, this->map_);
4829 std::swap(mv.
view_, this->view_);
4833 #ifdef HAVE_TPETRACORE_TEUCHOSNUMERICS
4834 template <
class ST,
class LO,
class GO,
class NT>
4837 const Teuchos::SerialDenseMatrix<int, ST>& src)
4839 using ::Tpetra::Details::localDeepCopy;
4841 using IST =
typename MV::impl_scalar_type;
4842 using input_view_type =
4843 Kokkos::View<
const IST**, Kokkos::LayoutLeft,
4844 Kokkos::HostSpace, Kokkos::MemoryUnmanaged>;
4845 using pair_type = std::pair<LO, LO>;
4847 const LO numRows =
static_cast<LO
> (src.numRows ());
4848 const LO numCols =
static_cast<LO
> (src.numCols ());
4850 TEUCHOS_TEST_FOR_EXCEPTION
4853 std::invalid_argument,
"Tpetra::deep_copy: On Process "
4854 << dst.
getMap ()->getComm ()->getRank () <<
", dst is "
4856 <<
", but src is " << numRows <<
" x " << numCols <<
".");
4858 const IST* src_raw =
reinterpret_cast<const IST*
> (src.values ());
4859 input_view_type src_orig (src_raw, src.stride (), numCols);
4860 input_view_type src_view =
4861 Kokkos::subview (src_orig, pair_type (0, numRows), Kokkos::ALL ());
4863 constexpr
bool src_isConstantStride =
true;
4864 Teuchos::ArrayView<const size_t> srcWhichVectors (
nullptr, 0);
4868 src_isConstantStride,
4869 getMultiVectorWhichVectors (dst),
4873 template <
class ST,
class LO,
class GO,
class NT>
4875 deep_copy (Teuchos::SerialDenseMatrix<int, ST>& dst,
4878 using ::Tpetra::Details::localDeepCopy;
4880 using IST =
typename MV::impl_scalar_type;
4881 using output_view_type =
4882 Kokkos::View<IST**, Kokkos::LayoutLeft,
4883 Kokkos::HostSpace, Kokkos::MemoryUnmanaged>;
4884 using pair_type = std::pair<LO, LO>;
4886 const LO numRows =
static_cast<LO
> (dst.numRows ());
4887 const LO numCols =
static_cast<LO
> (dst.numCols ());
4889 TEUCHOS_TEST_FOR_EXCEPTION
4892 std::invalid_argument,
"Tpetra::deep_copy: On Process "
4893 << src.
getMap ()->getComm ()->getRank () <<
", src is "
4895 <<
", but dst is " << numRows <<
" x " << numCols <<
".");
4897 IST* dst_raw =
reinterpret_cast<IST*
> (dst.values ());
4898 output_view_type dst_orig (dst_raw, dst.stride (), numCols);
4900 Kokkos::subview (dst_orig, pair_type (0, numRows), Kokkos::ALL ());
4902 constexpr
bool dst_isConstantStride =
true;
4903 Teuchos::ArrayView<const size_t> dstWhichVectors (
nullptr, 0);
4906 localDeepCopy (dst_view,
4908 dst_isConstantStride,
4911 getMultiVectorWhichVectors (src));
4913 #endif // HAVE_TPETRACORE_TEUCHOSNUMERICS
4915 template <
class Scalar,
class LO,
class GO,
class NT>
4916 Teuchos::RCP<MultiVector<Scalar, LO, GO, NT> >
4921 return Teuchos::rcp (
new MV (map, numVectors));
4924 template <
class ST,
class LO,
class GO,
class NT>
4942 #ifdef HAVE_TPETRACORE_TEUCHOSNUMERICS
4943 # define TPETRA_MULTIVECTOR_INSTANT(SCALAR,LO,GO,NODE) \
4944 template class MultiVector< SCALAR , LO , GO , NODE >; \
4945 template MultiVector< SCALAR , LO , GO , NODE > createCopy( const MultiVector< SCALAR , LO , GO , NODE >& src); \
4946 template Teuchos::RCP<MultiVector< SCALAR , LO , GO , NODE > > createMultiVector (const Teuchos::RCP<const Map<LO, GO, NODE> >& map, size_t numVectors); \
4947 template void deep_copy (MultiVector<SCALAR, LO, GO, NODE>& dst, const Teuchos::SerialDenseMatrix<int, SCALAR>& src); \
4948 template void deep_copy (Teuchos::SerialDenseMatrix<int, SCALAR>& dst, const MultiVector<SCALAR, LO, GO, NODE>& src);
4951 # define TPETRA_MULTIVECTOR_INSTANT(SCALAR,LO,GO,NODE) \
4952 template class MultiVector< SCALAR , LO , GO , NODE >; \
4953 template MultiVector< SCALAR , LO , GO , NODE > createCopy( const MultiVector< SCALAR , LO , GO , NODE >& src);
4955 #endif // HAVE_TPETRACORE_TEUCHOSNUMERICS
4958 #define TPETRA_MULTIVECTOR_CONVERT_INSTANT(SO,SI,LO,GO,NODE) \
4960 template Teuchos::RCP< MultiVector< SO , LO , GO , NODE > > \
4961 MultiVector< SI , LO , GO , NODE >::convert< SO > () const;
4964 #endif // TPETRA_MULTIVECTOR_DEF_HPP
typename map_type::local_ordinal_type local_ordinal_type
The type of local indices that this class uses.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
typename device_type::execution_space execution_space
Type of the (new) Kokkos execution space.
Kokkos::DualView< size_t *, buffer_device_type > numExportPacketsPerLID_
Number of packets to send for each send operation.
Teuchos::RCP< const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > offsetView(const Teuchos::RCP< const map_type > &subMap, const size_t offset) const
Return a const view of a subset of rows.
Declaration and definition of Tpetra::Details::Blas::fill, an implementation detail of Tpetra::MultiV...
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
typename Kokkos::ArithTraits< impl_scalar_type >::mag_type mag_type
Type of a norm result.
size_t getNumVectors() const
Number of columns in the multivector.
size_t getLocalLength() const
Local number of rows on the calling process.
Declaration of a function that prints strings from each process.
bool isConstantStride() const
Whether this multivector has constant stride between columns.
MultiVector< ST, LO, GO, NT > createCopy(const MultiVector< ST, LO, GO, NT > &src)
Return a deep copy of the given MultiVector.
One or more distributed dense vectors.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
static size_t multivectorKernelLocationThreshold()
the threshold for transitioning from device to host
bool isDistributed() const
Whether this is a globally distributed object.
void removeEmptyProcessesInPlace(Teuchos::RCP< DistObjectType > &input, const Teuchos::RCP< const Map< typename DistObjectType::local_ordinal_type, typename DistObjectType::global_ordinal_type, typename DistObjectType::node_type > > &newMap)
Remove processes which contain no elements in this object's Map.
static bool debug()
Whether Tpetra is in debug mode.
Teuchos::Array< size_t > whichVectors_
Indices of columns this multivector is viewing.
Declaration of functions for checking whether a given pointer is accessible from a given Kokkos execu...
int local_ordinal_type
Default value of Scalar template parameter.
bool need_sync_device() const
Whether this MultiVector needs synchronization to the device.
typename Kokkos::ArithTraits< Scalar >::val_type impl_scalar_type
The type used internally in place of Scalar.
size_t global_size_t
Global size_t object.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
dual_view_type::t_host::const_type getLocalViewHost(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on host. This requires that ther...
Kokkos::DualView< size_t *, buffer_device_type > numImportPacketsPerLID_
Number of packets to receive for each receive operation.
Insert new values that don't currently exist.
Kokkos::DualView< packet_type *, buffer_device_type > imports_
Buffer into which packed data are imported (received from other processes).
Kokkos::DualView< packet_type *, buffer_device_type > exports_
Buffer from which packed data are exported (sent to other processes).
Kokkos::DualView< T *, DT > getDualViewCopyFromArrayView(const Teuchos::ArrayView< const T > &x_av, const char label[], const bool leaveOnHost)
Get a 1-D Kokkos::DualView which is a deep copy of the input Teuchos::ArrayView (which views host mem...
wrapped_dual_view_type view_
The Kokkos::DualView containing the MultiVector's data.
void assign(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &src)
Copy the contents of src into *this (deep copy).
static void allReduceView(const OutputViewType &output, const InputViewType &input, const Teuchos::Comm< int > &comm)
All-reduce from input Kokkos::View to output Kokkos::View.
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > createMultiVector(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &map, const size_t numVectors)
Nonmember MultiVector "constructor": Create a MultiVector from a given Map.
Functions for initializing and finalizing Tpetra.
static bool verbose()
Whether Tpetra is in verbose mode.
CombineMode
Rule for combining data in an Import or Export.
Teuchos::RCP< const map_type > map_
The Map over which this object is distributed.
bool reallocDualViewIfNeeded(Kokkos::DualView< ValueType *, DeviceType > &dv, const size_t newSize, const char newLabel[], const size_t tooBigFactor=2, const bool needFenceBeforeRealloc=true)
Reallocate the DualView in/out argument, if needed.
Declaration of Tpetra::Details::isInterComm.
bool need_sync_host() const
Whether this MultiVector needs synchronization to the host.
Replace old value with maximum of magnitudes of old and new values.
Abstract base class for objects that can be the source of an Import or Export operation.
Declaration and definition of Tpetra::Details::reallocDualViewIfNeeded, an implementation detail of T...
Kokkos::DualView< impl_scalar_type **, Kokkos::LayoutLeft, device_type > dual_view_type
Kokkos::DualView specialization used by this class.
Replace existing values with new values.
dual_view_type::t_dev::const_type getLocalViewDevice(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on device. This requires that th...
std::string combineModeToString(const CombineMode combineMode)
Human-readable string representation of the given CombineMode.
Teuchos::RCP< const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > getVector(const size_t j) const
Return a Vector which is a const view of column j.
MultiVector()
Default constructor: makes a MultiVector with no rows or columns.
void start()
Start the deep_copy counter.
A parallel distribution of indices over processes.
A distributed dense vector.
Stand-alone utility functions and macros.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
Declaration and definition of Tpetra::Details::normImpl, which is an implementation detail of Tpetra:...
void normImpl(MagnitudeType norms[], const Kokkos::View< const ValueType **, ArrayLayout, DeviceType > &X, const EWhichNorm whichNorm, const Teuchos::ArrayView< const size_t > &whichVecs, const bool isConstantStride, const bool isDistributed, const Teuchos::Comm< int > *comm)
Implementation of MultiVector norms.
Base class for distributed Tpetra objects that support data redistribution.
global_size_t getGlobalLength() const
Global number of rows in the multivector.
EWhichNorm
Input argument for normImpl() (which see).
Accumulate new values into existing values (may not be supported in all classes)
size_t getOrigNumLocalRows() const
"Original" number of rows in the (local) data.
All-reduce a 1-D or 2-D Kokkos::View.
Declaration and definition of Tpetra::Details::lclDot, an implementation detail of Tpetra::MultiVecto...
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.