11 #ifndef TPETRA_MULTIVECTOR_DEF_HPP
12 #define TPETRA_MULTIVECTOR_DEF_HPP
24 #include "Tpetra_Vector.hpp"
36 #include "Tpetra_Details_Random.hpp"
37 #ifdef HAVE_TPETRACORE_TEUCHOSNUMERICS
38 # include "Teuchos_SerialDenseMatrix.hpp"
39 #endif // HAVE_TPETRACORE_TEUCHOSNUMERICS
40 #include "Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels.hpp"
41 #include "KokkosCompat_View.hpp"
42 #include "KokkosBlas.hpp"
43 #include "KokkosKernels_Utils.hpp"
44 #include "Kokkos_Random.hpp"
45 #include "Kokkos_ArithTraits.hpp"
49 #ifdef HAVE_TPETRA_INST_FLOAT128
52 template<
class Generator>
53 struct rand<Generator, __float128> {
54 static KOKKOS_INLINE_FUNCTION __float128 max ()
56 return static_cast<__float128
> (1.0);
58 static KOKKOS_INLINE_FUNCTION __float128
63 const __float128 scalingFactor =
64 static_cast<__float128
> (std::numeric_limits<double>::min ()) /
65 static_cast<__float128> (2.0);
66 const __float128 higherOrderTerm =
static_cast<__float128
> (gen.drand ());
67 const __float128 lowerOrderTerm =
68 static_cast<__float128
> (gen.drand ()) * scalingFactor;
69 return higherOrderTerm + lowerOrderTerm;
71 static KOKKOS_INLINE_FUNCTION __float128
72 draw (Generator& gen,
const __float128& range)
75 const __float128 scalingFactor =
76 static_cast<__float128
> (std::numeric_limits<double>::min ()) /
77 static_cast<__float128> (2.0);
78 const __float128 higherOrderTerm =
79 static_cast<__float128
> (gen.drand (range));
80 const __float128 lowerOrderTerm =
81 static_cast<__float128
> (gen.drand (range)) * scalingFactor;
82 return higherOrderTerm + lowerOrderTerm;
84 static KOKKOS_INLINE_FUNCTION __float128
85 draw (Generator& gen,
const __float128&
start,
const __float128& end)
88 const __float128 scalingFactor =
89 static_cast<__float128
> (std::numeric_limits<double>::min ()) /
90 static_cast<__float128> (2.0);
91 const __float128 higherOrderTerm =
92 static_cast<__float128
> (gen.drand (start, end));
93 const __float128 lowerOrderTerm =
94 static_cast<__float128
> (gen.drand (start, end)) * scalingFactor;
95 return higherOrderTerm + lowerOrderTerm;
99 #endif // HAVE_TPETRA_INST_FLOAT128
117 template<
class ST,
class LO,
class GO,
class NT>
119 allocDualView (
const size_t lclNumRows,
120 const size_t numCols,
121 const bool zeroOut =
true,
122 const bool allowPadding =
false)
124 using ::Tpetra::Details::Behavior;
125 using Kokkos::AllowPadding;
126 using Kokkos::view_alloc;
127 using Kokkos::WithoutInitializing;
130 typedef typename dual_view_type::t_dev dev_view_type;
136 const std::string label (
"MV::DualView");
137 const bool debug = Behavior::debug ();
147 dev_view_type d_view;
150 d_view = dev_view_type (view_alloc (label, AllowPadding),
151 lclNumRows, numCols);
154 d_view = dev_view_type (view_alloc (label),
155 lclNumRows, numCols);
160 d_view = dev_view_type (view_alloc (label,
163 lclNumRows, numCols);
166 d_view = dev_view_type (view_alloc (label, WithoutInitializing),
167 lclNumRows, numCols);
178 const ST nan = Kokkos::ArithTraits<ST>::nan ();
179 KokkosBlas::fill (d_view, nan);
183 TEUCHOS_TEST_FOR_EXCEPTION
184 (static_cast<size_t> (d_view.extent (0)) != lclNumRows ||
185 static_cast<size_t> (d_view.extent (1)) != numCols, std::logic_error,
186 "allocDualView: d_view's dimensions actual dimensions do not match "
187 "requested dimensions. d_view is " << d_view.extent (0) <<
" x " <<
188 d_view.extent (1) <<
"; requested " << lclNumRows <<
" x " << numCols
189 <<
". Please report this bug to the Tpetra developers.");
192 return wrapped_dual_view_type(d_view);
199 template<
class T,
class ExecSpace>
200 struct MakeUnmanagedView {
210 typedef typename std::conditional<
211 Kokkos::SpaceAccessibility<
212 typename ExecSpace::memory_space,
213 Kokkos::HostSpace>::accessible,
214 typename ExecSpace::device_type,
215 typename Kokkos::DualView<T*, ExecSpace>::host_mirror_space>::type host_exec_space;
216 typedef Kokkos::LayoutLeft array_layout;
217 typedef Kokkos::View<T*, array_layout, host_exec_space,
218 Kokkos::MemoryUnmanaged> view_type;
220 static view_type getView (
const Teuchos::ArrayView<T>& x_in)
222 const size_t numEnt =
static_cast<size_t> (x_in.size ());
226 return view_type (x_in.getRawPtr (), numEnt);
232 template<
class WrappedDualViewType>
234 takeSubview (
const WrappedDualViewType& X,
235 const std::pair<size_t, size_t>& rowRng,
236 const Kokkos::ALL_t& colRng)
240 return WrappedDualViewType(X,rowRng,colRng);
243 template<
class WrappedDualViewType>
245 takeSubview (
const WrappedDualViewType& X,
246 const Kokkos::ALL_t& rowRng,
247 const std::pair<size_t, size_t>& colRng)
249 using DualViewType =
typename WrappedDualViewType::DVT;
252 if (X.extent (0) == 0 && X.extent (1) != 0) {
253 return WrappedDualViewType(DualViewType (
"MV::DualView", 0, colRng.second - colRng.first));
256 return WrappedDualViewType(X,rowRng,colRng);
260 template<
class WrappedDualViewType>
262 takeSubview (
const WrappedDualViewType& X,
263 const std::pair<size_t, size_t>& rowRng,
264 const std::pair<size_t, size_t>& colRng)
266 using DualViewType =
typename WrappedDualViewType::DVT;
276 if (X.extent (0) == 0 && X.extent (1) != 0) {
277 return WrappedDualViewType(DualViewType (
"MV::DualView", 0, colRng.second - colRng.first));
280 return WrappedDualViewType(X,rowRng,colRng);
284 template<
class WrappedOrNotDualViewType>
286 getDualViewStride (
const WrappedOrNotDualViewType& dv)
292 size_t strides[WrappedOrNotDualViewType::t_dev::rank+1];
294 const size_t LDA = strides[1];
295 const size_t numRows = dv.extent (0);
298 return (numRows == 0) ? size_t (1) : numRows;
305 template<
class ViewType>
307 getViewStride (
const ViewType& view)
309 const size_t LDA = view.stride (1);
310 const size_t numRows = view.extent (0);
313 return (numRows == 0) ? size_t (1) : numRows;
320 template <
class impl_scalar_type,
class buffer_device_type>
323 Kokkos::DualView<impl_scalar_type*, buffer_device_type> imports
326 if (! imports.need_sync_device ()) {
331 size_t localLengthThreshold =
333 return imports.extent(0) <= localLengthThreshold;
338 template <
class SC,
class LO,
class GO,
class NT>
340 runKernelOnHost (const ::Tpetra::MultiVector<SC, LO, GO, NT>& X)
342 if (! X.need_sync_device ()) {
347 size_t localLengthThreshold =
349 return X.getLocalLength () <= localLengthThreshold;
353 template <
class SC,
class LO,
class GO,
class NT>
355 multiVectorNormImpl (typename ::Tpetra::MultiVector<SC, LO, GO, NT>::mag_type norms[],
359 using namespace Tpetra;
362 using val_type =
typename MV::impl_scalar_type;
363 using mag_type =
typename MV::mag_type;
364 using dual_view_type =
typename MV::dual_view_type;
367 auto comm = map.is_null () ?
nullptr : map->getComm ().getRawPtr ();
368 auto whichVecs = getMultiVectorWhichVectors (X);
372 const bool runOnHost = runKernelOnHost (X);
374 using view_type =
typename dual_view_type::t_host;
375 using array_layout =
typename view_type::array_layout;
376 using device_type =
typename view_type::device_type;
379 normImpl<val_type, array_layout, device_type,
380 mag_type> (norms, X_lcl, whichNorm, whichVecs,
381 isConstantStride, isDistributed, comm);
384 using view_type =
typename dual_view_type::t_dev;
385 using array_layout =
typename view_type::array_layout;
386 using device_type =
typename view_type::device_type;
389 normImpl<val_type, array_layout, device_type,
390 mag_type> (norms, X_lcl, whichNorm, whichVecs,
391 isConstantStride, isDistributed, comm);
400 template <
typename DstView,
typename SrcView>
401 struct AddAssignFunctor {
404 AddAssignFunctor(DstView &tgt_, SrcView &src_) : tgt(tgt_), src(src_) {}
406 KOKKOS_INLINE_FUNCTION
void
407 operator () (
const size_t k)
const { tgt(k) += src(k); }
414 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
418 return (VectorIndex < 1 && VectorIndex != 0) || VectorIndex >= getNumVectors();
421 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
427 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
430 const size_t numVecs,
431 const bool zeroOut) :
437 view_ = allocDualView<Scalar, LocalOrdinal, GlobalOrdinal, Node> (lclNumRows, numVecs, zeroOut);
440 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
443 const Teuchos::DataAccess copyOrView) :
445 view_ (source.view_),
446 whichVectors_ (source.whichVectors_)
449 const char tfecfFuncName[] =
"MultiVector(const MultiVector&, "
450 "const Teuchos::DataAccess): ";
454 if (copyOrView == Teuchos::Copy) {
458 this->
view_ = cpy.view_;
461 else if (copyOrView == Teuchos::View) {
464 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
465 true, std::invalid_argument,
"Second argument 'copyOrView' has an "
466 "invalid value " << copyOrView <<
". Valid values include "
467 "Teuchos::Copy = " << Teuchos::Copy <<
" and Teuchos::View = "
468 << Teuchos::View <<
".");
472 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
475 const dual_view_type& view) :
477 view_ (wrapped_dual_view_type(view))
479 const char tfecfFuncName[] =
"MultiVector(Map,DualView): ";
480 const size_t lclNumRows_map = map.is_null () ? size_t (0) :
481 map->getLocalNumElements ();
482 const size_t lclNumRows_view = view.extent (0);
483 const size_t LDA = getDualViewStride (
view_);
485 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
486 (LDA < lclNumRows_map || lclNumRows_map != lclNumRows_view,
487 std::invalid_argument,
"Kokkos::DualView does not match Map. "
488 "map->getLocalNumElements() = " << lclNumRows_map
489 <<
", view.extent(0) = " << lclNumRows_view
490 <<
", and getStride() = " << LDA <<
".");
492 using ::Tpetra::Details::Behavior;
493 const bool debug = Behavior::debug ();
495 using ::Tpetra::Details::checkGlobalDualViewValidity;
496 std::ostringstream gblErrStrm;
497 const bool verbose = Behavior::verbose ();
498 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
499 const bool gblValid =
500 checkGlobalDualViewValidity (&gblErrStrm, view, verbose,
502 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
503 (! gblValid, std::runtime_error, gblErrStrm.str ());
508 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
511 const wrapped_dual_view_type& view) :
515 const char tfecfFuncName[] =
"MultiVector(Map,DualView): ";
516 const size_t lclNumRows_map = map.is_null () ? size_t (0) :
517 map->getLocalNumElements ();
518 const size_t lclNumRows_view = view.extent (0);
519 const size_t LDA = getDualViewStride (view);
521 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
522 (LDA < lclNumRows_map || lclNumRows_map != lclNumRows_view,
523 std::invalid_argument,
"Kokkos::DualView does not match Map. "
524 "map->getLocalNumElements() = " << lclNumRows_map
525 <<
", view.extent(0) = " << lclNumRows_view
526 <<
", and getStride() = " << LDA <<
".");
528 using ::Tpetra::Details::Behavior;
529 const bool debug = Behavior::debug ();
531 using ::Tpetra::Details::checkGlobalWrappedDualViewValidity;
532 std::ostringstream gblErrStrm;
533 const bool verbose = Behavior::verbose ();
534 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
535 const bool gblValid =
536 checkGlobalWrappedDualViewValidity (&gblErrStrm, view, verbose,
538 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
539 (! gblValid, std::runtime_error, gblErrStrm.str ());
545 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
548 const typename dual_view_type::t_dev& d_view) :
551 using Teuchos::ArrayRCP;
553 const char tfecfFuncName[] =
"MultiVector(map,d_view): ";
557 const size_t LDA = getViewStride (d_view);
558 const size_t lclNumRows = map->getLocalNumElements ();
559 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
560 (LDA < lclNumRows, std::invalid_argument,
"Map does not match "
561 "Kokkos::View. map->getLocalNumElements() = " << lclNumRows
562 <<
", View's column stride = " << LDA
563 <<
", and View's extent(0) = " << d_view.extent (0) <<
".");
565 auto h_view = Kokkos::create_mirror_view (d_view);
567 view_ = wrapped_dual_view_type(dual_view);
569 using ::Tpetra::Details::Behavior;
570 const bool debug = Behavior::debug ();
572 using ::Tpetra::Details::checkGlobalWrappedDualViewValidity;
573 std::ostringstream gblErrStrm;
574 const bool verbose = Behavior::verbose ();
575 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
576 const bool gblValid =
577 checkGlobalWrappedDualViewValidity (&gblErrStrm,
view_, verbose,
579 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
580 (! gblValid, std::runtime_error, gblErrStrm.str ());
585 dual_view.modify_device ();
588 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
591 const dual_view_type& view,
592 const dual_view_type& origView) :
594 view_ (wrapped_dual_view_type(view,origView))
596 const char tfecfFuncName[] =
"MultiVector(map,view,origView): ";
598 const size_t LDA = getDualViewStride (origView);
600 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
601 LDA < lclNumRows, std::invalid_argument,
"The input Kokkos::DualView's "
602 "column stride LDA = " << LDA <<
" < getLocalLength() = " << lclNumRows
603 <<
". This may also mean that the input origView's first dimension (number "
604 "of rows = " << origView.extent (0) <<
") does not not match the number "
605 "of entries in the Map on the calling process.");
607 using ::Tpetra::Details::Behavior;
608 const bool debug = Behavior::debug ();
610 using ::Tpetra::Details::checkGlobalDualViewValidity;
611 std::ostringstream gblErrStrm;
612 const bool verbose = Behavior::verbose ();
613 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
614 const bool gblValid_0 =
615 checkGlobalDualViewValidity (&gblErrStrm, view, verbose,
617 const bool gblValid_1 =
618 checkGlobalDualViewValidity (&gblErrStrm, origView, verbose,
620 const bool gblValid = gblValid_0 && gblValid_1;
621 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
622 (! gblValid, std::runtime_error, gblErrStrm.str ());
627 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
630 const dual_view_type& view,
631 const Teuchos::ArrayView<const size_t>& whichVectors) :
634 whichVectors_ (whichVectors.begin (), whichVectors.end ())
637 using Kokkos::subview;
638 const char tfecfFuncName[] =
"MultiVector(map,view,whichVectors): ";
640 using ::Tpetra::Details::Behavior;
641 const bool debug = Behavior::debug ();
643 using ::Tpetra::Details::checkGlobalDualViewValidity;
644 std::ostringstream gblErrStrm;
645 const bool verbose = Behavior::verbose ();
646 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
647 const bool gblValid =
648 checkGlobalDualViewValidity (&gblErrStrm, view, verbose,
650 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
651 (! gblValid, std::runtime_error, gblErrStrm.str ());
654 const size_t lclNumRows = map.is_null () ? size_t (0) :
655 map->getLocalNumElements ();
662 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
663 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (0)) < lclNumRows,
664 std::invalid_argument,
"view.extent(0) = " << view.extent (0)
665 <<
" < map->getLocalNumElements() = " << lclNumRows <<
".");
666 if (whichVectors.size () != 0) {
667 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
668 view.extent (1) != 0 && view.extent (1) == 0,
669 std::invalid_argument,
"view.extent(1) = 0, but whichVectors.size()"
670 " = " << whichVectors.size () <<
" > 0.");
671 size_t maxColInd = 0;
672 typedef Teuchos::ArrayView<const size_t>::size_type size_type;
673 for (size_type k = 0; k < whichVectors.size (); ++k) {
674 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
675 whichVectors[k] == Teuchos::OrdinalTraits<size_t>::invalid (),
676 std::invalid_argument,
"whichVectors[" << k <<
"] = "
677 "Teuchos::OrdinalTraits<size_t>::invalid().");
678 maxColInd = std::max (maxColInd, whichVectors[k]);
680 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
681 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (1)) <= maxColInd,
682 std::invalid_argument,
"view.extent(1) = " << view.extent (1)
683 <<
" <= max(whichVectors) = " << maxColInd <<
".");
688 const size_t LDA = getDualViewStride (view);
689 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
690 (LDA < lclNumRows, std::invalid_argument,
691 "LDA = " << LDA <<
" < this->getLocalLength() = " << lclNumRows <<
".");
693 if (whichVectors.size () == 1) {
703 const std::pair<size_t, size_t> colRng (whichVectors[0],
704 whichVectors[0] + 1);
711 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
714 const wrapped_dual_view_type& view,
715 const Teuchos::ArrayView<const size_t>& whichVectors) :
718 whichVectors_ (whichVectors.begin (), whichVectors.end ())
721 using Kokkos::subview;
722 const char tfecfFuncName[] =
"MultiVector(map,view,whichVectors): ";
724 using ::Tpetra::Details::Behavior;
725 const bool debug = Behavior::debug ();
727 using ::Tpetra::Details::checkGlobalWrappedDualViewValidity;
728 std::ostringstream gblErrStrm;
729 const bool verbose = Behavior::verbose ();
730 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
731 const bool gblValid =
732 checkGlobalWrappedDualViewValidity (&gblErrStrm, view, verbose,
734 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
735 (! gblValid, std::runtime_error, gblErrStrm.str ());
738 const size_t lclNumRows = map.is_null () ? size_t (0) :
739 map->getLocalNumElements ();
746 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
747 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (0)) < lclNumRows,
748 std::invalid_argument,
"view.extent(0) = " << view.extent (0)
749 <<
" < map->getLocalNumElements() = " << lclNumRows <<
".");
750 if (whichVectors.size () != 0) {
751 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
752 view.extent (1) != 0 && view.extent (1) == 0,
753 std::invalid_argument,
"view.extent(1) = 0, but whichVectors.size()"
754 " = " << whichVectors.size () <<
" > 0.");
755 size_t maxColInd = 0;
756 typedef Teuchos::ArrayView<const size_t>::size_type size_type;
757 for (size_type k = 0; k < whichVectors.size (); ++k) {
758 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
759 whichVectors[k] == Teuchos::OrdinalTraits<size_t>::invalid (),
760 std::invalid_argument,
"whichVectors[" << k <<
"] = "
761 "Teuchos::OrdinalTraits<size_t>::invalid().");
762 maxColInd = std::max (maxColInd, whichVectors[k]);
764 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
765 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (1)) <= maxColInd,
766 std::invalid_argument,
"view.extent(1) = " << view.extent (1)
767 <<
" <= max(whichVectors) = " << maxColInd <<
".");
772 const size_t LDA = getDualViewStride (view);
773 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
774 (LDA < lclNumRows, std::invalid_argument,
775 "LDA = " << LDA <<
" < this->getLocalLength() = " << lclNumRows <<
".");
777 if (whichVectors.size () == 1) {
787 const std::pair<size_t, size_t> colRng (whichVectors[0],
788 whichVectors[0] + 1);
796 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
799 const dual_view_type& view,
800 const dual_view_type& origView,
801 const Teuchos::ArrayView<const size_t>& whichVectors) :
803 view_ (wrapped_dual_view_type(view,origView)),
804 whichVectors_ (whichVectors.begin (), whichVectors.end ())
807 using Kokkos::subview;
808 const char tfecfFuncName[] =
"MultiVector(map,view,origView,whichVectors): ";
810 using ::Tpetra::Details::Behavior;
811 const bool debug = Behavior::debug ();
813 using ::Tpetra::Details::checkGlobalDualViewValidity;
814 std::ostringstream gblErrStrm;
815 const bool verbose = Behavior::verbose ();
816 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
817 const bool gblValid_0 =
818 checkGlobalDualViewValidity (&gblErrStrm, view, verbose,
820 const bool gblValid_1 =
821 checkGlobalDualViewValidity (&gblErrStrm, origView, verbose,
823 const bool gblValid = gblValid_0 && gblValid_1;
824 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
825 (! gblValid, std::runtime_error, gblErrStrm.str ());
835 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
836 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (0)) < lclNumRows,
837 std::invalid_argument,
"view.extent(0) = " << view.extent (0)
838 <<
" < map->getLocalNumElements() = " << lclNumRows <<
".");
839 if (whichVectors.size () != 0) {
840 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
841 view.extent (1) != 0 && view.extent (1) == 0,
842 std::invalid_argument,
"view.extent(1) = 0, but whichVectors.size()"
843 " = " << whichVectors.size () <<
" > 0.");
844 size_t maxColInd = 0;
845 typedef Teuchos::ArrayView<const size_t>::size_type size_type;
846 for (size_type k = 0; k < whichVectors.size (); ++k) {
847 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
848 whichVectors[k] == Teuchos::OrdinalTraits<size_t>::invalid (),
849 std::invalid_argument,
"whichVectors[" << k <<
"] = "
850 "Teuchos::OrdinalTraits<size_t>::invalid().");
851 maxColInd = std::max (maxColInd, whichVectors[k]);
853 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
854 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (1)) <= maxColInd,
855 std::invalid_argument,
"view.extent(1) = " << view.extent (1)
856 <<
" <= max(whichVectors) = " << maxColInd <<
".");
861 const size_t LDA = getDualViewStride (origView);
862 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
863 (LDA < lclNumRows, std::invalid_argument,
"Map and DualView origView "
864 "do not match. LDA = " << LDA <<
" < this->getLocalLength() = " <<
865 lclNumRows <<
". origView.extent(0) = " << origView.extent(0)
866 <<
", origView.stride(1) = " << origView.view_device().stride(1) <<
".");
868 if (whichVectors.size () == 1) {
877 const std::pair<size_t, size_t> colRng (whichVectors[0],
878 whichVectors[0] + 1);
886 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
889 const Teuchos::ArrayView<const Scalar>& data,
891 const size_t numVecs) :
894 typedef LocalOrdinal LO;
895 typedef GlobalOrdinal GO;
896 const char tfecfFuncName[] =
"MultiVector(map,data,LDA,numVecs): ";
902 const size_t lclNumRows =
903 map.is_null () ? size_t (0) : map->getLocalNumElements ();
904 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
905 (LDA < lclNumRows, std::invalid_argument,
"LDA = " << LDA <<
" < "
906 "map->getLocalNumElements() = " << lclNumRows <<
".");
908 const size_t minNumEntries = LDA * (numVecs - 1) + lclNumRows;
909 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
910 (static_cast<size_t> (data.size ()) < minNumEntries,
911 std::invalid_argument,
"Input Teuchos::ArrayView does not have enough "
912 "entries, given the input Map and number of vectors in the MultiVector."
913 " data.size() = " << data.size () <<
" < (LDA*(numVecs-1)) + "
914 "map->getLocalNumElements () = " << minNumEntries <<
".");
917 this->
view_ = allocDualView<Scalar, LO, GO, Node> (lclNumRows, numVecs);
929 Kokkos::MemoryUnmanaged> X_in_orig (X_in_raw, LDA, numVecs);
930 const Kokkos::pair<size_t, size_t> rowRng (0, lclNumRows);
931 auto X_in = Kokkos::subview (X_in_orig, rowRng, Kokkos::ALL ());
936 const size_t outStride =
937 X_out.extent (1) == 0 ? size_t (1) : X_out.stride (1);
938 if (LDA == outStride) {
945 typedef decltype (Kokkos::subview (X_out, Kokkos::ALL (), 0))
947 typedef decltype (Kokkos::subview (X_in, Kokkos::ALL (), 0))
949 for (
size_t j = 0; j < numVecs; ++j) {
950 out_col_view_type X_out_j = Kokkos::subview (X_out, Kokkos::ALL (), j);
951 in_col_view_type X_in_j = Kokkos::subview (X_in, Kokkos::ALL (), j);
958 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
961 const Teuchos::ArrayView<
const Teuchos::ArrayView<const Scalar> >& ArrayOfPtrs,
962 const size_t numVecs) :
966 typedef LocalOrdinal LO;
967 typedef GlobalOrdinal GO;
968 const char tfecfFuncName[] =
"MultiVector(map,ArrayOfPtrs,numVecs): ";
971 const size_t lclNumRows =
972 map.is_null () ? size_t (0) : map->getLocalNumElements ();
973 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
974 (numVecs < 1 || numVecs != static_cast<size_t> (ArrayOfPtrs.size ()),
975 std::runtime_error,
"Either numVecs (= " << numVecs <<
") < 1, or "
976 "ArrayOfPtrs.size() (= " << ArrayOfPtrs.size () <<
") != numVecs.");
977 for (
size_t j = 0; j < numVecs; ++j) {
978 Teuchos::ArrayView<const Scalar> X_j_av = ArrayOfPtrs[j];
979 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
980 static_cast<size_t> (X_j_av.size ()) < lclNumRows,
981 std::invalid_argument,
"ArrayOfPtrs[" << j <<
"].size() = "
982 << X_j_av.size () <<
" < map->getLocalNumElements() = " << lclNumRows
986 view_ = allocDualView<Scalar, LO, GO, Node> (lclNumRows, numVecs);
991 using array_layout =
typename decltype (X_out)::array_layout;
992 using input_col_view_type =
typename Kokkos::View<
const IST*,
995 Kokkos::MemoryUnmanaged>;
997 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
998 for (
size_t j = 0; j < numVecs; ++j) {
999 Teuchos::ArrayView<const IST> X_j_av =
1000 Teuchos::av_reinterpret_cast<
const IST> (ArrayOfPtrs[j]);
1001 input_col_view_type X_j_in (X_j_av.getRawPtr (), lclNumRows);
1002 auto X_j_out = Kokkos::subview (X_out, rowRng, j);
1008 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1011 return whichVectors_.empty ();
1014 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1019 if (this->getMap ().is_null ()) {
1020 return static_cast<size_t> (0);
1022 return this->getMap ()->getLocalNumElements ();
1026 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1031 if (this->getMap ().is_null ()) {
1032 return static_cast<size_t> (0);
1034 return this->getMap ()->getGlobalNumElements ();
1038 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1043 return isConstantStride () ? getDualViewStride (view_) : size_t (0);
1046 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1052 auto thisData = view_.getDualView().view_host().data();
1053 auto otherData = other.
view_.getDualView().view_host().data();
1054 size_t thisSpan = view_.getDualView().view_host().span();
1055 size_t otherSpan = other.
view_.getDualView().view_host().span();
1056 return (otherData <= thisData && thisData < otherData + otherSpan)
1057 || (thisData <= otherData && otherData < thisData + thisSpan);
1060 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1069 const MV* src =
dynamic_cast<const MV*
> (&sourceObj);
1070 if (src ==
nullptr) {
1084 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1088 return this->getNumVectors ();
1091 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1096 const size_t numSameIDs,
1097 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs,
1098 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs,
1100 const execution_space &space)
1102 using ::Tpetra::Details::Behavior;
1104 using ::Tpetra::Details::ProfilingRegion;
1106 using KokkosRefactor::Details::permute_array_multi_column;
1107 using KokkosRefactor::Details::permute_array_multi_column_variable_stride;
1108 using Kokkos::Compat::create_const_view;
1112 MV& sourceMV =
const_cast<MV &
>(
dynamic_cast<const MV&
> (sourceObj));
1113 const bool copyOnHost = runKernelOnHost(sourceMV);
1114 const char longFuncNameHost[] =
"Tpetra::MultiVector::copyAndPermute[Host]";
1115 const char longFuncNameDevice[] =
"Tpetra::MultiVector::copyAndPermute[Device]";
1116 const char tfecfFuncName[] =
"copyAndPermute: ";
1117 ProfilingRegion regionCAP (copyOnHost ? longFuncNameHost : longFuncNameDevice);
1119 const bool verbose = Behavior::verbose ();
1120 std::unique_ptr<std::string> prefix;
1122 auto map = this->getMap ();
1123 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
1124 const int myRank = comm.is_null () ? -1 : comm->getRank ();
1125 std::ostringstream os;
1126 os <<
"Proc " << myRank <<
": MV::copyAndPermute: ";
1127 prefix = std::unique_ptr<std::string> (
new std::string (os.str ()));
1128 os <<
"Start" << endl;
1129 std::cerr << os.str ();
1132 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1133 (permuteToLIDs.extent (0) != permuteFromLIDs.extent (0),
1134 std::logic_error,
"permuteToLIDs.extent(0) = "
1135 << permuteToLIDs.extent (0) <<
" != permuteFromLIDs.extent(0) = "
1136 << permuteFromLIDs.extent (0) <<
".");
1137 const size_t numCols = this->getNumVectors ();
1141 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1142 (sourceMV.need_sync_device () && sourceMV.need_sync_host (),
1143 std::logic_error,
"Input MultiVector needs sync to both host "
1146 std::ostringstream os;
1147 os << *prefix <<
"copyOnHost=" << (copyOnHost ?
"true" :
"false") << endl;
1148 std::cerr << os.str ();
1152 std::ostringstream os;
1153 os << *prefix <<
"Copy" << endl;
1154 std::cerr << os.str ();
1179 if (numSameIDs > 0) {
1180 const std::pair<size_t, size_t> rows (0, numSameIDs);
1182 auto tgt_h = this->getLocalViewHost(Access::ReadWrite);
1183 auto src_h = sourceMV.getLocalViewHost(Access::ReadOnly);
1185 for (
size_t j = 0; j < numCols; ++j) {
1186 const size_t tgtCol = isConstantStride () ? j : whichVectors_[j];
1187 const size_t srcCol =
1188 sourceMV.isConstantStride () ? j : sourceMV.whichVectors_[j];
1190 auto tgt_j = Kokkos::subview (tgt_h, rows, tgtCol);
1191 auto src_j = Kokkos::subview (src_h, rows, srcCol);
1195 Kokkos::RangePolicy<execution_space, size_t>;
1196 range_t rp(space, 0,numSameIDs);
1197 Tpetra::Details::AddAssignFunctor<decltype(tgt_j), decltype(src_j)>
1199 Kokkos::parallel_for(rp, aaf);
1210 auto tgt_d = this->getLocalViewDevice(Access::ReadWrite);
1211 auto src_d = sourceMV.getLocalViewDevice(Access::ReadOnly);
1213 for (
size_t j = 0; j < numCols; ++j) {
1214 const size_t tgtCol = isConstantStride () ? j : whichVectors_[j];
1215 const size_t srcCol =
1216 sourceMV.isConstantStride () ? j : sourceMV.whichVectors_[j];
1218 auto tgt_j = Kokkos::subview (tgt_d, rows, tgtCol);
1219 auto src_j = Kokkos::subview (src_d, rows, srcCol);
1223 Kokkos::RangePolicy<execution_space, size_t>;
1224 range_t rp(space, 0,numSameIDs);
1225 Tpetra::Details::AddAssignFunctor<decltype(tgt_j), decltype(src_j)>
1227 Kokkos::parallel_for(rp, aaf);
1250 if (permuteFromLIDs.extent (0) == 0 ||
1251 permuteToLIDs.extent (0) == 0) {
1253 std::ostringstream os;
1254 os << *prefix <<
"No permutations. Done!" << endl;
1255 std::cerr << os.str ();
1261 std::ostringstream os;
1262 os << *prefix <<
"Permute" << endl;
1263 std::cerr << os.str ();
1268 const bool nonConstStride =
1269 ! this->isConstantStride () || ! sourceMV.isConstantStride ();
1272 std::ostringstream os;
1273 os << *prefix <<
"nonConstStride="
1274 << (nonConstStride ?
"true" :
"false") << endl;
1275 std::cerr << os.str ();
1282 Kokkos::DualView<const size_t*, device_type> srcWhichVecs;
1283 Kokkos::DualView<const size_t*, device_type> tgtWhichVecs;
1284 if (nonConstStride) {
1285 if (this->whichVectors_.size () == 0) {
1286 Kokkos::DualView<size_t*, device_type> tmpTgt (
"tgtWhichVecs", numCols);
1287 tmpTgt.modify_host ();
1288 for (
size_t j = 0; j < numCols; ++j) {
1289 tmpTgt.view_host()(j) = j;
1292 tmpTgt.sync_device ();
1294 tgtWhichVecs = tmpTgt;
1297 Teuchos::ArrayView<const size_t> tgtWhichVecsT = this->whichVectors_ ();
1299 getDualViewCopyFromArrayView<size_t, device_type> (tgtWhichVecsT,
1303 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1304 (static_cast<size_t> (tgtWhichVecs.extent (0)) !=
1305 this->getNumVectors (),
1306 std::logic_error,
"tgtWhichVecs.extent(0) = " <<
1307 tgtWhichVecs.extent (0) <<
" != this->getNumVectors() = " <<
1308 this->getNumVectors () <<
".");
1310 if (sourceMV.whichVectors_.size () == 0) {
1311 Kokkos::DualView<size_t*, device_type> tmpSrc (
"srcWhichVecs", numCols);
1312 tmpSrc.modify_host ();
1313 for (
size_t j = 0; j < numCols; ++j) {
1314 tmpSrc.view_host()(j) = j;
1317 tmpSrc.sync_device ();
1319 srcWhichVecs = tmpSrc;
1322 Teuchos::ArrayView<const size_t> srcWhichVecsT =
1323 sourceMV.whichVectors_ ();
1325 getDualViewCopyFromArrayView<size_t, device_type> (srcWhichVecsT,
1329 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1330 (static_cast<size_t> (srcWhichVecs.extent (0)) !=
1331 sourceMV.getNumVectors (), std::logic_error,
1332 "srcWhichVecs.extent(0) = " << srcWhichVecs.extent (0)
1333 <<
" != sourceMV.getNumVectors() = " << sourceMV.getNumVectors ()
1339 std::ostringstream os;
1340 os << *prefix <<
"Get permute LIDs on host" << std::endl;
1341 std::cerr << os.str ();
1343 auto tgt_h = this->getLocalViewHost(Access::ReadWrite);
1344 auto src_h = sourceMV.getLocalViewHost(Access::ReadOnly);
1346 TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_host () );
1347 auto permuteToLIDs_h = create_const_view (permuteToLIDs.view_host ());
1348 TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_host () );
1349 auto permuteFromLIDs_h =
1350 create_const_view (permuteFromLIDs.view_host ());
1353 std::ostringstream os;
1354 os << *prefix <<
"Permute on host" << endl;
1355 std::cerr << os.str ();
1357 if (nonConstStride) {
1360 auto tgtWhichVecs_h =
1361 create_const_view (tgtWhichVecs.view_host ());
1362 auto srcWhichVecs_h =
1363 create_const_view (srcWhichVecs.view_host ());
1365 using op_type = KokkosRefactor::Details::AddOp;
1366 permute_array_multi_column_variable_stride (tgt_h, src_h,
1370 srcWhichVecs_h, numCols,
1374 using op_type = KokkosRefactor::Details::InsertOp;
1375 permute_array_multi_column_variable_stride (tgt_h, src_h,
1379 srcWhichVecs_h, numCols,
1385 using op_type = KokkosRefactor::Details::AddOp;
1386 permute_array_multi_column (tgt_h, src_h, permuteToLIDs_h,
1387 permuteFromLIDs_h, numCols, op_type());
1390 using op_type = KokkosRefactor::Details::InsertOp;
1391 permute_array_multi_column (tgt_h, src_h, permuteToLIDs_h,
1392 permuteFromLIDs_h, numCols, op_type());
1398 std::ostringstream os;
1399 os << *prefix <<
"Get permute LIDs on device" << endl;
1400 std::cerr << os.str ();
1402 auto tgt_d = this->getLocalViewDevice(Access::ReadWrite);
1403 auto src_d = sourceMV.getLocalViewDevice(Access::ReadOnly);
1405 TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_device () );
1406 auto permuteToLIDs_d = create_const_view (permuteToLIDs.view_device ());
1407 TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_device () );
1408 auto permuteFromLIDs_d =
1409 create_const_view (permuteFromLIDs.view_device ());
1412 std::ostringstream os;
1413 os << *prefix <<
"Permute on device" << endl;
1414 std::cerr << os.str ();
1416 if (nonConstStride) {
1419 auto tgtWhichVecs_d = create_const_view (tgtWhichVecs.view_device ());
1420 auto srcWhichVecs_d = create_const_view (srcWhichVecs.view_device ());
1422 using op_type = KokkosRefactor::Details::AddOp;
1423 permute_array_multi_column_variable_stride (space, tgt_d, src_d,
1427 srcWhichVecs_d, numCols,
1431 using op_type = KokkosRefactor::Details::InsertOp;
1432 permute_array_multi_column_variable_stride (space, tgt_d, src_d,
1436 srcWhichVecs_d, numCols,
1442 using op_type = KokkosRefactor::Details::AddOp;
1443 permute_array_multi_column (space, tgt_d, src_d, permuteToLIDs_d,
1444 permuteFromLIDs_d, numCols, op_type());
1447 using op_type = KokkosRefactor::Details::InsertOp;
1448 permute_array_multi_column (space, tgt_d, src_d, permuteToLIDs_d,
1449 permuteFromLIDs_d, numCols, op_type());
1455 std::ostringstream os;
1456 os << *prefix <<
"Done!" << endl;
1457 std::cerr << os.str ();
1462 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1465 const Kokkos::DualView<const local_ordinal_type *, buffer_device_type>
1467 const Kokkos::DualView<const local_ordinal_type *, buffer_device_type>
1470 copyAndPermute(sourceObj, numSameIDs, permuteToLIDs, permuteFromLIDs, CM,
1475 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1480 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
1481 Kokkos::DualView<impl_scalar_type*, buffer_device_type>& exports,
1482 Kokkos::DualView<size_t*, buffer_device_type> ,
1483 size_t& constantNumPackets,
1484 const execution_space &space)
1486 using ::Tpetra::Details::Behavior;
1487 using ::Tpetra::Details::ProfilingRegion;
1489 using Kokkos::Compat::create_const_view;
1490 using Kokkos::Compat::getKokkosViewDeepCopy;
1495 MV& sourceMV =
const_cast<MV&
>(
dynamic_cast<const MV&
> (sourceObj));
1497 const bool packOnHost = runKernelOnHost(sourceMV);
1498 const char longFuncNameHost[] =
"Tpetra::MultiVector::packAndPrepare[Host]";
1499 const char longFuncNameDevice[] =
"Tpetra::MultiVector::packAndPrepare[Device]";
1500 const char tfecfFuncName[] =
"packAndPrepare: ";
1501 ProfilingRegion regionPAP (packOnHost ? longFuncNameHost : longFuncNameDevice);
1509 const bool debugCheckIndices = Behavior::debug ();
1514 const bool printDebugOutput = Behavior::verbose ();
1516 std::unique_ptr<std::string> prefix;
1517 if (printDebugOutput) {
1518 auto map = this->getMap ();
1519 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
1520 const int myRank = comm.is_null () ? -1 : comm->getRank ();
1521 std::ostringstream os;
1522 os <<
"Proc " << myRank <<
": MV::packAndPrepare: ";
1523 prefix = std::unique_ptr<std::string> (
new std::string (os.str ()));
1524 os <<
"Start" << endl;
1525 std::cerr << os.str ();
1529 const size_t numCols = sourceMV.getNumVectors ();
1534 constantNumPackets = numCols;
1538 if (exportLIDs.extent (0) == 0) {
1539 if (printDebugOutput) {
1540 std::ostringstream os;
1541 os << *prefix <<
"No exports on this proc, DONE" << std::endl;
1542 std::cerr << os.str ();
1562 const size_t numExportLIDs = exportLIDs.extent (0);
1563 const size_t newExportsSize = numCols * numExportLIDs;
1564 if (printDebugOutput) {
1565 std::ostringstream os;
1566 os << *prefix <<
"realloc: "
1567 <<
"numExportLIDs: " << numExportLIDs
1568 <<
", exports.extent(0): " << exports.extent (0)
1569 <<
", newExportsSize: " << newExportsSize << std::endl;
1570 std::cerr << os.str ();
1576 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1577 (sourceMV.need_sync_device () && sourceMV.need_sync_host (),
1578 std::logic_error,
"Input MultiVector needs sync to both host "
1580 if (printDebugOutput) {
1581 std::ostringstream os;
1582 os << *prefix <<
"packOnHost=" << (packOnHost ?
"true" :
"false") << endl;
1583 std::cerr << os.str ();
1595 exports.clear_sync_state ();
1596 exports.modify_host ();
1604 exports.clear_sync_state ();
1605 exports.modify_device ();
1621 if (sourceMV.isConstantStride ()) {
1622 using KokkosRefactor::Details::pack_array_single_column;
1623 if (printDebugOutput) {
1624 std::ostringstream os;
1625 os << *prefix <<
"Pack numCols=1 const stride" << endl;
1626 std::cerr << os.str ();
1629 auto src_host = sourceMV.getLocalViewHost(Access::ReadOnly);
1630 pack_array_single_column (exports.view_host (),
1632 exportLIDs.view_host (),
1637 auto src_dev = sourceMV.getLocalViewDevice(Access::ReadOnly);
1638 pack_array_single_column (exports.view_device (),
1640 exportLIDs.view_device (),
1647 using KokkosRefactor::Details::pack_array_single_column;
1648 if (printDebugOutput) {
1649 std::ostringstream os;
1650 os << *prefix <<
"Pack numCols=1 nonconst stride" << endl;
1651 std::cerr << os.str ();
1654 auto src_host = sourceMV.getLocalViewHost(Access::ReadOnly);
1655 pack_array_single_column (exports.view_host (),
1657 exportLIDs.view_host (),
1658 sourceMV.whichVectors_[0],
1662 auto src_dev = sourceMV.getLocalViewDevice(Access::ReadOnly);
1663 pack_array_single_column (exports.view_device (),
1665 exportLIDs.view_device (),
1666 sourceMV.whichVectors_[0],
1673 if (sourceMV.isConstantStride ()) {
1674 using KokkosRefactor::Details::pack_array_multi_column;
1675 if (printDebugOutput) {
1676 std::ostringstream os;
1677 os << *prefix <<
"Pack numCols=" << numCols <<
" const stride" << endl;
1678 std::cerr << os.str ();
1681 auto src_host = sourceMV.getLocalViewHost(Access::ReadOnly);
1682 pack_array_multi_column (exports.view_host (),
1684 exportLIDs.view_host (),
1689 auto src_dev = sourceMV.getLocalViewDevice(Access::ReadOnly);
1690 pack_array_multi_column (exports.view_device (),
1692 exportLIDs.view_device (),
1699 using KokkosRefactor::Details::pack_array_multi_column_variable_stride;
1700 if (printDebugOutput) {
1701 std::ostringstream os;
1702 os << *prefix <<
"Pack numCols=" << numCols <<
" nonconst stride"
1704 std::cerr << os.str ();
1709 using IST = impl_scalar_type;
1710 using DV = Kokkos::DualView<IST*, device_type>;
1711 using HES =
typename DV::t_host::execution_space;
1712 using DES =
typename DV::t_dev::execution_space;
1713 Teuchos::ArrayView<const size_t> whichVecs = sourceMV.whichVectors_ ();
1715 auto src_host = sourceMV.getLocalViewHost(Access::ReadOnly);
1716 pack_array_multi_column_variable_stride
1717 (exports.view_host (),
1719 exportLIDs.view_host (),
1720 getKokkosViewDeepCopy<HES> (whichVecs),
1725 auto src_dev = sourceMV.getLocalViewDevice(Access::ReadOnly);
1726 pack_array_multi_column_variable_stride
1727 (exports.view_device (),
1729 exportLIDs.view_device (),
1730 getKokkosViewDeepCopy<DES> (whichVecs),
1732 debugCheckIndices, space);
1737 if (printDebugOutput) {
1738 std::ostringstream os;
1739 os << *prefix <<
"Done!" << endl;
1740 std::cerr << os.str ();
1746 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1751 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
1752 Kokkos::DualView<impl_scalar_type*, buffer_device_type>& exports,
1753 Kokkos::DualView<size_t*, buffer_device_type> numExportPacketsPerLID,
1754 size_t& constantNumPackets) {
1755 packAndPrepare(sourceObj, exportLIDs, exports, numExportPacketsPerLID, constantNumPackets, execution_space());
1759 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1761 typename std::enable_if<std::is_same<typename Tpetra::Details::DefaultTypes::CommBufferMemorySpace<typename NO::execution_space>::type,
1762 typename NO::device_type::memory_space>::value,
1767 const std::string* prefix,
1768 const bool areRemoteLIDsContiguous,
1777 std::ostringstream os;
1778 os << *prefix <<
"Realloc (if needed) imports_ from "
1779 << this->imports_.extent (0) <<
" to " << newSize << std::endl;
1780 std::cerr << os.str ();
1783 bool reallocated =
false;
1785 using IST = impl_scalar_type;
1786 using DV = Kokkos::DualView<IST*, Kokkos::LayoutLeft, buffer_device_type>;
1798 if ((std::is_same_v<typename dual_view_type::t_dev::device_type, typename dual_view_type::t_host::device_type> ||
1799 (Details::Behavior::assumeMpiIsGPUAware () && !this->need_sync_device()) ||
1800 (!Details::Behavior::assumeMpiIsGPUAware () && !this->need_sync_host())) &&
1801 areRemoteLIDsContiguous &&
1803 (getNumVectors() == 1) &&
1806 size_t offset = getLocalLength () - newSize;
1807 reallocated = this->imports_.view_device().data() != view_.getDualView().view_device().data() + offset;
1809 typedef std::pair<size_t, size_t> range_type;
1810 this->imports_ = DV(view_.getDualView(),
1811 range_type (offset, getLocalLength () ),
1815 std::ostringstream os;
1816 os << *prefix <<
"Aliased imports_ to MV.view_" << std::endl;
1817 std::cerr << os.str ();
1827 std::ostringstream os;
1828 os << *prefix <<
"Finished realloc'ing unaliased_imports_" << std::endl;
1829 std::cerr << os.str ();
1831 this->imports_ = this->unaliased_imports_;
1836 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1838 typename std::enable_if<!std::is_same<typename Tpetra::Details::DefaultTypes::CommBufferMemorySpace<typename NO::execution_space>::type,
1839 typename NO::device_type::memory_space>::value,
1844 const std::string* prefix,
1845 const bool areRemoteLIDsContiguous,
1851 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1856 const std::string* prefix,
1857 const bool areRemoteLIDsContiguous,
1860 return reallocImportsIfNeededImpl<Node>(newSize, verbose, prefix, areRemoteLIDsContiguous, CM);
1863 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1867 return (this->imports_.view_device().data() + this->imports_.view_device().extent(0) ==
1868 view_.getDualView().view_device().data() + view_.getDualView().view_device().extent(0));
1872 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1876 (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& importLIDs,
1877 Kokkos::DualView<impl_scalar_type*, buffer_device_type> imports,
1878 Kokkos::DualView<size_t*, buffer_device_type> ,
1879 const size_t constantNumPackets,
1881 const execution_space &space)
1883 using ::Tpetra::Details::Behavior;
1884 using ::Tpetra::Details::ProfilingRegion;
1885 using KokkosRefactor::Details::unpack_array_multi_column;
1886 using KokkosRefactor::Details::unpack_array_multi_column_variable_stride;
1887 using Kokkos::Compat::getKokkosViewDeepCopy;
1890 const bool unpackOnHost = runKernelOnHost(imports);
1892 const char longFuncNameHost[] =
"Tpetra::MultiVector::unpackAndCombine[Host]";
1893 const char longFuncNameDevice[] =
"Tpetra::MultiVector::unpackAndCombine[Device]";
1894 const char * longFuncName = unpackOnHost ? longFuncNameHost : longFuncNameDevice;
1895 const char tfecfFuncName[] =
"unpackAndCombine: ";
1896 ProfilingRegion regionUAC (longFuncName);
1904 const bool debugCheckIndices = Behavior::debug ();
1906 const bool printDebugOutput = Behavior::verbose ();
1907 std::unique_ptr<std::string> prefix;
1908 if (printDebugOutput) {
1909 auto map = this->getMap ();
1910 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
1911 const int myRank = comm.is_null () ? -1 : comm->getRank ();
1912 std::ostringstream os;
1913 os <<
"Proc " << myRank <<
": " << longFuncName <<
": ";
1914 prefix = std::unique_ptr<std::string> (
new std::string (os.str ()));
1915 os <<
"Start" << endl;
1916 std::cerr << os.str ();
1920 if (importLIDs.extent (0) == 0) {
1921 if (printDebugOutput) {
1922 std::ostringstream os;
1923 os << *prefix <<
"No imports. Done!" << endl;
1924 std::cerr << os.str ();
1930 if (importsAreAliased()) {
1931 if (printDebugOutput) {
1932 std::ostringstream os;
1933 os << *prefix <<
"Skipping unpack, since imports_ is aliased to MV.view_. Done!" << endl;
1934 std::cerr << os.str ();
1940 const size_t numVecs = getNumVectors ();
1941 if (debugCheckIndices) {
1942 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1943 (static_cast<size_t> (imports.extent (0)) !=
1944 numVecs * importLIDs.extent (0),
1946 "imports.extent(0) = " << imports.extent (0)
1947 <<
" != getNumVectors() * importLIDs.extent(0) = " << numVecs
1948 <<
" * " << importLIDs.extent (0) <<
" = "
1949 << numVecs * importLIDs.extent (0) <<
".");
1951 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1952 (constantNumPackets == static_cast<size_t> (0), std::runtime_error,
1953 "constantNumPackets input argument must be nonzero.");
1955 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1956 (static_cast<size_t> (numVecs) !=
1957 static_cast<size_t> (constantNumPackets),
1958 std::runtime_error,
"constantNumPackets must equal numVecs.");
1967 if (this->imports_.need_sync_host()) this->imports_.sync_host();
1970 if (this->imports_.need_sync_device()) this->imports_.sync_device();
1973 if (printDebugOutput) {
1974 std::ostringstream os;
1975 os << *prefix <<
"unpackOnHost=" << (unpackOnHost ?
"true" :
"false")
1977 std::cerr << os.str ();
1982 auto imports_d = imports.view_device ();
1983 auto imports_h = imports.view_host ();
1984 auto importLIDs_d = importLIDs.view_device ();
1985 auto importLIDs_h = importLIDs.view_host ();
1987 Kokkos::DualView<size_t*, device_type> whichVecs;
1988 if (! isConstantStride ()) {
1989 Kokkos::View<
const size_t*, Kokkos::HostSpace,
1990 Kokkos::MemoryUnmanaged> whichVecsIn (whichVectors_.getRawPtr (),
1992 whichVecs = Kokkos::DualView<size_t*, device_type> (
"whichVecs", numVecs);
1994 whichVecs.modify_host ();
1999 whichVecs.modify_device ();
2004 auto whichVecs_d = whichVecs.view_device ();
2005 auto whichVecs_h = whichVecs.view_host ();
2015 if (numVecs > 0 && importLIDs.extent (0) > 0) {
2016 using dev_exec_space =
typename dual_view_type::t_dev::execution_space;
2017 using host_exec_space =
typename dual_view_type::t_host::execution_space;
2020 const bool use_atomic_updates = unpackOnHost ?
2021 host_exec_space().concurrency () != 1 :
2022 dev_exec_space().concurrency () != 1;
2024 if (printDebugOutput) {
2025 std::ostringstream os;
2027 std::cerr << os.str ();
2034 using op_type = KokkosRefactor::Details::InsertOp;
2035 if (isConstantStride ()) {
2037 auto X_h = this->getLocalViewHost(Access::ReadWrite);
2038 unpack_array_multi_column (host_exec_space (),
2039 X_h, imports_h, importLIDs_h,
2040 op_type (), numVecs,
2046 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
2047 unpack_array_multi_column (space,
2048 X_d, imports_d, importLIDs_d,
2049 op_type (), numVecs,
2056 auto X_h = this->getLocalViewHost(Access::ReadWrite);
2057 unpack_array_multi_column_variable_stride (host_exec_space (),
2067 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
2068 unpack_array_multi_column_variable_stride (space,
2080 using op_type = KokkosRefactor::Details::AddOp;
2081 if (isConstantStride ()) {
2083 auto X_h = this->getLocalViewHost(Access::ReadWrite);
2084 unpack_array_multi_column (host_exec_space (),
2085 X_h, imports_h, importLIDs_h,
2086 op_type (), numVecs,
2091 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
2092 unpack_array_multi_column (space,
2093 X_d, imports_d, importLIDs_d,
2094 op_type (), numVecs,
2101 auto X_h = this->getLocalViewHost(Access::ReadWrite);
2102 unpack_array_multi_column_variable_stride (host_exec_space (),
2112 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
2113 unpack_array_multi_column_variable_stride (space,
2125 using op_type = KokkosRefactor::Details::AbsMaxOp;
2126 if (isConstantStride ()) {
2128 auto X_h = this->getLocalViewHost(Access::ReadWrite);
2129 unpack_array_multi_column (host_exec_space (),
2130 X_h, imports_h, importLIDs_h,
2131 op_type (), numVecs,
2136 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
2137 unpack_array_multi_column (space,
2138 X_d, imports_d, importLIDs_d,
2139 op_type (), numVecs,
2146 auto X_h = this->getLocalViewHost(Access::ReadWrite);
2147 unpack_array_multi_column_variable_stride (host_exec_space (),
2157 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
2158 unpack_array_multi_column_variable_stride (space,
2170 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2171 (
true, std::logic_error,
"Invalid CombineMode");
2175 if (printDebugOutput) {
2176 std::ostringstream os;
2177 os << *prefix <<
"Nothing to unpack" << endl;
2178 std::cerr << os.str ();
2182 if (printDebugOutput) {
2183 std::ostringstream os;
2184 os << *prefix <<
"Done!" << endl;
2185 std::cerr << os.str ();
2190 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2194 (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& importLIDs,
2195 Kokkos::DualView<impl_scalar_type*, buffer_device_type> imports,
2196 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
2197 const size_t constantNumPackets,
2199 unpackAndCombine(importLIDs, imports, numPacketsPerLID, constantNumPackets, CM, execution_space());
2203 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2208 if (isConstantStride ()) {
2209 return static_cast<size_t> (view_.extent (1));
2211 return static_cast<size_t> (whichVectors_.size ());
2219 gblDotImpl (
const RV& dotsOut,
2220 const Teuchos::RCP<
const Teuchos::Comm<int> >& comm,
2221 const bool distributed)
2223 using Teuchos::REDUCE_MAX;
2224 using Teuchos::REDUCE_SUM;
2225 using Teuchos::reduceAll;
2226 typedef typename RV::non_const_value_type dot_type;
2228 const size_t numVecs = dotsOut.extent (0);
2243 if (distributed && ! comm.is_null ()) {
2246 const int nv =
static_cast<int> (numVecs);
2247 const bool commIsInterComm = ::Tpetra::Details::isInterComm (*comm);
2249 if (commIsInterComm) {
2253 typename RV::non_const_type lclDots (Kokkos::ViewAllocateWithoutInitializing (
"tmp"), numVecs);
2256 const dot_type*
const lclSum = lclDots.data ();
2257 dot_type*
const gblSum = dotsOut.data ();
2258 reduceAll<int, dot_type> (*comm, REDUCE_SUM, nv, lclSum, gblSum);
2261 dot_type*
const inout = dotsOut.data ();
2262 reduceAll<int, dot_type> (*comm, REDUCE_SUM, nv, inout, inout);
2268 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2272 const Kokkos::View<dot_type*, Kokkos::HostSpace>& dots)
const
2274 using ::Tpetra::Details::Behavior;
2275 using Kokkos::subview;
2276 using Teuchos::Comm;
2277 using Teuchos::null;
2280 typedef Kokkos::View<dot_type*, Kokkos::HostSpace> RV;
2281 typedef typename dual_view_type::t_dev_const XMV;
2282 const char tfecfFuncName[] =
"Tpetra::MultiVector::dot: ";
2286 const size_t numVecs = this->getNumVectors ();
2290 const size_t lclNumRows = this->getLocalLength ();
2291 const size_t numDots =
static_cast<size_t> (dots.extent (0));
2292 const bool debug = Behavior::debug ();
2295 const bool compat = this->getMap ()->isCompatible (* (A.
getMap ()));
2296 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2297 (! compat, std::invalid_argument,
"'*this' MultiVector is not "
2298 "compatible with the input MultiVector A. We only test for this "
2309 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2311 "MultiVectors do not have the same local length. "
2312 "this->getLocalLength() = " << lclNumRows <<
" != "
2314 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2316 "MultiVectors must have the same number of columns (vectors). "
2317 "this->getNumVectors() = " << numVecs <<
" != "
2319 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2320 numDots != numVecs, std::runtime_error,
2321 "The output array 'dots' must have the same number of entries as the "
2322 "number of columns (vectors) in *this and A. dots.extent(0) = " <<
2323 numDots <<
" != this->getNumVectors() = " << numVecs <<
".");
2325 const std::pair<size_t, size_t> colRng (0, numVecs);
2326 RV dotsOut = subview (dots, colRng);
2327 RCP<const Comm<int> > comm = this->getMap ().is_null () ? null :
2328 this->getMap ()->getComm ();
2330 auto thisView = this->getLocalViewDevice(Access::ReadOnly);
2333 ::Tpetra::Details::lclDot<RV, XMV> (dotsOut, thisView, A_view, lclNumRows, numVecs,
2334 this->whichVectors_.getRawPtr (),
2345 exec_space_instance.fence();
2347 gblDotImpl (dotsOut, comm, this->isDistributed ());
2351 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2356 using ::Tpetra::Details::ProfilingRegion;
2358 using dot_type =
typename MV::dot_type;
2359 ProfilingRegion region (
"Tpetra::multiVectorSingleColumnDot");
2362 Teuchos::RCP<const Teuchos::Comm<int> > comm =
2363 map.is_null () ? Teuchos::null : map->getComm ();
2364 if (comm.is_null ()) {
2365 return Kokkos::ArithTraits<dot_type>::zero ();
2368 using LO = LocalOrdinal;
2372 const LO lclNumRows =
static_cast<LO
> (std::min (x.
getLocalLength (),
2374 const Kokkos::pair<LO, LO> rowRng (0, lclNumRows);
2375 dot_type lclDot = Kokkos::ArithTraits<dot_type>::zero ();
2376 dot_type gblDot = Kokkos::ArithTraits<dot_type>::zero ();
2383 auto x_1d = Kokkos::subview (x_2d, rowRng, 0);
2385 auto y_1d = Kokkos::subview (y_2d, rowRng, 0);
2386 lclDot = KokkosBlas::dot (x_1d, y_1d);
2389 using Teuchos::outArg;
2390 using Teuchos::REDUCE_SUM;
2391 using Teuchos::reduceAll;
2392 reduceAll<int, dot_type> (*comm, REDUCE_SUM, lclDot, outArg (gblDot));
2404 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2408 const Teuchos::ArrayView<dot_type>& dots)
const
2410 const char tfecfFuncName[] =
"dot: ";
2413 const size_t numVecs = this->getNumVectors ();
2414 const size_t lclNumRows = this->getLocalLength ();
2415 const size_t numDots =
static_cast<size_t> (dots.size ());
2425 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2427 "MultiVectors do not have the same local length. "
2428 "this->getLocalLength() = " << lclNumRows <<
" != "
2430 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2432 "MultiVectors must have the same number of columns (vectors). "
2433 "this->getNumVectors() = " << numVecs <<
" != "
2435 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2436 (numDots != numVecs, std::runtime_error,
2437 "The output array 'dots' must have the same number of entries as the "
2438 "number of columns (vectors) in *this and A. dots.extent(0) = " <<
2439 numDots <<
" != this->getNumVectors() = " << numVecs <<
".");
2442 const dot_type gblDot = multiVectorSingleColumnDot (*
this, A);
2446 this->dot (A, Kokkos::View<dot_type*, Kokkos::HostSpace>(dots.getRawPtr (), numDots));
2450 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2453 norm2 (
const Teuchos::ArrayView<mag_type>& norms)
const
2456 using ::Tpetra::Details::NORM_TWO;
2457 using ::Tpetra::Details::ProfilingRegion;
2458 ProfilingRegion region (
"Tpetra::MV::norm2 (host output)");
2461 MV& X =
const_cast<MV&
> (*this);
2462 multiVectorNormImpl (norms.getRawPtr (), X, NORM_TWO);
2465 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2468 norm2 (
const Kokkos::View<mag_type*, Kokkos::HostSpace>& norms)
const
2470 Teuchos::ArrayView<mag_type> norms_av (norms.data (), norms.extent (0));
2471 this->norm2 (norms_av);
2475 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2478 norm1 (
const Teuchos::ArrayView<mag_type>& norms)
const
2481 using ::Tpetra::Details::NORM_ONE;
2482 using ::Tpetra::Details::ProfilingRegion;
2483 ProfilingRegion region (
"Tpetra::MV::norm1 (host output)");
2486 MV& X =
const_cast<MV&
> (*this);
2487 multiVectorNormImpl (norms.data (), X, NORM_ONE);
2490 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2493 norm1 (
const Kokkos::View<mag_type*, Kokkos::HostSpace>& norms)
const
2495 Teuchos::ArrayView<mag_type> norms_av (norms.data (), norms.extent (0));
2496 this->norm1 (norms_av);
2499 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2502 normInf (
const Teuchos::ArrayView<mag_type>& norms)
const
2505 using ::Tpetra::Details::NORM_INF;
2506 using ::Tpetra::Details::ProfilingRegion;
2507 ProfilingRegion region (
"Tpetra::MV::normInf (host output)");
2510 MV& X =
const_cast<MV&
> (*this);
2511 multiVectorNormImpl (norms.getRawPtr (), X, NORM_INF);
2514 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2517 normInf (
const Kokkos::View<mag_type*, Kokkos::HostSpace>& norms)
const
2519 Teuchos::ArrayView<mag_type> norms_av (norms.data (), norms.extent (0));
2520 this->normInf (norms_av);
2523 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2526 meanValue (
const Teuchos::ArrayView<impl_scalar_type>& means)
const
2530 using Kokkos::subview;
2531 using Teuchos::Comm;
2533 using Teuchos::reduceAll;
2534 using Teuchos::REDUCE_SUM;
2535 typedef Kokkos::ArithTraits<impl_scalar_type> ATS;
2537 const size_t lclNumRows = this->getLocalLength ();
2538 const size_t numVecs = this->getNumVectors ();
2539 const size_t numMeans =
static_cast<size_t> (means.size ());
2541 TEUCHOS_TEST_FOR_EXCEPTION(
2542 numMeans != numVecs, std::runtime_error,
2543 "Tpetra::MultiVector::meanValue: means.size() = " << numMeans
2544 <<
" != this->getNumVectors() = " << numVecs <<
".");
2546 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2547 const std::pair<size_t, size_t> colRng (0, numVecs);
2552 typedef Kokkos::View<impl_scalar_type*, device_type> local_view_type;
2554 typename local_view_type::HostMirror::array_layout,
2556 Kokkos::MemoryTraits<Kokkos::Unmanaged> > host_local_view_type;
2557 host_local_view_type meansOut (means.getRawPtr (), numMeans);
2559 RCP<const Comm<int> > comm = this->getMap ().is_null () ? Teuchos::null :
2560 this->getMap ()->getComm ();
2563 const bool useHostVersion = this->need_sync_device ();
2564 if (useHostVersion) {
2566 auto X_lcl = subview (getLocalViewHost(Access::ReadOnly),
2567 rowRng, Kokkos::ALL ());
2569 Kokkos::View<impl_scalar_type*, Kokkos::HostSpace> lclSums (
"MV::meanValue tmp", numVecs);
2570 if (isConstantStride ()) {
2571 KokkosBlas::sum (lclSums, X_lcl);
2574 for (
size_t j = 0; j < numVecs; ++j) {
2575 const size_t col = whichVectors_[j];
2576 KokkosBlas::sum (subview (lclSums, j), subview (X_lcl, ALL (), col));
2583 if (! comm.is_null () && this->isDistributed ()) {
2584 reduceAll (*comm, REDUCE_SUM, static_cast<int> (numVecs),
2585 lclSums.data (), meansOut.data ());
2594 auto X_lcl = subview (this->getLocalViewDevice(Access::ReadOnly),
2595 rowRng, Kokkos::ALL ());
2598 Kokkos::View<impl_scalar_type*, Kokkos::HostSpace> lclSums (
"MV::meanValue tmp", numVecs);
2599 if (isConstantStride ()) {
2600 KokkosBlas::sum (lclSums, X_lcl);
2603 for (
size_t j = 0; j < numVecs; ++j) {
2604 const size_t col = whichVectors_[j];
2605 KokkosBlas::sum (subview (lclSums, j), subview (X_lcl, ALL (), col));
2615 exec_space_instance.fence();
2621 if (! comm.is_null () && this->isDistributed ()) {
2622 reduceAll (*comm, REDUCE_SUM, static_cast<int> (numVecs),
2623 lclSums.data (), meansOut.data ());
2634 const impl_scalar_type OneOverN =
2635 ATS::one () /
static_cast<mag_type> (this->getGlobalLength ());
2636 for (
size_t k = 0; k < numMeans; ++k) {
2637 meansOut(k) = meansOut(k) * OneOverN;
2642 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2648 typedef Kokkos::ArithTraits<IST> ATS;
2649 typedef Kokkos::Random_XorShift64_Pool<typename device_type::execution_space> pool_type;
2650 typedef typename pool_type::generator_type generator_type;
2652 const IST max = Kokkos::rand<generator_type, IST>::max ();
2653 const IST min = ATS::is_signed ? IST (-max) : ATS::zero ();
2655 this->randomize (static_cast<Scalar> (min), static_cast<Scalar> (max));
2659 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2665 typedef Tpetra::Details::Static_Random_XorShift64_Pool<typename device_type::execution_space> tpetra_pool_type;
2666 typedef Kokkos::Random_XorShift64_Pool<typename device_type::execution_space> pool_type;
2669 if(!tpetra_pool_type::isSet())
2670 tpetra_pool_type::resetPool(this->getMap()->getComm()->getRank());
2672 pool_type & rand_pool = tpetra_pool_type::getPool();
2673 const IST max =
static_cast<IST
> (maxVal);
2674 const IST min =
static_cast<IST
> (minVal);
2676 auto thisView = this->getLocalViewDevice(Access::OverwriteAll);
2678 if (isConstantStride ()) {
2679 Kokkos::fill_random (thisView, rand_pool, min, max);
2682 const size_t numVecs = getNumVectors ();
2683 for (
size_t k = 0; k < numVecs; ++k) {
2684 const size_t col = whichVectors_[k];
2685 auto X_k = Kokkos::subview (thisView, Kokkos::ALL (), col);
2686 Kokkos::fill_random (X_k, rand_pool, min, max);
2691 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2696 using ::Tpetra::Details::ProfilingRegion;
2697 using ::Tpetra::Details::Blas::fill;
2698 using DES =
typename dual_view_type::t_dev::execution_space;
2699 using HES =
typename dual_view_type::t_host::execution_space;
2700 using LO = LocalOrdinal;
2701 ProfilingRegion region (
"Tpetra::MultiVector::putScalar");
2706 const LO lclNumRows =
static_cast<LO
> (this->getLocalLength ());
2707 const LO numVecs =
static_cast<LO
> (this->getNumVectors ());
2713 const bool runOnHost = runKernelOnHost(*
this);
2715 auto X = this->getLocalViewDevice(Access::OverwriteAll);
2716 if (this->isConstantStride ()) {
2717 fill (DES (), X, theAlpha, lclNumRows, numVecs);
2720 fill (DES (), X, theAlpha, lclNumRows, numVecs,
2721 this->whichVectors_.getRawPtr ());
2725 auto X = this->getLocalViewHost(Access::OverwriteAll);
2726 if (this->isConstantStride ()) {
2727 fill (HES (), X, theAlpha, lclNumRows, numVecs);
2730 fill (HES (), X, theAlpha, lclNumRows, numVecs,
2731 this->whichVectors_.getRawPtr ());
2737 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2742 using Teuchos::ArrayRCP;
2743 using Teuchos::Comm;
2746 using LO = LocalOrdinal;
2747 using GO = GlobalOrdinal;
2753 TEUCHOS_TEST_FOR_EXCEPTION(
2754 ! this->isConstantStride (), std::logic_error,
2755 "Tpetra::MultiVector::replaceMap: This method does not currently work "
2756 "if the MultiVector is a column view of another MultiVector (that is, if "
2757 "isConstantStride() == false).");
2787 if (this->getMap ().is_null ()) {
2792 TEUCHOS_TEST_FOR_EXCEPTION(
2793 newMap.is_null (), std::invalid_argument,
2794 "Tpetra::MultiVector::replaceMap: both current and new Maps are null. "
2795 "This probably means that the input Map is incorrect.");
2799 const size_t newNumRows = newMap->getLocalNumElements ();
2800 const size_t origNumRows = view_.extent (0);
2801 const size_t numCols = this->getNumVectors ();
2803 if (origNumRows != newNumRows || view_.extent (1) != numCols) {
2804 view_ = allocDualView<ST, LO, GO, Node> (newNumRows, numCols);
2807 else if (newMap.is_null ()) {
2810 const size_t newNumRows =
static_cast<size_t> (0);
2811 const size_t numCols = this->getNumVectors ();
2812 view_ = allocDualView<ST, LO, GO, Node> (newNumRows, numCols);
2815 this->map_ = newMap;
2818 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2826 const IST theAlpha =
static_cast<IST
> (alpha);
2827 if (theAlpha == Kokkos::ArithTraits<IST>::one ()) {
2830 const size_t lclNumRows = getLocalLength ();
2831 const size_t numVecs = getNumVectors ();
2832 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2833 const std::pair<size_t, size_t> colRng (0, numVecs);
2841 const bool useHostVersion = need_sync_device ();
2842 if (useHostVersion) {
2843 auto Y_lcl = Kokkos::subview (getLocalViewHost(Access::ReadWrite), rowRng, ALL ());
2844 if (isConstantStride ()) {
2845 KokkosBlas::scal (Y_lcl, theAlpha, Y_lcl);
2848 for (
size_t k = 0; k < numVecs; ++k) {
2849 const size_t Y_col = whichVectors_[k];
2850 auto Y_k = Kokkos::subview (Y_lcl, ALL (), Y_col);
2851 KokkosBlas::scal (Y_k, theAlpha, Y_k);
2856 auto Y_lcl = Kokkos::subview (getLocalViewDevice(Access::ReadWrite), rowRng, ALL ());
2857 if (isConstantStride ()) {
2858 KokkosBlas::scal (Y_lcl, theAlpha, Y_lcl);
2861 for (
size_t k = 0; k < numVecs; ++k) {
2862 const size_t Y_col = isConstantStride () ? k : whichVectors_[k];
2863 auto Y_k = Kokkos::subview (Y_lcl, ALL (), Y_col);
2864 KokkosBlas::scal (Y_k, theAlpha, Y_k);
2871 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2874 scale (
const Teuchos::ArrayView<const Scalar>& alphas)
2876 const size_t numVecs = this->getNumVectors ();
2877 const size_t numAlphas =
static_cast<size_t> (alphas.size ());
2878 TEUCHOS_TEST_FOR_EXCEPTION(
2879 numAlphas != numVecs, std::invalid_argument,
"Tpetra::MultiVector::"
2880 "scale: alphas.size() = " << numAlphas <<
" != this->getNumVectors() = "
2884 using k_alphas_type = Kokkos::DualView<impl_scalar_type*, device_type>;
2885 k_alphas_type k_alphas (
"alphas::tmp", numAlphas);
2886 k_alphas.modify_host ();
2887 for (
size_t i = 0; i < numAlphas; ++i) {
2888 k_alphas.view_host()(i) = static_cast<impl_scalar_type> (alphas[i]);
2890 k_alphas.sync_device ();
2892 this->scale (k_alphas.view_device ());
2895 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2898 scale (
const Kokkos::View<const impl_scalar_type*, device_type>& alphas)
2901 using Kokkos::subview;
2903 const size_t lclNumRows = this->getLocalLength ();
2904 const size_t numVecs = this->getNumVectors ();
2905 TEUCHOS_TEST_FOR_EXCEPTION(
2906 static_cast<size_t> (alphas.extent (0)) != numVecs,
2907 std::invalid_argument,
"Tpetra::MultiVector::scale(alphas): "
2908 "alphas.extent(0) = " << alphas.extent (0)
2909 <<
" != this->getNumVectors () = " << numVecs <<
".");
2910 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2911 const std::pair<size_t, size_t> colRng (0, numVecs);
2921 const bool useHostVersion = this->need_sync_device ();
2922 if (useHostVersion) {
2925 auto alphas_h = Kokkos::create_mirror_view (alphas);
2929 auto Y_lcl = subview (this->getLocalViewHost(Access::ReadWrite), rowRng, ALL ());
2930 if (isConstantStride ()) {
2931 KokkosBlas::scal (Y_lcl, alphas_h, Y_lcl);
2934 for (
size_t k = 0; k < numVecs; ++k) {
2935 const size_t Y_col = this->isConstantStride () ? k :
2936 this->whichVectors_[k];
2937 auto Y_k = subview (Y_lcl, ALL (), Y_col);
2940 KokkosBlas::scal (Y_k, alphas_h(k), Y_k);
2945 auto Y_lcl = subview (this->getLocalViewDevice(Access::ReadWrite), rowRng, ALL ());
2946 if (isConstantStride ()) {
2947 KokkosBlas::scal (Y_lcl, alphas, Y_lcl);
2954 auto alphas_h = Kokkos::create_mirror_view (alphas);
2958 for (
size_t k = 0; k < numVecs; ++k) {
2959 const size_t Y_col = this->isConstantStride () ? k :
2960 this->whichVectors_[k];
2961 auto Y_k = subview (Y_lcl, ALL (), Y_col);
2962 KokkosBlas::scal (Y_k, alphas_h(k), Y_k);
2968 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2975 using Kokkos::subview;
2976 const char tfecfFuncName[] =
"scale: ";
2978 const size_t lclNumRows = getLocalLength ();
2979 const size_t numVecs = getNumVectors ();
2981 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2983 "this->getLocalLength() = " << lclNumRows <<
" != A.getLocalLength() = "
2985 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2987 "this->getNumVectors() = " << numVecs <<
" != A.getNumVectors() = "
2991 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2992 const std::pair<size_t, size_t> colRng (0, numVecs);
2994 auto Y_lcl_orig = this->getLocalViewDevice(Access::ReadWrite);
2996 auto Y_lcl = subview (Y_lcl_orig, rowRng, ALL ());
2997 auto X_lcl = subview (X_lcl_orig, rowRng, ALL ());
3000 KokkosBlas::scal (Y_lcl, theAlpha, X_lcl);
3004 for (
size_t k = 0; k < numVecs; ++k) {
3005 const size_t Y_col = this->isConstantStride () ? k : this->whichVectors_[k];
3007 auto Y_k = subview (Y_lcl, ALL (), Y_col);
3008 auto X_k = subview (X_lcl, ALL (), X_col);
3010 KokkosBlas::scal (Y_k, theAlpha, X_k);
3017 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3022 const char tfecfFuncName[] =
"reciprocal: ";
3024 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3026 "MultiVectors do not have the same local length. "
3027 "this->getLocalLength() = " << getLocalLength ()
3029 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3030 A.
getNumVectors () != this->getNumVectors (), std::runtime_error,
3031 ": MultiVectors do not have the same number of columns (vectors). "
3032 "this->getNumVectors() = " << getNumVectors ()
3033 <<
" != A.getNumVectors() = " << A.
getNumVectors () <<
".");
3035 const size_t numVecs = getNumVectors ();
3037 auto this_view_dev = this->getLocalViewDevice(Access::ReadWrite);
3041 KokkosBlas::reciprocal (this_view_dev, A_view_dev);
3045 using Kokkos::subview;
3046 for (
size_t k = 0; k < numVecs; ++k) {
3047 const size_t this_col = isConstantStride () ? k : whichVectors_[k];
3048 auto vector_k = subview (this_view_dev, ALL (), this_col);
3049 const size_t A_col = isConstantStride () ? k : A.
whichVectors_[k];
3050 auto vector_Ak = subview (A_view_dev, ALL (), A_col);
3051 KokkosBlas::reciprocal (vector_k, vector_Ak);
3056 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3061 const char tfecfFuncName[] =
"abs";
3063 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3065 ": MultiVectors do not have the same local length. "
3066 "this->getLocalLength() = " << getLocalLength ()
3068 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3069 A.
getNumVectors () != this->getNumVectors (), std::runtime_error,
3070 ": MultiVectors do not have the same number of columns (vectors). "
3071 "this->getNumVectors() = " << getNumVectors ()
3072 <<
" != A.getNumVectors() = " << A.
getNumVectors () <<
".");
3073 const size_t numVecs = getNumVectors ();
3075 auto this_view_dev = this->getLocalViewDevice(Access::ReadWrite);
3079 KokkosBlas::abs (this_view_dev, A_view_dev);
3083 using Kokkos::subview;
3085 for (
size_t k=0; k < numVecs; ++k) {
3086 const size_t this_col = isConstantStride () ? k : whichVectors_[k];
3087 auto vector_k = subview (this_view_dev, ALL (), this_col);
3088 const size_t A_col = isConstantStride () ? k : A.
whichVectors_[k];
3089 auto vector_Ak = subview (A_view_dev, ALL (), A_col);
3090 KokkosBlas::abs (vector_k, vector_Ak);
3095 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3102 const char tfecfFuncName[] =
"update: ";
3103 using Kokkos::subview;
3108 const size_t lclNumRows = getLocalLength ();
3109 const size_t numVecs = getNumVectors ();
3112 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3114 "this->getLocalLength() = " << lclNumRows <<
" != A.getLocalLength() = "
3116 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3118 "this->getNumVectors() = " << numVecs <<
" != A.getNumVectors() = "
3124 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
3125 const std::pair<size_t, size_t> colRng (0, numVecs);
3127 auto Y_lcl_orig = this->getLocalViewDevice(Access::ReadWrite);
3128 auto Y_lcl = subview (Y_lcl_orig, rowRng, Kokkos::ALL ());
3130 auto X_lcl = subview (X_lcl_orig, rowRng, Kokkos::ALL ());
3134 KokkosBlas::axpby (theAlpha, X_lcl, theBeta, Y_lcl);
3138 for (
size_t k = 0; k < numVecs; ++k) {
3139 const size_t Y_col = this->isConstantStride () ? k : this->whichVectors_[k];
3141 auto Y_k = subview (Y_lcl, ALL (), Y_col);
3142 auto X_k = subview (X_lcl, ALL (), X_col);
3144 KokkosBlas::axpby (theAlpha, X_k, theBeta, Y_k);
3149 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3156 const Scalar& gamma)
3159 using Kokkos::subview;
3161 const char tfecfFuncName[] =
"update(alpha,A,beta,B,gamma): ";
3165 const size_t lclNumRows = this->getLocalLength ();
3166 const size_t numVecs = getNumVectors ();
3169 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3171 "The input MultiVector A has " << A.
getLocalLength () <<
" local "
3172 "row(s), but this MultiVector has " << lclNumRows <<
" local "
3174 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3176 "The input MultiVector B has " << B.
getLocalLength () <<
" local "
3177 "row(s), but this MultiVector has " << lclNumRows <<
" local "
3179 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3181 "The input MultiVector A has " << A.
getNumVectors () <<
" column(s), "
3182 "but this MultiVector has " << numVecs <<
" column(s).");
3183 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3185 "The input MultiVector B has " << B.
getNumVectors () <<
" column(s), "
3186 "but this MultiVector has " << numVecs <<
" column(s).");
3193 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
3194 const std::pair<size_t, size_t> colRng (0, numVecs);
3199 auto C_lcl = subview (this->getLocalViewDevice(Access::ReadWrite), rowRng, ALL ());
3204 KokkosBlas::update (theAlpha, A_lcl, theBeta, B_lcl, theGamma, C_lcl);
3209 for (
size_t k = 0; k < numVecs; ++k) {
3210 const size_t this_col = isConstantStride () ? k : whichVectors_[k];
3213 KokkosBlas::update (theAlpha, subview (A_lcl, rowRng, A_col),
3214 theBeta, subview (B_lcl, rowRng, B_col),
3215 theGamma, subview (C_lcl, rowRng, this_col));
3220 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3221 Teuchos::ArrayRCP<const Scalar>
3227 const char tfecfFuncName[] =
"getData: ";
3234 auto hostView = getLocalViewHost(Access::ReadOnly);
3235 const size_t col = isConstantStride () ? j : whichVectors_[j];
3236 auto hostView_j = Kokkos::subview (hostView, ALL (), col);
3237 Teuchos::ArrayRCP<const IST> dataAsArcp =
3238 Kokkos::Compat::persistingView (hostView_j, 0, getLocalLength ());
3240 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3241 (static_cast<size_t> (hostView_j.extent (0)) <
3242 static_cast<size_t> (dataAsArcp.size ()), std::logic_error,
3243 "hostView_j.extent(0) = " << hostView_j.extent (0)
3244 <<
" < dataAsArcp.size() = " << dataAsArcp.size () <<
". "
3245 "Please report this bug to the Tpetra developers.");
3247 return Teuchos::arcp_reinterpret_cast<
const Scalar> (dataAsArcp);
3250 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3251 Teuchos::ArrayRCP<Scalar>
3256 using Kokkos::subview;
3258 const char tfecfFuncName[] =
"getDataNonConst: ";
3260 auto hostView = getLocalViewHost(Access::ReadWrite);
3261 const size_t col = isConstantStride () ? j : whichVectors_[j];
3262 auto hostView_j = subview (hostView, ALL (), col);
3263 Teuchos::ArrayRCP<IST> dataAsArcp =
3264 Kokkos::Compat::persistingView (hostView_j, 0, getLocalLength ());
3266 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3267 (static_cast<size_t> (hostView_j.extent (0)) <
3268 static_cast<size_t> (dataAsArcp.size ()), std::logic_error,
3269 "hostView_j.extent(0) = " << hostView_j.extent (0)
3270 <<
" < dataAsArcp.size() = " << dataAsArcp.size () <<
". "
3271 "Please report this bug to the Tpetra developers.");
3273 return Teuchos::arcp_reinterpret_cast<Scalar> (dataAsArcp);
3276 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3277 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3279 subCopy (
const Teuchos::ArrayView<const size_t>& cols)
const
3286 bool contiguous =
true;
3287 const size_t numCopyVecs =
static_cast<size_t> (cols.size ());
3288 for (
size_t j = 1; j < numCopyVecs; ++j) {
3289 if (cols[j] != cols[j-1] + static_cast<size_t> (1)) {
3294 if (contiguous && numCopyVecs > 0) {
3295 return this->subCopy (Teuchos::Range1D (cols[0], cols[numCopyVecs-1]));
3298 RCP<const MV> X_sub = this->subView (cols);
3299 RCP<MV> Y (
new MV (this->getMap (), numCopyVecs,
false));
3305 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3306 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3313 RCP<const MV> X_sub = this->subView (colRng);
3314 RCP<MV> Y (
new MV (this->getMap (), static_cast<size_t> (colRng.size ()),
false));
3319 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3323 return view_.origExtent(0);
3326 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3330 return view_.origExtent(1);
3333 template <
class Scalar,
class LO,
class GO,
class Node>
3336 const Teuchos::RCP<const map_type>& subMap,
3337 const local_ordinal_type rowOffset) :
3341 using Kokkos::subview;
3342 using Teuchos::outArg;
3345 using Teuchos::reduceAll;
3346 using Teuchos::REDUCE_MIN;
3349 const char prefix[] =
"Tpetra::MultiVector constructor (offsetView): ";
3350 const char suffix[] =
"Please report this bug to the Tpetra developers.";
3353 std::unique_ptr<std::ostringstream> errStrm;
3360 const auto comm = subMap->getComm ();
3361 TEUCHOS_ASSERT( ! comm.is_null () );
3362 const int myRank = comm->getRank ();
3364 const LO lclNumRowsBefore =
static_cast<LO
> (X.
getLocalLength ());
3366 const LO newNumRows =
static_cast<LO
> (subMap->getLocalNumElements ());
3368 std::ostringstream os;
3369 os <<
"Proc " << myRank <<
": " << prefix
3370 <<
"X: {lclNumRows: " << lclNumRowsBefore
3372 <<
", numCols: " << numCols <<
"}, "
3373 <<
"subMap: {lclNumRows: " << newNumRows <<
"}" << endl;
3374 std::cerr << os.str ();
3379 const bool tooManyElts =
3382 errStrm = std::unique_ptr<std::ostringstream> (
new std::ostringstream);
3383 *errStrm <<
" Proc " << myRank <<
": subMap->getLocalNumElements() (="
3384 << newNumRows <<
") + rowOffset (=" << rowOffset
3388 TEUCHOS_TEST_FOR_EXCEPTION
3389 (! debug && tooManyElts, std::invalid_argument,
3390 prefix << errStrm->str () << suffix);
3394 reduceAll<int, int> (*comm, REDUCE_MIN, lclGood, outArg (gblGood));
3396 std::ostringstream gblErrStrm;
3397 const std::string myErrStr =
3398 errStrm.get () !=
nullptr ? errStrm->str () : std::string (
"");
3399 ::Tpetra::Details::gathervPrint (gblErrStrm, myErrStr, *comm);
3400 TEUCHOS_TEST_FOR_EXCEPTION
3401 (
true, std::invalid_argument, gblErrStrm.str ());
3405 using range_type = std::pair<LO, LO>;
3406 const range_type origRowRng
3407 (rowOffset, static_cast<LO> (X.
view_.origExtent (0)));
3408 const range_type rowRng
3409 (rowOffset, rowOffset + newNumRows);
3411 wrapped_dual_view_type newView = takeSubview (X.
view_, rowRng, ALL ());
3418 if (newView.extent (0) == 0 &&
3419 newView.extent (1) != X.
view_.extent (1)) {
3421 allocDualView<Scalar, LO, GO, Node> (0, X.
getNumVectors ());
3425 MV (subMap, newView) :
3429 const LO lclNumRowsRet =
static_cast<LO
> (subViewMV.getLocalLength ());
3430 const LO numColsRet =
static_cast<LO
> (subViewMV.getNumVectors ());
3431 if (newNumRows != lclNumRowsRet || numCols != numColsRet) {
3433 if (errStrm.get () ==
nullptr) {
3434 errStrm = std::unique_ptr<std::ostringstream> (
new std::ostringstream);
3436 *errStrm <<
" Proc " << myRank <<
3437 ": subMap.getLocalNumElements(): " << newNumRows <<
3438 ", subViewMV.getLocalLength(): " << lclNumRowsRet <<
3439 ", X.getNumVectors(): " << numCols <<
3440 ", subViewMV.getNumVectors(): " << numColsRet << endl;
3442 reduceAll<int, int> (*comm, REDUCE_MIN, lclGood, outArg (gblGood));
3444 std::ostringstream gblErrStrm;
3446 gblErrStrm << prefix <<
"Returned MultiVector has the wrong local "
3447 "dimensions on one or more processes:" << endl;
3449 const std::string myErrStr =
3450 errStrm.get () !=
nullptr ? errStrm->str () : std::string (
"");
3451 ::Tpetra::Details::gathervPrint (gblErrStrm, myErrStr, *comm);
3452 gblErrStrm << suffix << endl;
3453 TEUCHOS_TEST_FOR_EXCEPTION
3454 (
true, std::invalid_argument, gblErrStrm.str ());
3459 std::ostringstream os;
3460 os <<
"Proc " << myRank <<
": " << prefix <<
"Call op=" << endl;
3461 std::cerr << os.str ();
3467 std::ostringstream os;
3468 os <<
"Proc " << myRank <<
": " << prefix <<
"Done!" << endl;
3469 std::cerr << os.str ();
3473 template <
class Scalar,
class LO,
class GO,
class Node>
3476 const map_type& subMap,
3477 const size_t rowOffset) :
3478 MultiVector (X, Teuchos::RCP<const map_type> (new map_type (subMap)),
3482 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3483 Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3486 const size_t offset)
const
3489 return Teuchos::rcp (
new MV (*
this, *subMap, offset));
3492 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3493 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3496 const size_t offset)
3499 return Teuchos::rcp (
new MV (*
this, *subMap, offset));
3502 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3503 Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3505 subView (
const Teuchos::ArrayView<const size_t>& cols)
const
3507 using Teuchos::Array;
3511 const size_t numViewCols =
static_cast<size_t> (cols.size ());
3512 TEUCHOS_TEST_FOR_EXCEPTION(
3513 numViewCols < 1, std::runtime_error,
"Tpetra::MultiVector::subView"
3514 "(const Teuchos::ArrayView<const size_t>&): The input array cols must "
3515 "contain at least one entry, but cols.size() = " << cols.size ()
3520 bool contiguous =
true;
3521 for (
size_t j = 1; j < numViewCols; ++j) {
3522 if (cols[j] != cols[j-1] + static_cast<size_t> (1)) {
3528 if (numViewCols == 0) {
3530 return rcp (
new MV (this->getMap (), numViewCols));
3533 return this->subView (Teuchos::Range1D (cols[0], cols[numViewCols-1]));
3537 if (isConstantStride ()) {
3538 return rcp (
new MV (this->getMap (), view_, cols));
3541 Array<size_t> newcols (cols.size ());
3542 for (
size_t j = 0; j < numViewCols; ++j) {
3543 newcols[j] = whichVectors_[cols[j]];
3545 return rcp (
new MV (this->getMap (), view_, newcols ()));
3550 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3551 Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3555 using ::Tpetra::Details::Behavior;
3557 using Kokkos::subview;
3558 using Teuchos::Array;
3562 const char tfecfFuncName[] =
"subView(Range1D): ";
3564 const size_t lclNumRows = this->getLocalLength ();
3565 const size_t numVecs = this->getNumVectors ();
3569 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3570 static_cast<size_t> (colRng.size ()) > numVecs, std::runtime_error,
3571 "colRng.size() = " << colRng.size () <<
" > this->getNumVectors() = "
3573 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3574 numVecs != 0 && colRng.size () != 0 &&
3575 (colRng.lbound () <
static_cast<Teuchos::Ordinal
> (0) ||
3576 static_cast<size_t> (colRng.ubound ()) >= numVecs),
3577 std::invalid_argument,
"Nonempty input range [" << colRng.lbound () <<
3578 "," << colRng.ubound () <<
"] exceeds the valid range of column indices "
3579 "[0, " << numVecs <<
"].");
3581 RCP<const MV> X_ret;
3591 const std::pair<size_t, size_t> rows (0, lclNumRows);
3592 if (colRng.size () == 0) {
3593 const std::pair<size_t, size_t> cols (0, 0);
3594 wrapped_dual_view_type X_sub = takeSubview (this->view_, ALL (), cols);
3595 X_ret = rcp (
new MV (this->getMap (), X_sub));
3599 if (isConstantStride ()) {
3600 const std::pair<size_t, size_t> cols (colRng.lbound (),
3601 colRng.ubound () + 1);
3602 wrapped_dual_view_type X_sub = takeSubview (this->view_, ALL (), cols);
3603 X_ret = rcp (
new MV (this->getMap (), X_sub));
3606 if (static_cast<size_t> (colRng.size ()) == static_cast<size_t> (1)) {
3609 const std::pair<size_t, size_t> col (whichVectors_[0] + colRng.lbound (),
3610 whichVectors_[0] + colRng.ubound () + 1);
3611 wrapped_dual_view_type X_sub = takeSubview (view_, ALL (), col);
3612 X_ret = rcp (
new MV (this->getMap (), X_sub));
3615 Array<size_t> which (whichVectors_.begin () + colRng.lbound (),
3616 whichVectors_.begin () + colRng.ubound () + 1);
3617 X_ret = rcp (
new MV (this->getMap (), view_, which));
3622 const bool debug = Behavior::debug ();
3624 using Teuchos::Comm;
3625 using Teuchos::outArg;
3626 using Teuchos::REDUCE_MIN;
3627 using Teuchos::reduceAll;
3629 RCP<const Comm<int> > comm = this->getMap ().is_null () ?
3630 Teuchos::null : this->getMap ()->getComm ();
3631 if (! comm.is_null ()) {
3635 if (X_ret.is_null ()) {
3638 reduceAll<int, int> (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess));
3639 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3640 (lclSuccess != 1, std::logic_error,
"X_ret (the subview of this "
3641 "MultiVector; the return value of this method) is null on some MPI "
3642 "process in this MultiVector's communicator. This should never "
3643 "happen. Please report this bug to the Tpetra developers.");
3644 if (! X_ret.is_null () &&
3645 X_ret->getNumVectors () !=
static_cast<size_t> (colRng.size ())) {
3648 reduceAll<int, int> (*comm, REDUCE_MIN, lclSuccess,
3649 outArg (gblSuccess));
3650 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3651 (lclSuccess != 1, std::logic_error,
"X_ret->getNumVectors() != "
3652 "colRng.size(), on at least one MPI process in this MultiVector's "
3653 "communicator. This should never happen. "
3654 "Please report this bug to the Tpetra developers.");
3661 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3662 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3667 return Teuchos::rcp_const_cast<MV> (this->subView (cols));
3671 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3672 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3677 return Teuchos::rcp_const_cast<MV> (this->subView (colRng));
3681 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3687 using Kokkos::subview;
3688 typedef std::pair<size_t, size_t> range_type;
3689 const char tfecfFuncName[] =
"MultiVector(const MultiVector&, const size_t): ";
3692 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3693 (j >= numCols, std::invalid_argument,
"Input index j (== " << j
3694 <<
") exceeds valid column index range [0, " << numCols <<
" - 1].");
3696 static_cast<size_t> (j) :
3698 this->
view_ = takeSubview (X.
view_, Kokkos::ALL (), range_type (jj, jj+1));
3709 const size_t newSize = X.
imports_.extent (0) / numCols;
3710 const size_t offset = jj*newSize;
3711 auto device_view = subview (X.
imports_.view_device(),
3712 range_type (offset, offset+newSize));
3713 auto host_view = subview (X.
imports_.view_host(),
3714 range_type (offset, offset+newSize));
3718 const size_t newSize = X.
exports_.extent (0) / numCols;
3719 const size_t offset = jj*newSize;
3720 auto device_view = subview (X.
exports_.view_device(),
3721 range_type (offset, offset+newSize));
3722 auto host_view = subview (X.
exports_.view_host(),
3723 range_type (offset, offset+newSize));
3734 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3735 Teuchos::RCP<const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3740 return Teuchos::rcp (
new V (*
this, j));
3744 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3745 Teuchos::RCP<Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3750 return Teuchos::rcp (
new V (*
this, j));
3754 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3757 get1dCopy (
const Teuchos::ArrayView<Scalar>& A,
const size_t LDA)
const
3760 using input_view_type = Kokkos::View<IST**, Kokkos::LayoutLeft,
3762 Kokkos::MemoryUnmanaged>;
3763 const char tfecfFuncName[] =
"get1dCopy: ";
3765 const size_t numRows = this->getLocalLength ();
3766 const size_t numCols = this->getNumVectors ();
3768 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3769 (LDA < numRows, std::runtime_error,
3770 "LDA = " << LDA <<
" < numRows = " << numRows <<
".");
3771 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3772 (numRows >
size_t (0) && numCols >
size_t (0) &&
3773 size_t (A.size ()) < LDA * (numCols - 1) + numRows,
3775 "A.size() = " << A.size () <<
", but its size must be at least "
3776 << (LDA * (numCols - 1) + numRows) <<
" to hold all the entries.");
3778 const std::pair<size_t, size_t> rowRange (0, numRows);
3779 const std::pair<size_t, size_t> colRange (0, numCols);
3781 input_view_type A_view_orig (reinterpret_cast<IST*> (A.getRawPtr ()),
3783 auto A_view = Kokkos::subview (A_view_orig, rowRange, colRange);
3796 if (this->need_sync_host() && this->need_sync_device()) {
3799 throw std::runtime_error(
"Tpetra::MultiVector: A non-const view is alive outside and we cannot give a copy where host or device view can be modified outside");
3802 const bool useHostView = view_.host_view_use_count() >= view_.device_view_use_count();
3803 if (this->isConstantStride ()) {
3805 auto srcView_host = this->getLocalViewHost(Access::ReadOnly);
3809 auto srcView_device = this->getLocalViewDevice(Access::ReadOnly);
3815 for (
size_t j = 0; j < numCols; ++j) {
3816 const size_t srcCol = this->whichVectors_[j];
3817 auto dstColView = Kokkos::subview (A_view, rowRange, j);
3820 auto srcView_host = this->getLocalViewHost(Access::ReadOnly);
3821 auto srcColView_host = Kokkos::subview (srcView_host, rowRange, srcCol);
3825 auto srcView_device = this->getLocalViewDevice(Access::ReadOnly);
3826 auto srcColView_device = Kokkos::subview (srcView_device, rowRange, srcCol);
3836 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3839 get2dCopy (
const Teuchos::ArrayView<
const Teuchos::ArrayView<Scalar> >& ArrayOfPtrs)
const
3842 const char tfecfFuncName[] =
"get2dCopy: ";
3843 const size_t numRows = this->getLocalLength ();
3844 const size_t numCols = this->getNumVectors ();
3846 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3847 static_cast<size_t> (ArrayOfPtrs.size ()) != numCols,
3848 std::runtime_error,
"Input array of pointers must contain as many "
3849 "entries (arrays) as the MultiVector has columns. ArrayOfPtrs.size() = "
3850 << ArrayOfPtrs.size () <<
" != getNumVectors() = " << numCols <<
".");
3852 if (numRows != 0 && numCols != 0) {
3854 for (
size_t j = 0; j < numCols; ++j) {
3855 const size_t dstLen =
static_cast<size_t> (ArrayOfPtrs[j].size ());
3856 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3857 dstLen < numRows, std::invalid_argument,
"Array j = " << j <<
" of "
3858 "the input array of arrays is not long enough to fit all entries in "
3859 "that column of the MultiVector. ArrayOfPtrs[j].size() = " << dstLen
3860 <<
" < getLocalLength() = " << numRows <<
".");
3864 for (
size_t j = 0; j < numCols; ++j) {
3865 Teuchos::RCP<const V> X_j = this->getVector (j);
3866 const size_t LDA =
static_cast<size_t> (ArrayOfPtrs[j].size ());
3867 X_j->get1dCopy (ArrayOfPtrs[j], LDA);
3873 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3874 Teuchos::ArrayRCP<const Scalar>
3878 if (getLocalLength () == 0 || getNumVectors () == 0) {
3879 return Teuchos::null;
3881 TEUCHOS_TEST_FOR_EXCEPTION(
3882 ! isConstantStride (), std::runtime_error,
"Tpetra::MultiVector::"
3883 "get1dView: This MultiVector does not have constant stride, so it is "
3884 "not possible to view its data as a single array. You may check "
3885 "whether a MultiVector has constant stride by calling "
3886 "isConstantStride().");
3890 auto X_lcl = getLocalViewHost(Access::ReadOnly);
3891 Teuchos::ArrayRCP<const impl_scalar_type> dataAsArcp =
3892 Kokkos::Compat::persistingView (X_lcl);
3893 return Teuchos::arcp_reinterpret_cast<
const Scalar> (dataAsArcp);
3897 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3898 Teuchos::ArrayRCP<Scalar>
3902 if (this->getLocalLength () == 0 || this->getNumVectors () == 0) {
3903 return Teuchos::null;
3906 TEUCHOS_TEST_FOR_EXCEPTION
3907 (! isConstantStride (), std::runtime_error,
"Tpetra::MultiVector::"
3908 "get1dViewNonConst: This MultiVector does not have constant stride, "
3909 "so it is not possible to view its data as a single array. You may "
3910 "check whether a MultiVector has constant stride by calling "
3911 "isConstantStride().");
3912 auto X_lcl = getLocalViewHost(Access::ReadWrite);
3913 Teuchos::ArrayRCP<impl_scalar_type> dataAsArcp =
3914 Kokkos::Compat::persistingView (X_lcl);
3915 return Teuchos::arcp_reinterpret_cast<Scalar> (dataAsArcp);
3919 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3920 Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> >
3924 auto X_lcl = getLocalViewHost(Access::ReadWrite);
3930 const size_t myNumRows = this->getLocalLength ();
3931 const size_t numCols = this->getNumVectors ();
3932 const Kokkos::pair<size_t, size_t> rowRange (0, myNumRows);
3934 Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> > views (numCols);
3935 for (
size_t j = 0; j < numCols; ++j) {
3936 const size_t col = this->isConstantStride () ? j : this->whichVectors_[j];
3937 auto X_lcl_j = Kokkos::subview (X_lcl, rowRange, col);
3938 Teuchos::ArrayRCP<impl_scalar_type> X_lcl_j_arcp =
3939 Kokkos::Compat::persistingView (X_lcl_j);
3940 views[j] = Teuchos::arcp_reinterpret_cast<Scalar> (X_lcl_j_arcp);
3945 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3950 return view_.getHostView(s);
3953 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3958 return view_.getHostView(s);
3961 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3966 return view_.getHostView(s);
3969 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3974 return view_.getDeviceView(s);
3977 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3982 return view_.getDeviceView(s);
3985 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3990 return view_.getDeviceView(s);
3993 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4000 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4001 Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> >
4008 auto X_lcl = getLocalViewHost(Access::ReadOnly);
4014 const size_t myNumRows = this->getLocalLength ();
4015 const size_t numCols = this->getNumVectors ();
4016 const Kokkos::pair<size_t, size_t> rowRange (0, myNumRows);
4018 Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> > views (numCols);
4019 for (
size_t j = 0; j < numCols; ++j) {
4020 const size_t col = this->isConstantStride () ? j : this->whichVectors_[j];
4021 auto X_lcl_j = Kokkos::subview (X_lcl, rowRange, col);
4022 Teuchos::ArrayRCP<const impl_scalar_type> X_lcl_j_arcp =
4023 Kokkos::Compat::persistingView (X_lcl_j);
4024 views[j] = Teuchos::arcp_reinterpret_cast<
const Scalar> (X_lcl_j_arcp);
4029 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4033 Teuchos::ETransp transB,
4034 const Scalar& alpha,
4039 using ::Tpetra::Details::ProfilingRegion;
4040 using Teuchos::CONJ_TRANS;
4041 using Teuchos::NO_TRANS;
4042 using Teuchos::TRANS;
4045 using Teuchos::rcpFromRef;
4047 using ATS = Kokkos::ArithTraits<impl_scalar_type>;
4049 using STS = Teuchos::ScalarTraits<Scalar>;
4051 const char tfecfFuncName[] =
"multiply: ";
4052 ProfilingRegion region (
"Tpetra::MV::multiply");
4084 const bool C_is_local = ! this->isDistributed ();
4094 auto myMap = this->getMap ();
4095 if (! myMap.is_null () && ! myMap->getComm ().is_null ()) {
4096 using Teuchos::REDUCE_MIN;
4097 using Teuchos::reduceAll;
4098 using Teuchos::outArg;
4100 auto comm = myMap->getComm ();
4101 const size_t A_nrows =
4103 const size_t A_ncols =
4105 const size_t B_nrows =
4107 const size_t B_ncols =
4110 const bool lclBad = this->getLocalLength () != A_nrows ||
4111 this->getNumVectors () != B_ncols || A_ncols != B_nrows;
4113 const int myRank = comm->getRank ();
4114 std::ostringstream errStrm;
4115 if (this->getLocalLength () != A_nrows) {
4116 errStrm <<
"Proc " << myRank <<
": this->getLocalLength()="
4117 << this->getLocalLength () <<
" != A_nrows=" << A_nrows
4118 <<
"." << std::endl;
4120 if (this->getNumVectors () != B_ncols) {
4121 errStrm <<
"Proc " << myRank <<
": this->getNumVectors()="
4122 << this->getNumVectors () <<
" != B_ncols=" << B_ncols
4123 <<
"." << std::endl;
4125 if (A_ncols != B_nrows) {
4126 errStrm <<
"Proc " << myRank <<
": A_ncols="
4127 << A_ncols <<
" != B_nrows=" << B_nrows
4128 <<
"." << std::endl;
4131 const int lclGood = lclBad ? 0 : 1;
4133 reduceAll<int, int> (*comm, REDUCE_MIN, lclGood, outArg (gblGood));
4135 std::ostringstream os;
4136 ::Tpetra::Details::gathervPrint (os, errStrm.str (), *comm);
4138 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4139 (
true, std::runtime_error,
"Inconsistent local dimensions on at "
4140 "least one process in this object's communicator." << std::endl
4142 <<
"C(" << (C_is_local ?
"local" :
"distr") <<
") = "
4144 << (transA == Teuchos::TRANS ?
"^T" :
4145 (transA == Teuchos::CONJ_TRANS ?
"^H" :
""))
4146 <<
"(" << (A_is_local ?
"local" :
"distr") <<
") * "
4148 << (transA == Teuchos::TRANS ?
"^T" :
4149 (transA == Teuchos::CONJ_TRANS ?
"^H" :
""))
4150 <<
"(" << (B_is_local ?
"local" :
"distr") <<
")." << std::endl
4151 <<
"Global dimensions: C(" << this->getGlobalLength () <<
", "
4161 const bool Case1 = C_is_local && A_is_local && B_is_local;
4163 const bool Case2 = C_is_local && ! A_is_local && ! B_is_local &&
4164 transA != NO_TRANS &&
4167 const bool Case3 = ! C_is_local && ! A_is_local && B_is_local &&
4171 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4172 (! Case1 && ! Case2 && ! Case3, std::runtime_error,
4173 "Multiplication of op(A) and op(B) into *this is not a "
4174 "supported use case.");
4176 if (beta != STS::zero () && Case2) {
4182 const int myRank = this->getMap ()->getComm ()->getRank ();
4184 beta_local = ATS::zero ();
4193 if (! isConstantStride ()) {
4194 C_tmp = rcp (
new MV (*
this, Teuchos::Copy));
4196 C_tmp = rcp (
this,
false);
4199 RCP<const MV> A_tmp;
4201 A_tmp = rcp (
new MV (A, Teuchos::Copy));
4203 A_tmp = rcpFromRef (A);
4206 RCP<const MV> B_tmp;
4208 B_tmp = rcp (
new MV (B, Teuchos::Copy));
4210 B_tmp = rcpFromRef (B);
4213 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4214 (! C_tmp->isConstantStride () || ! B_tmp->isConstantStride () ||
4215 ! A_tmp->isConstantStride (), std::logic_error,
4216 "Failed to make temporary constant-stride copies of MultiVectors.");
4219 const LO A_lclNumRows = A_tmp->getLocalLength ();
4220 const LO A_numVecs = A_tmp->getNumVectors ();
4221 auto A_lcl = A_tmp->getLocalViewDevice(Access::ReadOnly);
4222 auto A_sub = Kokkos::subview (A_lcl,
4223 std::make_pair (LO (0), A_lclNumRows),
4224 std::make_pair (LO (0), A_numVecs));
4227 const LO B_lclNumRows = B_tmp->getLocalLength ();
4228 const LO B_numVecs = B_tmp->getNumVectors ();
4229 auto B_lcl = B_tmp->getLocalViewDevice(Access::ReadOnly);
4230 auto B_sub = Kokkos::subview (B_lcl,
4231 std::make_pair (LO (0), B_lclNumRows),
4232 std::make_pair (LO (0), B_numVecs));
4234 const LO C_lclNumRows = C_tmp->getLocalLength ();
4235 const LO C_numVecs = C_tmp->getNumVectors ();
4237 auto C_lcl = C_tmp->getLocalViewDevice(Access::ReadWrite);
4238 auto C_sub = Kokkos::subview (C_lcl,
4239 std::make_pair (LO (0), C_lclNumRows),
4240 std::make_pair (LO (0), C_numVecs));
4241 const char ctransA = (transA == Teuchos::NO_TRANS ?
'N' :
4242 (transA == Teuchos::TRANS ?
'T' :
'C'));
4243 const char ctransB = (transB == Teuchos::NO_TRANS ?
'N' :
4244 (transB == Teuchos::TRANS ?
'T' :
'C'));
4247 ProfilingRegion regionGemm (
"Tpetra::MV::multiply-call-gemm");
4249 KokkosBlas::gemm (&ctransA, &ctransB, alpha_IST, A_sub, B_sub,
4253 if (! isConstantStride ()) {
4258 A_tmp = Teuchos::null;
4259 B_tmp = Teuchos::null;
4267 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4276 using Kokkos::subview;
4277 const char tfecfFuncName[] =
"elementWiseMultiply: ";
4279 const size_t lclNumRows = this->getLocalLength ();
4280 const size_t numVecs = this->getNumVectors ();
4282 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4284 std::runtime_error,
"MultiVectors do not have the same local length.");
4285 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4286 numVecs != B.
getNumVectors (), std::runtime_error,
"this->getNumVectors"
4287 "() = " << numVecs <<
" != B.getNumVectors() = " << B.
getNumVectors ()
4290 auto this_view = this->getLocalViewDevice(Access::ReadWrite);
4300 KokkosBlas::mult (scalarThis,
4303 subview (A_view, ALL (), 0),
4307 for (
size_t j = 0; j < numVecs; ++j) {
4308 const size_t C_col = isConstantStride () ? j : whichVectors_[j];
4310 KokkosBlas::mult (scalarThis,
4311 subview (this_view, ALL (), C_col),
4313 subview (A_view, ALL (), 0),
4314 subview (B_view, ALL (), B_col));
4319 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4325 using ::Tpetra::Details::ProfilingRegion;
4326 ProfilingRegion region (
"Tpetra::MV::reduce");
4328 const auto map = this->getMap ();
4329 if (map.get () ==
nullptr) {
4332 const auto comm = map->getComm ();
4333 if (comm.get () ==
nullptr) {
4339 const bool changed_on_device = this->need_sync_host ();
4340 if (changed_on_device) {
4344 Kokkos::fence(
"MultiVector::reduce");
4345 auto X_lcl = this->getLocalViewDevice(Access::ReadWrite);
4349 auto X_lcl = this->getLocalViewHost(Access::ReadWrite);
4354 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4362 const LocalOrdinal minLocalIndex = this->getMap()->getMinLocalIndex();
4363 const LocalOrdinal maxLocalIndex = this->getMap()->getMaxLocalIndex();
4364 TEUCHOS_TEST_FOR_EXCEPTION(
4365 lclRow < minLocalIndex || lclRow > maxLocalIndex,
4367 "Tpetra::MultiVector::replaceLocalValue: row index " << lclRow
4368 <<
" is invalid. The range of valid row indices on this process "
4369 << this->getMap()->getComm()->getRank() <<
" is [" << minLocalIndex
4370 <<
", " << maxLocalIndex <<
"].");
4371 TEUCHOS_TEST_FOR_EXCEPTION(
4372 vectorIndexOutOfRange(col),
4374 "Tpetra::MultiVector::replaceLocalValue: vector index " << col
4375 <<
" of the multivector is invalid.");
4378 auto view = this->getLocalViewHost(Access::ReadWrite);
4379 const size_t colInd = isConstantStride () ? col : whichVectors_[col];
4380 view (lclRow, colInd) = ScalarValue;
4384 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4393 const LocalOrdinal minLocalIndex = this->getMap()->getMinLocalIndex();
4394 const LocalOrdinal maxLocalIndex = this->getMap()->getMaxLocalIndex();
4395 TEUCHOS_TEST_FOR_EXCEPTION(
4396 lclRow < minLocalIndex || lclRow > maxLocalIndex,
4398 "Tpetra::MultiVector::sumIntoLocalValue: row index " << lclRow
4399 <<
" is invalid. The range of valid row indices on this process "
4400 << this->getMap()->getComm()->getRank() <<
" is [" << minLocalIndex
4401 <<
", " << maxLocalIndex <<
"].");
4402 TEUCHOS_TEST_FOR_EXCEPTION(
4403 vectorIndexOutOfRange(col),
4405 "Tpetra::MultiVector::sumIntoLocalValue: vector index " << col
4406 <<
" of the multivector is invalid.");
4409 const size_t colInd = isConstantStride () ? col : whichVectors_[col];
4411 auto view = this->getLocalViewHost(Access::ReadWrite);
4413 Kokkos::atomic_add (& (view(lclRow, colInd)), value);
4416 view(lclRow, colInd) += value;
4421 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4430 const LocalOrdinal lclRow = this->map_->getLocalElement (gblRow);
4433 const char tfecfFuncName[] =
"replaceGlobalValue: ";
4434 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4435 (lclRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid (),
4437 "Global row index " << gblRow <<
" is not present on this process "
4438 << this->getMap ()->getComm ()->getRank () <<
".");
4439 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4440 (this->vectorIndexOutOfRange (col), std::runtime_error,
4441 "Vector index " << col <<
" of the MultiVector is invalid.");
4444 this->replaceLocalValue (lclRow, col, ScalarValue);
4447 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4457 const LocalOrdinal lclRow = this->map_->getLocalElement (globalRow);
4460 TEUCHOS_TEST_FOR_EXCEPTION(
4461 lclRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid (),
4463 "Tpetra::MultiVector::sumIntoGlobalValue: Global row index " << globalRow
4464 <<
" is not present on this process "
4465 << this->getMap ()->getComm ()->getRank () <<
".");
4466 TEUCHOS_TEST_FOR_EXCEPTION(
4467 vectorIndexOutOfRange(col),
4469 "Tpetra::MultiVector::sumIntoGlobalValue: Vector index " << col
4470 <<
" of the multivector is invalid.");
4473 this->sumIntoLocalValue (lclRow, col, value, atomic);
4476 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4478 Teuchos::ArrayRCP<T>
4484 typename dual_view_type::array_layout,
4486 const size_t col = isConstantStride () ? j : whichVectors_[j];
4487 col_dual_view_type X_col =
4488 Kokkos::subview (view_, Kokkos::ALL (), col);
4489 return Kokkos::Compat::persistingView (X_col.view_device());
4492 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4499 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4506 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4511 using Teuchos::TypeNameTraits;
4513 std::ostringstream out;
4514 out <<
"\"" << className <<
"\": {";
4515 out <<
"Template parameters: {Scalar: " << TypeNameTraits<Scalar>::name ()
4516 <<
", LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name ()
4517 <<
", GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name ()
4518 <<
", Node" << Node::name ()
4520 if (this->getObjectLabel () !=
"") {
4521 out <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
4523 out <<
", numRows: " << this->getGlobalLength ();
4524 if (className !=
"Tpetra::Vector") {
4525 out <<
", numCols: " << this->getNumVectors ()
4526 <<
", isConstantStride: " << this->isConstantStride ();
4528 if (this->isConstantStride ()) {
4529 out <<
", columnStride: " << this->getStride ();
4536 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4541 return this->descriptionImpl (
"Tpetra::MultiVector");
4546 template<
typename ViewType>
4547 void print_vector(Teuchos::FancyOStream & out,
const char* prefix,
const ViewType& v)
4550 static_assert(Kokkos::SpaceAccessibility<Kokkos::HostSpace, typename ViewType::memory_space>::accessible,
4551 "Tpetra::MultiVector::localDescribeToString: Details::print_vector should be given a host-accessible view.");
4552 static_assert(ViewType::rank == 2,
4553 "Tpetra::MultiVector::localDescribeToString: Details::print_vector should be given a rank-2 view.");
4556 out <<
"Values("<<prefix<<
"): " << std::endl
4558 const size_t numRows = v.extent(0);
4559 const size_t numCols = v.extent(1);
4561 for (
size_t i = 0; i < numRows; ++i) {
4563 if (i + 1 < numRows) {
4569 for (
size_t i = 0; i < numRows; ++i) {
4570 for (
size_t j = 0; j < numCols; ++j) {
4572 if (j + 1 < numCols) {
4576 if (i + 1 < numRows) {
4585 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4590 typedef LocalOrdinal LO;
4593 if (vl <= Teuchos::VERB_LOW) {
4594 return std::string ();
4596 auto map = this->getMap ();
4597 if (map.is_null ()) {
4598 return std::string ();
4600 auto outStringP = Teuchos::rcp (
new std::ostringstream ());
4601 auto outp = Teuchos::getFancyOStream (outStringP);
4602 Teuchos::FancyOStream& out = *outp;
4603 auto comm = map->getComm ();
4604 const int myRank = comm->getRank ();
4605 const int numProcs = comm->getSize ();
4607 out <<
"Process " << myRank <<
" of " << numProcs <<
":" << endl;
4608 Teuchos::OSTab tab1 (out);
4611 const LO lclNumRows =
static_cast<LO
> (this->getLocalLength ());
4612 out <<
"Local number of rows: " << lclNumRows << endl;
4614 if (vl > Teuchos::VERB_MEDIUM) {
4617 if (this->getNumVectors () !=
static_cast<size_t> (1)) {
4618 out <<
"isConstantStride: " << this->isConstantStride () << endl;
4620 if (this->isConstantStride ()) {
4621 out <<
"Column stride: " << this->getStride () << endl;
4624 if (vl > Teuchos::VERB_HIGH && lclNumRows > 0) {
4632 auto X_dev = view_.getDeviceCopy();
4633 auto X_host = view_.getHostCopy();
4635 if(X_dev.data() == X_host.data()) {
4637 Details::print_vector(out,
"unified",X_host);
4640 Details::print_vector(out,
"host",X_host);
4641 Details::print_vector(out,
"dev",X_dev);
4646 return outStringP->str ();
4649 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4653 const std::string& className,
4654 const Teuchos::EVerbosityLevel verbLevel)
const
4656 using Teuchos::TypeNameTraits;
4657 using Teuchos::VERB_DEFAULT;
4658 using Teuchos::VERB_NONE;
4659 using Teuchos::VERB_LOW;
4661 const Teuchos::EVerbosityLevel vl =
4662 (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
4664 if (vl == VERB_NONE) {
4671 auto map = this->getMap ();
4672 if (map.is_null ()) {
4675 auto comm = map->getComm ();
4676 if (comm.is_null ()) {
4680 const int myRank = comm->getRank ();
4689 Teuchos::RCP<Teuchos::OSTab> tab0, tab1;
4693 tab0 = Teuchos::rcp (
new Teuchos::OSTab (out));
4694 out <<
"\"" << className <<
"\":" << endl;
4695 tab1 = Teuchos::rcp (
new Teuchos::OSTab (out));
4697 out <<
"Template parameters:" << endl;
4698 Teuchos::OSTab tab2 (out);
4699 out <<
"Scalar: " << TypeNameTraits<Scalar>::name () << endl
4700 <<
"LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name () << endl
4701 <<
"GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name () << endl
4702 <<
"Node: " << Node::name () << endl;
4704 if (this->getObjectLabel () !=
"") {
4705 out <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
4707 out <<
"Global number of rows: " << this->getGlobalLength () << endl;
4708 if (className !=
"Tpetra::Vector") {
4709 out <<
"Number of columns: " << this->getNumVectors () << endl;
4716 if (vl > VERB_LOW) {
4717 const std::string lclStr = this->localDescribeToString (vl);
4718 ::Tpetra::Details::gathervPrint (out, lclStr, *comm);
4722 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4726 const Teuchos::EVerbosityLevel verbLevel)
const
4728 this->describeImpl (out,
"Tpetra::MultiVector", verbLevel);
4731 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4736 replaceMap (newMap);
4739 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4744 using ::Tpetra::Details::localDeepCopy;
4745 const char prefix[] =
"Tpetra::MultiVector::assign: ";
4747 TEUCHOS_TEST_FOR_EXCEPTION
4749 this->getNumVectors () != src.
getNumVectors (), std::invalid_argument,
4750 prefix <<
"Global dimensions of the two Tpetra::MultiVector "
4751 "objects do not match. src has dimensions [" << src.
getGlobalLength ()
4752 <<
"," << src.
getNumVectors () <<
"], and *this has dimensions ["
4753 << this->getGlobalLength () <<
"," << this->getNumVectors () <<
"].");
4755 TEUCHOS_TEST_FOR_EXCEPTION
4756 (this->getLocalLength () != src.
getLocalLength (), std::invalid_argument,
4757 prefix <<
"The local row counts of the two Tpetra::MultiVector "
4758 "objects do not match. src has " << src.
getLocalLength () <<
" row(s) "
4759 <<
" and *this has " << this->getLocalLength () <<
" row(s).");
4774 if (src_last_updated_on_host) {
4775 localDeepCopy (this->getLocalViewHost(Access::ReadWrite),
4777 this->isConstantStride (),
4779 this->whichVectors_ (),
4783 localDeepCopy (this->getLocalViewDevice(Access::ReadWrite),
4785 this->isConstantStride (),
4787 this->whichVectors_ (),
4792 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4794 Teuchos::RCP<MultiVector<T, LocalOrdinal, GlobalOrdinal, Node> >
4801 this->getNumVectors(),
4807 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4812 using ::Tpetra::Details::PackTraits;
4814 const size_t l1 = this->getLocalLength();
4816 if ((l1!=l2) || (this->getNumVectors() != vec.
getNumVectors())) {
4823 template <
class ST,
class LO,
class GO,
class NT>
4826 std::swap(mv.
map_, this->map_);
4827 std::swap(mv.
view_, this->view_);
4831 #ifdef HAVE_TPETRACORE_TEUCHOSNUMERICS
4832 template <
class ST,
class LO,
class GO,
class NT>
4835 const Teuchos::SerialDenseMatrix<int, ST>& src)
4837 using ::Tpetra::Details::localDeepCopy;
4839 using IST =
typename MV::impl_scalar_type;
4840 using input_view_type =
4841 Kokkos::View<
const IST**, Kokkos::LayoutLeft,
4842 Kokkos::HostSpace, Kokkos::MemoryUnmanaged>;
4843 using pair_type = std::pair<LO, LO>;
4845 const LO numRows =
static_cast<LO
> (src.numRows ());
4846 const LO numCols =
static_cast<LO
> (src.numCols ());
4848 TEUCHOS_TEST_FOR_EXCEPTION
4851 std::invalid_argument,
"Tpetra::deep_copy: On Process "
4852 << dst.
getMap ()->getComm ()->getRank () <<
", dst is "
4854 <<
", but src is " << numRows <<
" x " << numCols <<
".");
4856 const IST* src_raw =
reinterpret_cast<const IST*
> (src.values ());
4857 input_view_type src_orig (src_raw, src.stride (), numCols);
4858 input_view_type src_view =
4859 Kokkos::subview (src_orig, pair_type (0, numRows), Kokkos::ALL ());
4861 constexpr
bool src_isConstantStride =
true;
4862 Teuchos::ArrayView<const size_t> srcWhichVectors (
nullptr, 0);
4866 src_isConstantStride,
4867 getMultiVectorWhichVectors (dst),
4871 template <
class ST,
class LO,
class GO,
class NT>
4873 deep_copy (Teuchos::SerialDenseMatrix<int, ST>& dst,
4876 using ::Tpetra::Details::localDeepCopy;
4878 using IST =
typename MV::impl_scalar_type;
4879 using output_view_type =
4880 Kokkos::View<IST**, Kokkos::LayoutLeft,
4881 Kokkos::HostSpace, Kokkos::MemoryUnmanaged>;
4882 using pair_type = std::pair<LO, LO>;
4884 const LO numRows =
static_cast<LO
> (dst.numRows ());
4885 const LO numCols =
static_cast<LO
> (dst.numCols ());
4887 TEUCHOS_TEST_FOR_EXCEPTION
4890 std::invalid_argument,
"Tpetra::deep_copy: On Process "
4891 << src.
getMap ()->getComm ()->getRank () <<
", src is "
4893 <<
", but dst is " << numRows <<
" x " << numCols <<
".");
4895 IST* dst_raw =
reinterpret_cast<IST*
> (dst.values ());
4896 output_view_type dst_orig (dst_raw, dst.stride (), numCols);
4898 Kokkos::subview (dst_orig, pair_type (0, numRows), Kokkos::ALL ());
4900 constexpr
bool dst_isConstantStride =
true;
4901 Teuchos::ArrayView<const size_t> dstWhichVectors (
nullptr, 0);
4904 localDeepCopy (dst_view,
4906 dst_isConstantStride,
4909 getMultiVectorWhichVectors (src));
4911 #endif // HAVE_TPETRACORE_TEUCHOSNUMERICS
4913 template <
class Scalar,
class LO,
class GO,
class NT>
4914 Teuchos::RCP<MultiVector<Scalar, LO, GO, NT> >
4919 return Teuchos::rcp (
new MV (map, numVectors));
4922 template <
class ST,
class LO,
class GO,
class NT>
4940 #ifdef HAVE_TPETRACORE_TEUCHOSNUMERICS
4941 # define TPETRA_MULTIVECTOR_INSTANT(SCALAR,LO,GO,NODE) \
4942 template class MultiVector< SCALAR , LO , GO , NODE >; \
4943 template MultiVector< SCALAR , LO , GO , NODE > createCopy( const MultiVector< SCALAR , LO , GO , NODE >& src); \
4944 template Teuchos::RCP<MultiVector< SCALAR , LO , GO , NODE > > createMultiVector (const Teuchos::RCP<const Map<LO, GO, NODE> >& map, size_t numVectors); \
4945 template void deep_copy (MultiVector<SCALAR, LO, GO, NODE>& dst, const Teuchos::SerialDenseMatrix<int, SCALAR>& src); \
4946 template void deep_copy (Teuchos::SerialDenseMatrix<int, SCALAR>& dst, const MultiVector<SCALAR, LO, GO, NODE>& src);
4949 # define TPETRA_MULTIVECTOR_INSTANT(SCALAR,LO,GO,NODE) \
4950 template class MultiVector< SCALAR , LO , GO , NODE >; \
4951 template MultiVector< SCALAR , LO , GO , NODE > createCopy( const MultiVector< SCALAR , LO , GO , NODE >& src);
4953 #endif // HAVE_TPETRACORE_TEUCHOSNUMERICS
4956 #define TPETRA_MULTIVECTOR_CONVERT_INSTANT(SO,SI,LO,GO,NODE) \
4958 template Teuchos::RCP< MultiVector< SO , LO , GO , NODE > > \
4959 MultiVector< SI , LO , GO , NODE >::convert< SO > () const;
4962 #endif // TPETRA_MULTIVECTOR_DEF_HPP
typename map_type::local_ordinal_type local_ordinal_type
The type of local indices that this class uses.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
typename device_type::execution_space execution_space
Type of the (new) Kokkos execution space.
Kokkos::DualView< size_t *, buffer_device_type > numExportPacketsPerLID_
Number of packets to send for each send operation.
Teuchos::RCP< const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > offsetView(const Teuchos::RCP< const map_type > &subMap, const size_t offset) const
Return a const view of a subset of rows.
Declaration and definition of Tpetra::Details::Blas::fill, an implementation detail of Tpetra::MultiV...
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
typename Kokkos::ArithTraits< impl_scalar_type >::mag_type mag_type
Type of a norm result.
size_t getNumVectors() const
Number of columns in the multivector.
size_t getLocalLength() const
Local number of rows on the calling process.
Declaration of a function that prints strings from each process.
bool isConstantStride() const
Whether this multivector has constant stride between columns.
MultiVector< ST, LO, GO, NT > createCopy(const MultiVector< ST, LO, GO, NT > &src)
Return a deep copy of the given MultiVector.
One or more distributed dense vectors.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
static size_t multivectorKernelLocationThreshold()
the threshold for transitioning from device to host
bool isDistributed() const
Whether this is a globally distributed object.
void removeEmptyProcessesInPlace(Teuchos::RCP< DistObjectType > &input, const Teuchos::RCP< const Map< typename DistObjectType::local_ordinal_type, typename DistObjectType::global_ordinal_type, typename DistObjectType::node_type > > &newMap)
Remove processes which contain no elements in this object's Map.
static bool debug()
Whether Tpetra is in debug mode.
Teuchos::Array< size_t > whichVectors_
Indices of columns this multivector is viewing.
Declaration of functions for checking whether a given pointer is accessible from a given Kokkos execu...
int local_ordinal_type
Default value of Scalar template parameter.
bool need_sync_device() const
Whether this MultiVector needs synchronization to the device.
typename Kokkos::ArithTraits< Scalar >::val_type impl_scalar_type
The type used internally in place of Scalar.
size_t global_size_t
Global size_t object.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
dual_view_type::t_host::const_type getLocalViewHost(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on host. This requires that ther...
Kokkos::DualView< size_t *, buffer_device_type > numImportPacketsPerLID_
Number of packets to receive for each receive operation.
Insert new values that don't currently exist.
Kokkos::DualView< packet_type *, buffer_device_type > imports_
Buffer into which packed data are imported (received from other processes).
Kokkos::DualView< packet_type *, buffer_device_type > exports_
Buffer from which packed data are exported (sent to other processes).
Kokkos::DualView< T *, DT > getDualViewCopyFromArrayView(const Teuchos::ArrayView< const T > &x_av, const char label[], const bool leaveOnHost)
Get a 1-D Kokkos::DualView which is a deep copy of the input Teuchos::ArrayView (which views host mem...
wrapped_dual_view_type view_
The Kokkos::DualView containing the MultiVector's data.
void assign(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &src)
Copy the contents of src into *this (deep copy).
static void allReduceView(const OutputViewType &output, const InputViewType &input, const Teuchos::Comm< int > &comm)
All-reduce from input Kokkos::View to output Kokkos::View.
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > createMultiVector(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &map, const size_t numVectors)
Nonmember MultiVector "constructor": Create a MultiVector from a given Map.
Functions for initializing and finalizing Tpetra.
static bool verbose()
Whether Tpetra is in verbose mode.
CombineMode
Rule for combining data in an Import or Export.
Teuchos::RCP< const map_type > map_
The Map over which this object is distributed.
bool reallocDualViewIfNeeded(Kokkos::DualView< ValueType *, DeviceType > &dv, const size_t newSize, const char newLabel[], const size_t tooBigFactor=2, const bool needFenceBeforeRealloc=true)
Reallocate the DualView in/out argument, if needed.
Declaration of Tpetra::Details::isInterComm.
bool need_sync_host() const
Whether this MultiVector needs synchronization to the host.
Replace old value with maximum of magnitudes of old and new values.
Abstract base class for objects that can be the source of an Import or Export operation.
Declaration and definition of Tpetra::Details::reallocDualViewIfNeeded, an implementation detail of T...
Kokkos::DualView< impl_scalar_type **, Kokkos::LayoutLeft, device_type > dual_view_type
Kokkos::DualView specialization used by this class.
Replace existing values with new values.
dual_view_type::t_dev::const_type getLocalViewDevice(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on device. This requires that th...
std::string combineModeToString(const CombineMode combineMode)
Human-readable string representation of the given CombineMode.
Teuchos::RCP< const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > getVector(const size_t j) const
Return a Vector which is a const view of column j.
MultiVector()
Default constructor: makes a MultiVector with no rows or columns.
void start()
Start the deep_copy counter.
A parallel distribution of indices over processes.
A distributed dense vector.
Stand-alone utility functions and macros.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
Declaration and definition of Tpetra::Details::normImpl, which is an implementation detail of Tpetra:...
void normImpl(MagnitudeType norms[], const Kokkos::View< const ValueType **, ArrayLayout, DeviceType > &X, const EWhichNorm whichNorm, const Teuchos::ArrayView< const size_t > &whichVecs, const bool isConstantStride, const bool isDistributed, const Teuchos::Comm< int > *comm)
Implementation of MultiVector norms.
Base class for distributed Tpetra objects that support data redistribution.
global_size_t getGlobalLength() const
Global number of rows in the multivector.
EWhichNorm
Input argument for normImpl() (which see).
Accumulate new values into existing values (may not be supported in all classes)
size_t getOrigNumLocalRows() const
"Original" number of rows in the (local) data.
All-reduce a 1-D or 2-D Kokkos::View.
Declaration and definition of Tpetra::Details::lclDot, an implementation detail of Tpetra::MultiVecto...
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.