11 #ifndef TPETRA_MULTIVECTOR_DEF_HPP
12 #define TPETRA_MULTIVECTOR_DEF_HPP
24 #include "Tpetra_Vector.hpp"
36 #include "Tpetra_Details_Random.hpp"
37 #ifdef HAVE_TPETRACORE_TEUCHOSNUMERICS
38 # include "Teuchos_SerialDenseMatrix.hpp"
39 #endif // HAVE_TPETRACORE_TEUCHOSNUMERICS
40 #include "Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels.hpp"
41 #include "KokkosCompat_View.hpp"
42 #include "KokkosBlas.hpp"
43 #include "KokkosKernels_Utils.hpp"
44 #include "Kokkos_Random.hpp"
45 #include "Kokkos_ArithTraits.hpp"
49 #ifdef HAVE_TPETRA_INST_FLOAT128
52 template<
class Generator>
53 struct rand<Generator, __float128> {
54 static KOKKOS_INLINE_FUNCTION __float128 max ()
56 return static_cast<__float128
> (1.0);
58 static KOKKOS_INLINE_FUNCTION __float128
63 const __float128 scalingFactor =
64 static_cast<__float128
> (std::numeric_limits<double>::min ()) /
65 static_cast<__float128> (2.0);
66 const __float128 higherOrderTerm =
static_cast<__float128
> (gen.drand ());
67 const __float128 lowerOrderTerm =
68 static_cast<__float128
> (gen.drand ()) * scalingFactor;
69 return higherOrderTerm + lowerOrderTerm;
71 static KOKKOS_INLINE_FUNCTION __float128
72 draw (Generator& gen,
const __float128& range)
75 const __float128 scalingFactor =
76 static_cast<__float128
> (std::numeric_limits<double>::min ()) /
77 static_cast<__float128> (2.0);
78 const __float128 higherOrderTerm =
79 static_cast<__float128
> (gen.drand (range));
80 const __float128 lowerOrderTerm =
81 static_cast<__float128
> (gen.drand (range)) * scalingFactor;
82 return higherOrderTerm + lowerOrderTerm;
84 static KOKKOS_INLINE_FUNCTION __float128
85 draw (Generator& gen,
const __float128&
start,
const __float128& end)
88 const __float128 scalingFactor =
89 static_cast<__float128
> (std::numeric_limits<double>::min ()) /
90 static_cast<__float128> (2.0);
91 const __float128 higherOrderTerm =
92 static_cast<__float128
> (gen.drand (start, end));
93 const __float128 lowerOrderTerm =
94 static_cast<__float128
> (gen.drand (start, end)) * scalingFactor;
95 return higherOrderTerm + lowerOrderTerm;
99 #endif // HAVE_TPETRA_INST_FLOAT128
117 template<
class ST,
class LO,
class GO,
class NT>
119 allocDualView (
const size_t lclNumRows,
120 const size_t numCols,
121 const bool zeroOut =
true,
122 const bool allowPadding =
false)
124 using ::Tpetra::Details::Behavior;
125 using Kokkos::AllowPadding;
126 using Kokkos::view_alloc;
127 using Kokkos::WithoutInitializing;
130 typedef typename dual_view_type::t_dev dev_view_type;
136 const std::string label (
"MV::DualView");
137 const bool debug = Behavior::debug ();
147 dev_view_type d_view;
150 d_view = dev_view_type (view_alloc (label, AllowPadding),
151 lclNumRows, numCols);
154 d_view = dev_view_type (view_alloc (label),
155 lclNumRows, numCols);
160 d_view = dev_view_type (view_alloc (label,
163 lclNumRows, numCols);
166 d_view = dev_view_type (view_alloc (label, WithoutInitializing),
167 lclNumRows, numCols);
178 const ST nan = Kokkos::ArithTraits<ST>::nan ();
179 KokkosBlas::fill (d_view, nan);
183 TEUCHOS_TEST_FOR_EXCEPTION
184 (static_cast<size_t> (d_view.extent (0)) != lclNumRows ||
185 static_cast<size_t> (d_view.extent (1)) != numCols, std::logic_error,
186 "allocDualView: d_view's dimensions actual dimensions do not match "
187 "requested dimensions. d_view is " << d_view.extent (0) <<
" x " <<
188 d_view.extent (1) <<
"; requested " << lclNumRows <<
" x " << numCols
189 <<
". Please report this bug to the Tpetra developers.");
192 return wrapped_dual_view_type(d_view);
199 template<
class T,
class ExecSpace>
200 struct MakeUnmanagedView {
210 typedef typename std::conditional<
211 Kokkos::SpaceAccessibility<
212 typename ExecSpace::memory_space,
213 Kokkos::HostSpace>::accessible,
214 typename ExecSpace::device_type,
215 typename Kokkos::DualView<T*, ExecSpace>::host_mirror_space>::type host_exec_space;
216 typedef Kokkos::LayoutLeft array_layout;
217 typedef Kokkos::View<T*, array_layout, host_exec_space,
218 Kokkos::MemoryUnmanaged> view_type;
220 static view_type getView (
const Teuchos::ArrayView<T>& x_in)
222 const size_t numEnt =
static_cast<size_t> (x_in.size ());
226 return view_type (x_in.getRawPtr (), numEnt);
232 template<
class WrappedDualViewType>
234 takeSubview (
const WrappedDualViewType& X,
235 const std::pair<size_t, size_t>& rowRng,
236 const Kokkos::ALL_t& colRng)
240 return WrappedDualViewType(X,rowRng,colRng);
243 template<
class WrappedDualViewType>
245 takeSubview (
const WrappedDualViewType& X,
246 const Kokkos::ALL_t& rowRng,
247 const std::pair<size_t, size_t>& colRng)
249 using DualViewType =
typename WrappedDualViewType::DVT;
252 if (X.extent (0) == 0 && X.extent (1) != 0) {
253 return WrappedDualViewType(DualViewType (
"MV::DualView", 0, colRng.second - colRng.first));
256 return WrappedDualViewType(X,rowRng,colRng);
260 template<
class WrappedDualViewType>
262 takeSubview (
const WrappedDualViewType& X,
263 const std::pair<size_t, size_t>& rowRng,
264 const std::pair<size_t, size_t>& colRng)
266 using DualViewType =
typename WrappedDualViewType::DVT;
276 if (X.extent (0) == 0 && X.extent (1) != 0) {
277 return WrappedDualViewType(DualViewType (
"MV::DualView", 0, colRng.second - colRng.first));
280 return WrappedDualViewType(X,rowRng,colRng);
284 template<
class WrappedOrNotDualViewType>
286 getDualViewStride (
const WrappedOrNotDualViewType& dv)
292 size_t strides[WrappedOrNotDualViewType::t_dev::rank+1];
294 const size_t LDA = strides[1];
295 const size_t numRows = dv.extent (0);
298 return (numRows == 0) ? size_t (1) : numRows;
305 template<
class ViewType>
307 getViewStride (
const ViewType& view)
309 const size_t LDA = view.stride (1);
310 const size_t numRows = view.extent (0);
313 return (numRows == 0) ? size_t (1) : numRows;
320 template <
class impl_scalar_type,
class buffer_device_type>
323 Kokkos::DualView<impl_scalar_type*, buffer_device_type> imports
326 if (! imports.need_sync_device ()) {
331 size_t localLengthThreshold =
333 return imports.extent(0) <= localLengthThreshold;
338 template <
class SC,
class LO,
class GO,
class NT>
340 runKernelOnHost (const ::Tpetra::MultiVector<SC, LO, GO, NT>& X)
342 if (! X.need_sync_device ()) {
347 size_t localLengthThreshold =
349 return X.getLocalLength () <= localLengthThreshold;
353 template <
class SC,
class LO,
class GO,
class NT>
355 multiVectorNormImpl (typename ::Tpetra::MultiVector<SC, LO, GO, NT>::mag_type norms[],
359 using namespace Tpetra;
362 using val_type =
typename MV::impl_scalar_type;
363 using mag_type =
typename MV::mag_type;
364 using dual_view_type =
typename MV::dual_view_type;
367 auto comm = map.is_null () ?
nullptr : map->getComm ().getRawPtr ();
368 auto whichVecs = getMultiVectorWhichVectors (X);
372 const bool runOnHost = runKernelOnHost (X);
374 using view_type =
typename dual_view_type::t_host;
375 using array_layout =
typename view_type::array_layout;
376 using device_type =
typename view_type::device_type;
379 normImpl<val_type, array_layout, device_type,
380 mag_type> (norms, X_lcl, whichNorm, whichVecs,
381 isConstantStride, isDistributed, comm);
384 using view_type =
typename dual_view_type::t_dev;
385 using array_layout =
typename view_type::array_layout;
386 using device_type =
typename view_type::device_type;
389 normImpl<val_type, array_layout, device_type,
390 mag_type> (norms, X_lcl, whichNorm, whichVecs,
391 isConstantStride, isDistributed, comm);
400 template <
typename DstView,
typename SrcView>
401 struct AddAssignFunctor {
404 AddAssignFunctor(DstView &tgt_, SrcView &src_) : tgt(tgt_), src(src_) {}
406 KOKKOS_INLINE_FUNCTION
void
407 operator () (
const size_t k)
const { tgt(k) += src(k); }
414 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
418 return (VectorIndex < 1 && VectorIndex != 0) || VectorIndex >= getNumVectors();
421 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
427 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
430 const size_t numVecs,
431 const bool zeroOut) :
437 view_ = allocDualView<Scalar, LocalOrdinal, GlobalOrdinal, Node> (lclNumRows, numVecs, zeroOut);
440 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
443 const Teuchos::DataAccess copyOrView) :
445 view_ (source.view_),
446 whichVectors_ (source.whichVectors_)
449 const char tfecfFuncName[] =
"MultiVector(const MultiVector&, "
450 "const Teuchos::DataAccess): ";
454 if (copyOrView == Teuchos::Copy) {
458 this->
view_ = cpy.view_;
461 else if (copyOrView == Teuchos::View) {
464 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
465 true, std::invalid_argument,
"Second argument 'copyOrView' has an "
466 "invalid value " << copyOrView <<
". Valid values include "
467 "Teuchos::Copy = " << Teuchos::Copy <<
" and Teuchos::View = "
468 << Teuchos::View <<
".");
472 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
475 const dual_view_type& view) :
477 view_ (wrapped_dual_view_type(view))
479 const char tfecfFuncName[] =
"MultiVector(Map,DualView): ";
480 const size_t lclNumRows_map = map.is_null () ? size_t (0) :
481 map->getLocalNumElements ();
482 const size_t lclNumRows_view = view.extent (0);
483 const size_t LDA = getDualViewStride (
view_);
485 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
486 (LDA < lclNumRows_map || lclNumRows_map != lclNumRows_view,
487 std::invalid_argument,
"Kokkos::DualView does not match Map. "
488 "map->getLocalNumElements() = " << lclNumRows_map
489 <<
", view.extent(0) = " << lclNumRows_view
490 <<
", and getStride() = " << LDA <<
".");
492 using ::Tpetra::Details::Behavior;
493 const bool debug = Behavior::debug ();
495 using ::Tpetra::Details::checkGlobalDualViewValidity;
496 std::ostringstream gblErrStrm;
497 const bool verbose = Behavior::verbose ();
498 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
499 const bool gblValid =
500 checkGlobalDualViewValidity (&gblErrStrm, view, verbose,
502 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
503 (! gblValid, std::runtime_error, gblErrStrm.str ());
508 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
511 const wrapped_dual_view_type& view) :
515 const char tfecfFuncName[] =
"MultiVector(Map,DualView): ";
516 const size_t lclNumRows_map = map.is_null () ? size_t (0) :
517 map->getLocalNumElements ();
518 const size_t lclNumRows_view = view.extent (0);
519 const size_t LDA = getDualViewStride (view);
521 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
522 (LDA < lclNumRows_map || lclNumRows_map != lclNumRows_view,
523 std::invalid_argument,
"Kokkos::DualView does not match Map. "
524 "map->getLocalNumElements() = " << lclNumRows_map
525 <<
", view.extent(0) = " << lclNumRows_view
526 <<
", and getStride() = " << LDA <<
".");
528 using ::Tpetra::Details::Behavior;
529 const bool debug = Behavior::debug ();
531 using ::Tpetra::Details::checkGlobalWrappedDualViewValidity;
532 std::ostringstream gblErrStrm;
533 const bool verbose = Behavior::verbose ();
534 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
535 const bool gblValid =
536 checkGlobalWrappedDualViewValidity (&gblErrStrm, view, verbose,
538 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
539 (! gblValid, std::runtime_error, gblErrStrm.str ());
545 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
548 const typename dual_view_type::t_dev& d_view) :
551 using Teuchos::ArrayRCP;
553 const char tfecfFuncName[] =
"MultiVector(map,d_view): ";
557 const size_t LDA = getViewStride (d_view);
558 const size_t lclNumRows = map->getLocalNumElements ();
559 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
560 (LDA < lclNumRows, std::invalid_argument,
"Map does not match "
561 "Kokkos::View. map->getLocalNumElements() = " << lclNumRows
562 <<
", View's column stride = " << LDA
563 <<
", and View's extent(0) = " << d_view.extent (0) <<
".");
565 auto h_view = Kokkos::create_mirror_view (d_view);
567 view_ = wrapped_dual_view_type(dual_view);
569 using ::Tpetra::Details::Behavior;
570 const bool debug = Behavior::debug ();
572 using ::Tpetra::Details::checkGlobalWrappedDualViewValidity;
573 std::ostringstream gblErrStrm;
574 const bool verbose = Behavior::verbose ();
575 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
576 const bool gblValid =
577 checkGlobalWrappedDualViewValidity (&gblErrStrm,
view_, verbose,
579 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
580 (! gblValid, std::runtime_error, gblErrStrm.str ());
585 dual_view.modify_device ();
588 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
591 const dual_view_type& view,
592 const dual_view_type& origView) :
594 view_ (wrapped_dual_view_type(view,origView))
596 const char tfecfFuncName[] =
"MultiVector(map,view,origView): ";
598 const size_t LDA = getDualViewStride (origView);
600 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
601 LDA < lclNumRows, std::invalid_argument,
"The input Kokkos::DualView's "
602 "column stride LDA = " << LDA <<
" < getLocalLength() = " << lclNumRows
603 <<
". This may also mean that the input origView's first dimension (number "
604 "of rows = " << origView.extent (0) <<
") does not not match the number "
605 "of entries in the Map on the calling process.");
607 using ::Tpetra::Details::Behavior;
608 const bool debug = Behavior::debug ();
610 using ::Tpetra::Details::checkGlobalDualViewValidity;
611 std::ostringstream gblErrStrm;
612 const bool verbose = Behavior::verbose ();
613 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
614 const bool gblValid_0 =
615 checkGlobalDualViewValidity (&gblErrStrm, view, verbose,
617 const bool gblValid_1 =
618 checkGlobalDualViewValidity (&gblErrStrm, origView, verbose,
620 const bool gblValid = gblValid_0 && gblValid_1;
621 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
622 (! gblValid, std::runtime_error, gblErrStrm.str ());
627 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
630 const dual_view_type& view,
631 const Teuchos::ArrayView<const size_t>& whichVectors) :
634 whichVectors_ (whichVectors.begin (), whichVectors.end ())
637 using Kokkos::subview;
638 const char tfecfFuncName[] =
"MultiVector(map,view,whichVectors): ";
640 using ::Tpetra::Details::Behavior;
641 const bool debug = Behavior::debug ();
643 using ::Tpetra::Details::checkGlobalDualViewValidity;
644 std::ostringstream gblErrStrm;
645 const bool verbose = Behavior::verbose ();
646 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
647 const bool gblValid =
648 checkGlobalDualViewValidity (&gblErrStrm, view, verbose,
650 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
651 (! gblValid, std::runtime_error, gblErrStrm.str ());
654 const size_t lclNumRows = map.is_null () ? size_t (0) :
655 map->getLocalNumElements ();
662 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
663 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (0)) < lclNumRows,
664 std::invalid_argument,
"view.extent(0) = " << view.extent (0)
665 <<
" < map->getLocalNumElements() = " << lclNumRows <<
".");
666 if (whichVectors.size () != 0) {
667 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
668 view.extent (1) != 0 && view.extent (1) == 0,
669 std::invalid_argument,
"view.extent(1) = 0, but whichVectors.size()"
670 " = " << whichVectors.size () <<
" > 0.");
671 size_t maxColInd = 0;
672 typedef Teuchos::ArrayView<const size_t>::size_type size_type;
673 for (size_type k = 0; k < whichVectors.size (); ++k) {
674 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
675 whichVectors[k] == Teuchos::OrdinalTraits<size_t>::invalid (),
676 std::invalid_argument,
"whichVectors[" << k <<
"] = "
677 "Teuchos::OrdinalTraits<size_t>::invalid().");
678 maxColInd = std::max (maxColInd, whichVectors[k]);
680 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
681 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (1)) <= maxColInd,
682 std::invalid_argument,
"view.extent(1) = " << view.extent (1)
683 <<
" <= max(whichVectors) = " << maxColInd <<
".");
688 const size_t LDA = getDualViewStride (view);
689 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
690 (LDA < lclNumRows, std::invalid_argument,
691 "LDA = " << LDA <<
" < this->getLocalLength() = " << lclNumRows <<
".");
693 if (whichVectors.size () == 1) {
703 const std::pair<size_t, size_t> colRng (whichVectors[0],
704 whichVectors[0] + 1);
711 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
714 const wrapped_dual_view_type& view,
715 const Teuchos::ArrayView<const size_t>& whichVectors) :
718 whichVectors_ (whichVectors.begin (), whichVectors.end ())
721 using Kokkos::subview;
722 const char tfecfFuncName[] =
"MultiVector(map,view,whichVectors): ";
724 using ::Tpetra::Details::Behavior;
725 const bool debug = Behavior::debug ();
727 using ::Tpetra::Details::checkGlobalWrappedDualViewValidity;
728 std::ostringstream gblErrStrm;
729 const bool verbose = Behavior::verbose ();
730 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
731 const bool gblValid =
732 checkGlobalWrappedDualViewValidity (&gblErrStrm, view, verbose,
734 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
735 (! gblValid, std::runtime_error, gblErrStrm.str ());
738 const size_t lclNumRows = map.is_null () ? size_t (0) :
739 map->getLocalNumElements ();
746 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
747 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (0)) < lclNumRows,
748 std::invalid_argument,
"view.extent(0) = " << view.extent (0)
749 <<
" < map->getLocalNumElements() = " << lclNumRows <<
".");
750 if (whichVectors.size () != 0) {
751 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
752 view.extent (1) != 0 && view.extent (1) == 0,
753 std::invalid_argument,
"view.extent(1) = 0, but whichVectors.size()"
754 " = " << whichVectors.size () <<
" > 0.");
755 size_t maxColInd = 0;
756 typedef Teuchos::ArrayView<const size_t>::size_type size_type;
757 for (size_type k = 0; k < whichVectors.size (); ++k) {
758 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
759 whichVectors[k] == Teuchos::OrdinalTraits<size_t>::invalid (),
760 std::invalid_argument,
"whichVectors[" << k <<
"] = "
761 "Teuchos::OrdinalTraits<size_t>::invalid().");
762 maxColInd = std::max (maxColInd, whichVectors[k]);
764 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
765 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (1)) <= maxColInd,
766 std::invalid_argument,
"view.extent(1) = " << view.extent (1)
767 <<
" <= max(whichVectors) = " << maxColInd <<
".");
772 const size_t LDA = getDualViewStride (view);
773 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
774 (LDA < lclNumRows, std::invalid_argument,
775 "LDA = " << LDA <<
" < this->getLocalLength() = " << lclNumRows <<
".");
777 if (whichVectors.size () == 1) {
787 const std::pair<size_t, size_t> colRng (whichVectors[0],
788 whichVectors[0] + 1);
796 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
799 const dual_view_type& view,
800 const dual_view_type& origView,
801 const Teuchos::ArrayView<const size_t>& whichVectors) :
803 view_ (wrapped_dual_view_type(view,origView)),
804 whichVectors_ (whichVectors.begin (), whichVectors.end ())
807 using Kokkos::subview;
808 const char tfecfFuncName[] =
"MultiVector(map,view,origView,whichVectors): ";
810 using ::Tpetra::Details::Behavior;
811 const bool debug = Behavior::debug ();
813 using ::Tpetra::Details::checkGlobalDualViewValidity;
814 std::ostringstream gblErrStrm;
815 const bool verbose = Behavior::verbose ();
816 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
817 const bool gblValid_0 =
818 checkGlobalDualViewValidity (&gblErrStrm, view, verbose,
820 const bool gblValid_1 =
821 checkGlobalDualViewValidity (&gblErrStrm, origView, verbose,
823 const bool gblValid = gblValid_0 && gblValid_1;
824 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
825 (! gblValid, std::runtime_error, gblErrStrm.str ());
835 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
836 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (0)) < lclNumRows,
837 std::invalid_argument,
"view.extent(0) = " << view.extent (0)
838 <<
" < map->getLocalNumElements() = " << lclNumRows <<
".");
839 if (whichVectors.size () != 0) {
840 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
841 view.extent (1) != 0 && view.extent (1) == 0,
842 std::invalid_argument,
"view.extent(1) = 0, but whichVectors.size()"
843 " = " << whichVectors.size () <<
" > 0.");
844 size_t maxColInd = 0;
845 typedef Teuchos::ArrayView<const size_t>::size_type size_type;
846 for (size_type k = 0; k < whichVectors.size (); ++k) {
847 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
848 whichVectors[k] == Teuchos::OrdinalTraits<size_t>::invalid (),
849 std::invalid_argument,
"whichVectors[" << k <<
"] = "
850 "Teuchos::OrdinalTraits<size_t>::invalid().");
851 maxColInd = std::max (maxColInd, whichVectors[k]);
853 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
854 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (1)) <= maxColInd,
855 std::invalid_argument,
"view.extent(1) = " << view.extent (1)
856 <<
" <= max(whichVectors) = " << maxColInd <<
".");
861 const size_t LDA = getDualViewStride (origView);
862 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
863 (LDA < lclNumRows, std::invalid_argument,
"Map and DualView origView "
864 "do not match. LDA = " << LDA <<
" < this->getLocalLength() = " <<
865 lclNumRows <<
". origView.extent(0) = " << origView.extent(0)
866 <<
", origView.stride(1) = " << origView.view_device().stride(1) <<
".");
868 if (whichVectors.size () == 1) {
877 const std::pair<size_t, size_t> colRng (whichVectors[0],
878 whichVectors[0] + 1);
886 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
889 const Teuchos::ArrayView<const Scalar>& data,
891 const size_t numVecs) :
894 typedef LocalOrdinal LO;
895 typedef GlobalOrdinal GO;
896 const char tfecfFuncName[] =
"MultiVector(map,data,LDA,numVecs): ";
902 const size_t lclNumRows =
903 map.is_null () ? size_t (0) : map->getLocalNumElements ();
904 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
905 (LDA < lclNumRows, std::invalid_argument,
"LDA = " << LDA <<
" < "
906 "map->getLocalNumElements() = " << lclNumRows <<
".");
908 const size_t minNumEntries = LDA * (numVecs - 1) + lclNumRows;
909 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
910 (static_cast<size_t> (data.size ()) < minNumEntries,
911 std::invalid_argument,
"Input Teuchos::ArrayView does not have enough "
912 "entries, given the input Map and number of vectors in the MultiVector."
913 " data.size() = " << data.size () <<
" < (LDA*(numVecs-1)) + "
914 "map->getLocalNumElements () = " << minNumEntries <<
".");
917 this->
view_ = allocDualView<Scalar, LO, GO, Node> (lclNumRows, numVecs);
929 Kokkos::MemoryUnmanaged> X_in_orig (X_in_raw, LDA, numVecs);
930 const Kokkos::pair<size_t, size_t> rowRng (0, lclNumRows);
931 auto X_in = Kokkos::subview (X_in_orig, rowRng, Kokkos::ALL ());
936 const size_t outStride =
937 X_out.extent (1) == 0 ? size_t (1) : X_out.stride (1);
938 if (LDA == outStride) {
945 typedef decltype (Kokkos::subview (X_out, Kokkos::ALL (), 0))
947 typedef decltype (Kokkos::subview (X_in, Kokkos::ALL (), 0))
949 for (
size_t j = 0; j < numVecs; ++j) {
950 out_col_view_type X_out_j = Kokkos::subview (X_out, Kokkos::ALL (), j);
951 in_col_view_type X_in_j = Kokkos::subview (X_in, Kokkos::ALL (), j);
958 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
961 const Teuchos::ArrayView<
const Teuchos::ArrayView<const Scalar> >& ArrayOfPtrs,
962 const size_t numVecs) :
966 typedef LocalOrdinal LO;
967 typedef GlobalOrdinal GO;
968 const char tfecfFuncName[] =
"MultiVector(map,ArrayOfPtrs,numVecs): ";
971 const size_t lclNumRows =
972 map.is_null () ? size_t (0) : map->getLocalNumElements ();
973 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
974 (numVecs < 1 || numVecs != static_cast<size_t> (ArrayOfPtrs.size ()),
975 std::runtime_error,
"Either numVecs (= " << numVecs <<
") < 1, or "
976 "ArrayOfPtrs.size() (= " << ArrayOfPtrs.size () <<
") != numVecs.");
977 for (
size_t j = 0; j < numVecs; ++j) {
978 Teuchos::ArrayView<const Scalar> X_j_av = ArrayOfPtrs[j];
979 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
980 static_cast<size_t> (X_j_av.size ()) < lclNumRows,
981 std::invalid_argument,
"ArrayOfPtrs[" << j <<
"].size() = "
982 << X_j_av.size () <<
" < map->getLocalNumElements() = " << lclNumRows
986 view_ = allocDualView<Scalar, LO, GO, Node> (lclNumRows, numVecs);
991 using array_layout =
typename decltype (X_out)::array_layout;
992 using input_col_view_type =
typename Kokkos::View<
const IST*,
995 Kokkos::MemoryUnmanaged>;
997 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
998 for (
size_t j = 0; j < numVecs; ++j) {
999 Teuchos::ArrayView<const IST> X_j_av =
1000 Teuchos::av_reinterpret_cast<
const IST> (ArrayOfPtrs[j]);
1001 input_col_view_type X_j_in (X_j_av.getRawPtr (), lclNumRows);
1002 auto X_j_out = Kokkos::subview (X_out, rowRng, j);
1008 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1011 return whichVectors_.empty ();
1014 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1019 if (this->getMap ().is_null ()) {
1020 return static_cast<size_t> (0);
1022 return this->getMap ()->getLocalNumElements ();
1026 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1031 if (this->getMap ().is_null ()) {
1032 return static_cast<size_t> (0);
1034 return this->getMap ()->getGlobalNumElements ();
1038 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1043 return isConstantStride () ? getDualViewStride (view_) : size_t (0);
1046 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1052 auto thisData = view_.getDualView().view_host().data();
1053 auto otherData = other.
view_.getDualView().view_host().data();
1054 size_t thisSpan = view_.getDualView().view_host().span();
1055 size_t otherSpan = other.
view_.getDualView().view_host().span();
1056 return (otherData <= thisData && thisData < otherData + otherSpan)
1057 || (thisData <= otherData && otherData < thisData + thisSpan);
1060 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1069 const MV* src =
dynamic_cast<const MV*
> (&sourceObj);
1070 if (src ==
nullptr) {
1084 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1088 return this->getNumVectors ();
1091 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1096 const size_t numSameIDs,
1097 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs,
1098 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs,
1100 const execution_space &space)
1102 using ::Tpetra::Details::Behavior;
1104 using ::Tpetra::Details::ProfilingRegion;
1106 using KokkosRefactor::Details::permute_array_multi_column;
1107 using KokkosRefactor::Details::permute_array_multi_column_variable_stride;
1108 using Kokkos::Compat::create_const_view;
1112 MV& sourceMV =
const_cast<MV &
>(
dynamic_cast<const MV&
> (sourceObj));
1115 if (importsAreAliased () && (this->constantNumberOfPackets () != 0) && Behavior::enableGranularTransfers()) {
1120 copyOnHost = ! Behavior::assumeMpiIsGPUAware ();
1123 copyOnHost = runKernelOnHost(sourceMV);
1125 const char longFuncNameHost[] =
"Tpetra::MultiVector::copyAndPermute[Host]";
1126 const char longFuncNameDevice[] =
"Tpetra::MultiVector::copyAndPermute[Device]";
1127 const char tfecfFuncName[] =
"copyAndPermute: ";
1128 ProfilingRegion regionCAP (copyOnHost ? longFuncNameHost : longFuncNameDevice);
1130 const bool verbose = Behavior::verbose ();
1131 std::unique_ptr<std::string> prefix;
1133 auto map = this->getMap ();
1134 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
1135 const int myRank = comm.is_null () ? -1 : comm->getRank ();
1136 std::ostringstream os;
1137 os <<
"Proc " << myRank <<
": MV::copyAndPermute: ";
1138 prefix = std::unique_ptr<std::string> (
new std::string (os.str ()));
1139 os <<
"Start" << endl;
1140 std::cerr << os.str ();
1143 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1144 (permuteToLIDs.extent (0) != permuteFromLIDs.extent (0),
1145 std::logic_error,
"permuteToLIDs.extent(0) = "
1146 << permuteToLIDs.extent (0) <<
" != permuteFromLIDs.extent(0) = "
1147 << permuteFromLIDs.extent (0) <<
".");
1148 const size_t numCols = this->getNumVectors ();
1152 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1153 (sourceMV.need_sync_device () && sourceMV.need_sync_host (),
1154 std::logic_error,
"Input MultiVector needs sync to both host "
1157 std::ostringstream os;
1158 os << *prefix <<
"copyOnHost=" << (copyOnHost ?
"true" :
"false") << endl;
1159 std::cerr << os.str ();
1163 std::ostringstream os;
1164 os << *prefix <<
"Copy first " << numSameIDs <<
" elements" << endl;
1165 std::cerr << os.str ();
1190 if (numSameIDs > 0) {
1191 const std::pair<size_t, size_t> rows (0, numSameIDs);
1193 ProfilingRegion regionC (
"Tpetra::MultiVector::copy[Host]");
1194 auto tgt_h = this->getLocalViewHost(Access::ReadWrite);
1195 auto src_h = sourceMV.getLocalViewHost(Access::ReadOnly);
1197 for (
size_t j = 0; j < numCols; ++j) {
1198 const size_t tgtCol = isConstantStride () ? j : whichVectors_[j];
1199 const size_t srcCol =
1200 sourceMV.isConstantStride () ? j : sourceMV.whichVectors_[j];
1202 auto tgt_j = Kokkos::subview (tgt_h, rows, tgtCol);
1203 auto src_j = Kokkos::subview (src_h, rows, srcCol);
1207 Kokkos::RangePolicy<execution_space, size_t>;
1208 range_t rp(space, 0,numSameIDs);
1209 Tpetra::Details::AddAssignFunctor<decltype(tgt_j), decltype(src_j)>
1211 Kokkos::parallel_for(rp, aaf);
1222 ProfilingRegion regionC (
"Tpetra::MultiVector::copy[Device]");
1223 auto tgt_d = this->getLocalViewDevice(Access::ReadWrite);
1224 auto src_d = sourceMV.getLocalViewDevice(Access::ReadOnly);
1226 for (
size_t j = 0; j < numCols; ++j) {
1227 const size_t tgtCol = isConstantStride () ? j : whichVectors_[j];
1228 const size_t srcCol =
1229 sourceMV.isConstantStride () ? j : sourceMV.whichVectors_[j];
1231 auto tgt_j = Kokkos::subview (tgt_d, rows, tgtCol);
1232 auto src_j = Kokkos::subview (src_d, rows, srcCol);
1236 Kokkos::RangePolicy<execution_space, size_t>;
1237 range_t rp(space, 0,numSameIDs);
1238 Tpetra::Details::AddAssignFunctor<decltype(tgt_j), decltype(src_j)>
1240 Kokkos::parallel_for(rp, aaf);
1263 if (permuteFromLIDs.extent (0) == 0 ||
1264 permuteToLIDs.extent (0) == 0) {
1266 std::ostringstream os;
1267 os << *prefix <<
"No permutations. Done!" << endl;
1268 std::cerr << os.str ();
1272 ProfilingRegion regionP (
"Tpetra::MultiVector::permute");
1275 std::ostringstream os;
1276 os << *prefix <<
"Permute" << endl;
1277 std::cerr << os.str ();
1282 const bool nonConstStride =
1283 ! this->isConstantStride () || ! sourceMV.isConstantStride ();
1286 std::ostringstream os;
1287 os << *prefix <<
"nonConstStride="
1288 << (nonConstStride ?
"true" :
"false") << endl;
1289 std::cerr << os.str ();
1296 Kokkos::DualView<const size_t*, device_type> srcWhichVecs;
1297 Kokkos::DualView<const size_t*, device_type> tgtWhichVecs;
1298 if (nonConstStride) {
1299 if (this->whichVectors_.size () == 0) {
1300 Kokkos::DualView<size_t*, device_type> tmpTgt (
"tgtWhichVecs", numCols);
1301 tmpTgt.modify_host ();
1302 for (
size_t j = 0; j < numCols; ++j) {
1303 tmpTgt.view_host()(j) = j;
1306 tmpTgt.sync_device ();
1308 tgtWhichVecs = tmpTgt;
1311 Teuchos::ArrayView<const size_t> tgtWhichVecsT = this->whichVectors_ ();
1313 getDualViewCopyFromArrayView<size_t, device_type> (tgtWhichVecsT,
1317 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1318 (static_cast<size_t> (tgtWhichVecs.extent (0)) !=
1319 this->getNumVectors (),
1320 std::logic_error,
"tgtWhichVecs.extent(0) = " <<
1321 tgtWhichVecs.extent (0) <<
" != this->getNumVectors() = " <<
1322 this->getNumVectors () <<
".");
1324 if (sourceMV.whichVectors_.size () == 0) {
1325 Kokkos::DualView<size_t*, device_type> tmpSrc (
"srcWhichVecs", numCols);
1326 tmpSrc.modify_host ();
1327 for (
size_t j = 0; j < numCols; ++j) {
1328 tmpSrc.view_host()(j) = j;
1331 tmpSrc.sync_device ();
1333 srcWhichVecs = tmpSrc;
1336 Teuchos::ArrayView<const size_t> srcWhichVecsT =
1337 sourceMV.whichVectors_ ();
1339 getDualViewCopyFromArrayView<size_t, device_type> (srcWhichVecsT,
1343 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1344 (static_cast<size_t> (srcWhichVecs.extent (0)) !=
1345 sourceMV.getNumVectors (), std::logic_error,
1346 "srcWhichVecs.extent(0) = " << srcWhichVecs.extent (0)
1347 <<
" != sourceMV.getNumVectors() = " << sourceMV.getNumVectors ()
1353 std::ostringstream os;
1354 os << *prefix <<
"Get permute LIDs on host" << std::endl;
1355 std::cerr << os.str ();
1357 auto tgt_h = this->getLocalViewHost(Access::ReadWrite);
1358 auto src_h = sourceMV.getLocalViewHost(Access::ReadOnly);
1360 TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_host () );
1361 auto permuteToLIDs_h = create_const_view (permuteToLIDs.view_host ());
1362 TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_host () );
1363 auto permuteFromLIDs_h =
1364 create_const_view (permuteFromLIDs.view_host ());
1367 std::ostringstream os;
1368 os << *prefix <<
"Permute on host" << endl;
1369 std::cerr << os.str ();
1371 if (nonConstStride) {
1374 auto tgtWhichVecs_h =
1375 create_const_view (tgtWhichVecs.view_host ());
1376 auto srcWhichVecs_h =
1377 create_const_view (srcWhichVecs.view_host ());
1379 using op_type = KokkosRefactor::Details::AddOp;
1380 permute_array_multi_column_variable_stride (tgt_h, src_h,
1384 srcWhichVecs_h, numCols,
1388 using op_type = KokkosRefactor::Details::InsertOp;
1389 permute_array_multi_column_variable_stride (tgt_h, src_h,
1393 srcWhichVecs_h, numCols,
1399 using op_type = KokkosRefactor::Details::AddOp;
1400 permute_array_multi_column (tgt_h, src_h, permuteToLIDs_h,
1401 permuteFromLIDs_h, numCols, op_type());
1404 using op_type = KokkosRefactor::Details::InsertOp;
1405 permute_array_multi_column (tgt_h, src_h, permuteToLIDs_h,
1406 permuteFromLIDs_h, numCols, op_type());
1412 std::ostringstream os;
1413 os << *prefix <<
"Get permute LIDs on device" << endl;
1414 std::cerr << os.str ();
1416 auto tgt_d = this->getLocalViewDevice(Access::ReadWrite);
1417 auto src_d = sourceMV.getLocalViewDevice(Access::ReadOnly);
1419 TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_device () );
1420 auto permuteToLIDs_d = create_const_view (permuteToLIDs.view_device ());
1421 TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_device () );
1422 auto permuteFromLIDs_d =
1423 create_const_view (permuteFromLIDs.view_device ());
1426 std::ostringstream os;
1427 os << *prefix <<
"Permute on device" << endl;
1428 std::cerr << os.str ();
1430 if (nonConstStride) {
1433 auto tgtWhichVecs_d = create_const_view (tgtWhichVecs.view_device ());
1434 auto srcWhichVecs_d = create_const_view (srcWhichVecs.view_device ());
1436 using op_type = KokkosRefactor::Details::AddOp;
1437 permute_array_multi_column_variable_stride (space, tgt_d, src_d,
1441 srcWhichVecs_d, numCols,
1445 using op_type = KokkosRefactor::Details::InsertOp;
1446 permute_array_multi_column_variable_stride (space, tgt_d, src_d,
1450 srcWhichVecs_d, numCols,
1456 using op_type = KokkosRefactor::Details::AddOp;
1457 permute_array_multi_column (space, tgt_d, src_d, permuteToLIDs_d,
1458 permuteFromLIDs_d, numCols, op_type());
1461 using op_type = KokkosRefactor::Details::InsertOp;
1462 permute_array_multi_column (space, tgt_d, src_d, permuteToLIDs_d,
1463 permuteFromLIDs_d, numCols, op_type());
1469 std::ostringstream os;
1470 os << *prefix <<
"Done!" << endl;
1471 std::cerr << os.str ();
1476 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1479 const Kokkos::DualView<const local_ordinal_type *, buffer_device_type>
1481 const Kokkos::DualView<const local_ordinal_type *, buffer_device_type>
1484 copyAndPermute(sourceObj, numSameIDs, permuteToLIDs, permuteFromLIDs, CM,
1489 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1494 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
1495 Kokkos::DualView<impl_scalar_type*, buffer_device_type>& exports,
1496 Kokkos::DualView<size_t*, buffer_device_type> ,
1497 size_t& constantNumPackets,
1498 const execution_space &space)
1500 using ::Tpetra::Details::Behavior;
1501 using ::Tpetra::Details::ProfilingRegion;
1503 using Kokkos::Compat::create_const_view;
1504 using Kokkos::Compat::getKokkosViewDeepCopy;
1509 MV& sourceMV =
const_cast<MV&
>(
dynamic_cast<const MV&
> (sourceObj));
1511 const bool packOnHost = runKernelOnHost(sourceMV);
1512 const char longFuncNameHost[] =
"Tpetra::MultiVector::packAndPrepare[Host]";
1513 const char longFuncNameDevice[] =
"Tpetra::MultiVector::packAndPrepare[Device]";
1514 const char tfecfFuncName[] =
"packAndPrepare: ";
1515 ProfilingRegion regionPAP (packOnHost ? longFuncNameHost : longFuncNameDevice);
1523 const bool debugCheckIndices = Behavior::debug ();
1528 const bool printDebugOutput = Behavior::verbose ();
1530 std::unique_ptr<std::string> prefix;
1531 if (printDebugOutput) {
1532 auto map = this->getMap ();
1533 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
1534 const int myRank = comm.is_null () ? -1 : comm->getRank ();
1535 std::ostringstream os;
1536 os <<
"Proc " << myRank <<
": MV::packAndPrepare: ";
1537 prefix = std::unique_ptr<std::string> (
new std::string (os.str ()));
1538 os <<
"Start" << endl;
1539 std::cerr << os.str ();
1543 const size_t numCols = sourceMV.getNumVectors ();
1548 constantNumPackets = numCols;
1552 if (exportLIDs.extent (0) == 0) {
1553 if (printDebugOutput) {
1554 std::ostringstream os;
1555 os << *prefix <<
"No exports on this proc, DONE" << std::endl;
1556 std::cerr << os.str ();
1576 const size_t numExportLIDs = exportLIDs.extent (0);
1577 const size_t newExportsSize = numCols * numExportLIDs;
1578 if (printDebugOutput) {
1579 std::ostringstream os;
1580 os << *prefix <<
"realloc: "
1581 <<
"numExportLIDs: " << numExportLIDs
1582 <<
", exports.extent(0): " << exports.extent (0)
1583 <<
", newExportsSize: " << newExportsSize << std::endl;
1584 std::cerr << os.str ();
1590 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1591 (sourceMV.need_sync_device () && sourceMV.need_sync_host (),
1592 std::logic_error,
"Input MultiVector needs sync to both host "
1594 if (printDebugOutput) {
1595 std::ostringstream os;
1596 os << *prefix <<
"packOnHost=" << (packOnHost ?
"true" :
"false") << endl;
1597 std::cerr << os.str ();
1609 exports.clear_sync_state ();
1610 exports.modify_host ();
1618 exports.clear_sync_state ();
1619 exports.modify_device ();
1635 if (sourceMV.isConstantStride ()) {
1636 using KokkosRefactor::Details::pack_array_single_column;
1637 if (printDebugOutput) {
1638 std::ostringstream os;
1639 os << *prefix <<
"Pack numCols=1 const stride" << endl;
1640 std::cerr << os.str ();
1643 auto src_host = sourceMV.getLocalViewHost(Access::ReadOnly);
1644 pack_array_single_column (exports.view_host (),
1646 exportLIDs.view_host (),
1651 auto src_dev = sourceMV.getLocalViewDevice(Access::ReadOnly);
1652 pack_array_single_column (exports.view_device (),
1654 exportLIDs.view_device (),
1661 using KokkosRefactor::Details::pack_array_single_column;
1662 if (printDebugOutput) {
1663 std::ostringstream os;
1664 os << *prefix <<
"Pack numCols=1 nonconst stride" << endl;
1665 std::cerr << os.str ();
1668 auto src_host = sourceMV.getLocalViewHost(Access::ReadOnly);
1669 pack_array_single_column (exports.view_host (),
1671 exportLIDs.view_host (),
1672 sourceMV.whichVectors_[0],
1676 auto src_dev = sourceMV.getLocalViewDevice(Access::ReadOnly);
1677 pack_array_single_column (exports.view_device (),
1679 exportLIDs.view_device (),
1680 sourceMV.whichVectors_[0],
1687 if (sourceMV.isConstantStride ()) {
1688 using KokkosRefactor::Details::pack_array_multi_column;
1689 if (printDebugOutput) {
1690 std::ostringstream os;
1691 os << *prefix <<
"Pack numCols=" << numCols <<
" const stride" << endl;
1692 std::cerr << os.str ();
1695 auto src_host = sourceMV.getLocalViewHost(Access::ReadOnly);
1696 pack_array_multi_column (exports.view_host (),
1698 exportLIDs.view_host (),
1703 auto src_dev = sourceMV.getLocalViewDevice(Access::ReadOnly);
1704 pack_array_multi_column (exports.view_device (),
1706 exportLIDs.view_device (),
1713 using KokkosRefactor::Details::pack_array_multi_column_variable_stride;
1714 if (printDebugOutput) {
1715 std::ostringstream os;
1716 os << *prefix <<
"Pack numCols=" << numCols <<
" nonconst stride"
1718 std::cerr << os.str ();
1723 using IST = impl_scalar_type;
1724 using DV = Kokkos::DualView<IST*, device_type>;
1725 using HES =
typename DV::t_host::execution_space;
1726 using DES =
typename DV::t_dev::execution_space;
1727 Teuchos::ArrayView<const size_t> whichVecs = sourceMV.whichVectors_ ();
1729 auto src_host = sourceMV.getLocalViewHost(Access::ReadOnly);
1730 pack_array_multi_column_variable_stride
1731 (exports.view_host (),
1733 exportLIDs.view_host (),
1734 getKokkosViewDeepCopy<HES> (whichVecs),
1739 auto src_dev = sourceMV.getLocalViewDevice(Access::ReadOnly);
1740 pack_array_multi_column_variable_stride
1741 (exports.view_device (),
1743 exportLIDs.view_device (),
1744 getKokkosViewDeepCopy<DES> (whichVecs),
1746 debugCheckIndices, space);
1751 if (printDebugOutput) {
1752 std::ostringstream os;
1753 os << *prefix <<
"Done!" << endl;
1754 std::cerr << os.str ();
1760 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1765 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
1766 Kokkos::DualView<impl_scalar_type*, buffer_device_type>& exports,
1767 Kokkos::DualView<size_t*, buffer_device_type> numExportPacketsPerLID,
1768 size_t& constantNumPackets) {
1769 packAndPrepare(sourceObj, exportLIDs, exports, numExportPacketsPerLID, constantNumPackets, execution_space());
1773 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1775 typename std::enable_if<std::is_same<typename Tpetra::Details::DefaultTypes::CommBufferMemorySpace<typename NO::execution_space>::type,
1776 typename NO::device_type::memory_space>::value,
1781 const std::string* prefix,
1782 const bool areRemoteLIDsContiguous,
1791 std::ostringstream os;
1792 os << *prefix <<
"Realloc (if needed) imports_ from "
1793 << this->imports_.extent (0) <<
" to " << newSize << std::endl;
1794 std::cerr << os.str ();
1797 bool reallocated =
false;
1799 using IST = impl_scalar_type;
1800 using DV = Kokkos::DualView<IST*, Kokkos::LayoutLeft, buffer_device_type>;
1812 if ((std::is_same_v<typename dual_view_type::t_dev::device_type, typename dual_view_type::t_host::device_type> ||
1813 (Details::Behavior::assumeMpiIsGPUAware () && !this->need_sync_device()) ||
1814 (!Details::Behavior::assumeMpiIsGPUAware () && !this->need_sync_host())) &&
1815 areRemoteLIDsContiguous &&
1817 (getNumVectors() == 1) &&
1820 size_t offset = getLocalLength () - newSize;
1821 reallocated = this->imports_.view_device().data() != view_.getDualView().view_device().data() + offset;
1823 typedef std::pair<size_t, size_t> range_type;
1824 this->imports_ = DV(view_.getDualView(),
1825 range_type (offset, getLocalLength () ),
1829 std::ostringstream os;
1830 os << *prefix <<
"Aliased imports_ to MV.view_" << std::endl;
1831 std::cerr << os.str ();
1841 std::ostringstream os;
1842 os << *prefix <<
"Finished realloc'ing unaliased_imports_" << std::endl;
1843 std::cerr << os.str ();
1845 this->imports_ = this->unaliased_imports_;
1850 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1852 typename std::enable_if<!std::is_same<typename Tpetra::Details::DefaultTypes::CommBufferMemorySpace<typename NO::execution_space>::type,
1853 typename NO::device_type::memory_space>::value,
1858 const std::string* prefix,
1859 const bool areRemoteLIDsContiguous,
1865 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1870 const std::string* prefix,
1871 const bool areRemoteLIDsContiguous,
1874 return reallocImportsIfNeededImpl<Node>(newSize, verbose, prefix, areRemoteLIDsContiguous, CM);
1877 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1881 return (this->imports_.view_device().data() + this->imports_.view_device().extent(0) ==
1882 view_.getDualView().view_device().data() + view_.getDualView().view_device().extent(0));
1886 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1890 (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& importLIDs,
1891 Kokkos::DualView<impl_scalar_type*, buffer_device_type> imports,
1892 Kokkos::DualView<size_t*, buffer_device_type> ,
1893 const size_t constantNumPackets,
1895 const execution_space &space)
1897 using ::Tpetra::Details::Behavior;
1898 using ::Tpetra::Details::ProfilingRegion;
1899 using KokkosRefactor::Details::unpack_array_multi_column;
1900 using KokkosRefactor::Details::unpack_array_multi_column_variable_stride;
1901 using Kokkos::Compat::getKokkosViewDeepCopy;
1904 const bool unpackOnHost = runKernelOnHost(imports);
1906 const char longFuncNameHost[] =
"Tpetra::MultiVector::unpackAndCombine[Host]";
1907 const char longFuncNameDevice[] =
"Tpetra::MultiVector::unpackAndCombine[Device]";
1908 const char * longFuncName = unpackOnHost ? longFuncNameHost : longFuncNameDevice;
1909 const char tfecfFuncName[] =
"unpackAndCombine: ";
1917 const bool debugCheckIndices = Behavior::debug ();
1919 const bool printDebugOutput = Behavior::verbose ();
1920 std::unique_ptr<std::string> prefix;
1921 if (printDebugOutput) {
1922 auto map = this->getMap ();
1923 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
1924 const int myRank = comm.is_null () ? -1 : comm->getRank ();
1925 std::ostringstream os;
1926 os <<
"Proc " << myRank <<
": " << longFuncName <<
": ";
1927 prefix = std::unique_ptr<std::string> (
new std::string (os.str ()));
1928 os <<
"Start" << endl;
1929 std::cerr << os.str ();
1933 if (importLIDs.extent (0) == 0) {
1934 if (printDebugOutput) {
1935 std::ostringstream os;
1936 os << *prefix <<
"No imports. Done!" << endl;
1937 std::cerr << os.str ();
1943 if (importsAreAliased()) {
1944 if (printDebugOutput) {
1945 std::ostringstream os;
1946 os << *prefix <<
"Skipping unpack, since imports_ is aliased to MV.view_. Done!" << endl;
1947 std::cerr << os.str ();
1952 ProfilingRegion regionUAC (longFuncName);
1954 const size_t numVecs = getNumVectors ();
1955 if (debugCheckIndices) {
1956 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1957 (static_cast<size_t> (imports.extent (0)) !=
1958 numVecs * importLIDs.extent (0),
1960 "imports.extent(0) = " << imports.extent (0)
1961 <<
" != getNumVectors() * importLIDs.extent(0) = " << numVecs
1962 <<
" * " << importLIDs.extent (0) <<
" = "
1963 << numVecs * importLIDs.extent (0) <<
".");
1965 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1966 (constantNumPackets == static_cast<size_t> (0), std::runtime_error,
1967 "constantNumPackets input argument must be nonzero.");
1969 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1970 (static_cast<size_t> (numVecs) !=
1971 static_cast<size_t> (constantNumPackets),
1972 std::runtime_error,
"constantNumPackets must equal numVecs.");
1981 if (this->imports_.need_sync_host()) this->imports_.sync_host();
1984 if (this->imports_.need_sync_device()) this->imports_.sync_device();
1987 if (printDebugOutput) {
1988 std::ostringstream os;
1989 os << *prefix <<
"unpackOnHost=" << (unpackOnHost ?
"true" :
"false")
1991 std::cerr << os.str ();
1996 auto imports_d = imports.view_device ();
1997 auto imports_h = imports.view_host ();
1998 auto importLIDs_d = importLIDs.view_device ();
1999 auto importLIDs_h = importLIDs.view_host ();
2001 Kokkos::DualView<size_t*, device_type> whichVecs;
2002 if (! isConstantStride ()) {
2003 Kokkos::View<
const size_t*, Kokkos::HostSpace,
2004 Kokkos::MemoryUnmanaged> whichVecsIn (whichVectors_.getRawPtr (),
2006 whichVecs = Kokkos::DualView<size_t*, device_type> (
"whichVecs", numVecs);
2008 whichVecs.modify_host ();
2013 whichVecs.modify_device ();
2018 auto whichVecs_d = whichVecs.view_device ();
2019 auto whichVecs_h = whichVecs.view_host ();
2029 if (numVecs > 0 && importLIDs.extent (0) > 0) {
2030 using dev_exec_space =
typename dual_view_type::t_dev::execution_space;
2031 using host_exec_space =
typename dual_view_type::t_host::execution_space;
2034 const bool use_atomic_updates = unpackOnHost ?
2035 host_exec_space().concurrency () != 1 :
2036 dev_exec_space().concurrency () != 1;
2038 if (printDebugOutput) {
2039 std::ostringstream os;
2041 std::cerr << os.str ();
2048 using op_type = KokkosRefactor::Details::InsertOp;
2049 if (isConstantStride ()) {
2051 auto X_h = this->getLocalViewHost(Access::ReadWrite);
2052 unpack_array_multi_column (host_exec_space (),
2053 X_h, imports_h, importLIDs_h,
2054 op_type (), numVecs,
2060 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
2061 unpack_array_multi_column (space,
2062 X_d, imports_d, importLIDs_d,
2063 op_type (), numVecs,
2070 auto X_h = this->getLocalViewHost(Access::ReadWrite);
2071 unpack_array_multi_column_variable_stride (host_exec_space (),
2081 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
2082 unpack_array_multi_column_variable_stride (space,
2094 using op_type = KokkosRefactor::Details::AddOp;
2095 if (isConstantStride ()) {
2097 auto X_h = this->getLocalViewHost(Access::ReadWrite);
2098 unpack_array_multi_column (host_exec_space (),
2099 X_h, imports_h, importLIDs_h,
2100 op_type (), numVecs,
2105 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
2106 unpack_array_multi_column (space,
2107 X_d, imports_d, importLIDs_d,
2108 op_type (), numVecs,
2115 auto X_h = this->getLocalViewHost(Access::ReadWrite);
2116 unpack_array_multi_column_variable_stride (host_exec_space (),
2126 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
2127 unpack_array_multi_column_variable_stride (space,
2139 using op_type = KokkosRefactor::Details::AbsMaxOp;
2140 if (isConstantStride ()) {
2142 auto X_h = this->getLocalViewHost(Access::ReadWrite);
2143 unpack_array_multi_column (host_exec_space (),
2144 X_h, imports_h, importLIDs_h,
2145 op_type (), numVecs,
2150 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
2151 unpack_array_multi_column (space,
2152 X_d, imports_d, importLIDs_d,
2153 op_type (), numVecs,
2160 auto X_h = this->getLocalViewHost(Access::ReadWrite);
2161 unpack_array_multi_column_variable_stride (host_exec_space (),
2171 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
2172 unpack_array_multi_column_variable_stride (space,
2184 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2185 (
true, std::logic_error,
"Invalid CombineMode");
2189 if (printDebugOutput) {
2190 std::ostringstream os;
2191 os << *prefix <<
"Nothing to unpack" << endl;
2192 std::cerr << os.str ();
2196 if (printDebugOutput) {
2197 std::ostringstream os;
2198 os << *prefix <<
"Done!" << endl;
2199 std::cerr << os.str ();
2204 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2208 (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& importLIDs,
2209 Kokkos::DualView<impl_scalar_type*, buffer_device_type> imports,
2210 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
2211 const size_t constantNumPackets,
2213 unpackAndCombine(importLIDs, imports, numPacketsPerLID, constantNumPackets, CM, execution_space());
2217 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2222 if (isConstantStride ()) {
2223 return static_cast<size_t> (view_.extent (1));
2225 return static_cast<size_t> (whichVectors_.size ());
2233 gblDotImpl (
const RV& dotsOut,
2234 const Teuchos::RCP<
const Teuchos::Comm<int> >& comm,
2235 const bool distributed)
2237 using Teuchos::REDUCE_MAX;
2238 using Teuchos::REDUCE_SUM;
2239 using Teuchos::reduceAll;
2240 typedef typename RV::non_const_value_type dot_type;
2242 const size_t numVecs = dotsOut.extent (0);
2257 if (distributed && ! comm.is_null ()) {
2260 const int nv =
static_cast<int> (numVecs);
2261 const bool commIsInterComm = ::Tpetra::Details::isInterComm (*comm);
2263 if (commIsInterComm) {
2267 typename RV::non_const_type lclDots (Kokkos::ViewAllocateWithoutInitializing (
"tmp"), numVecs);
2270 const dot_type*
const lclSum = lclDots.data ();
2271 dot_type*
const gblSum = dotsOut.data ();
2272 reduceAll<int, dot_type> (*comm, REDUCE_SUM, nv, lclSum, gblSum);
2275 dot_type*
const inout = dotsOut.data ();
2276 reduceAll<int, dot_type> (*comm, REDUCE_SUM, nv, inout, inout);
2282 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2286 const Kokkos::View<dot_type*, Kokkos::HostSpace>& dots)
const
2288 using ::Tpetra::Details::Behavior;
2289 using Kokkos::subview;
2290 using Teuchos::Comm;
2291 using Teuchos::null;
2294 typedef Kokkos::View<dot_type*, Kokkos::HostSpace> RV;
2295 typedef typename dual_view_type::t_dev_const XMV;
2296 const char tfecfFuncName[] =
"Tpetra::MultiVector::dot: ";
2300 const size_t numVecs = this->getNumVectors ();
2304 const size_t lclNumRows = this->getLocalLength ();
2305 const size_t numDots =
static_cast<size_t> (dots.extent (0));
2306 const bool debug = Behavior::debug ();
2309 const bool compat = this->getMap ()->isCompatible (* (A.
getMap ()));
2310 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2311 (! compat, std::invalid_argument,
"'*this' MultiVector is not "
2312 "compatible with the input MultiVector A. We only test for this "
2323 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2325 "MultiVectors do not have the same local length. "
2326 "this->getLocalLength() = " << lclNumRows <<
" != "
2328 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2330 "MultiVectors must have the same number of columns (vectors). "
2331 "this->getNumVectors() = " << numVecs <<
" != "
2333 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2334 numDots != numVecs, std::runtime_error,
2335 "The output array 'dots' must have the same number of entries as the "
2336 "number of columns (vectors) in *this and A. dots.extent(0) = " <<
2337 numDots <<
" != this->getNumVectors() = " << numVecs <<
".");
2339 const std::pair<size_t, size_t> colRng (0, numVecs);
2340 RV dotsOut = subview (dots, colRng);
2341 RCP<const Comm<int> > comm = this->getMap ().is_null () ? null :
2342 this->getMap ()->getComm ();
2344 auto thisView = this->getLocalViewDevice(Access::ReadOnly);
2347 ::Tpetra::Details::lclDot<RV, XMV> (dotsOut, thisView, A_view, lclNumRows, numVecs,
2348 this->whichVectors_.getRawPtr (),
2359 exec_space_instance.fence();
2361 gblDotImpl (dotsOut, comm, this->isDistributed ());
2365 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2370 using ::Tpetra::Details::ProfilingRegion;
2372 using dot_type =
typename MV::dot_type;
2373 ProfilingRegion region (
"Tpetra::multiVectorSingleColumnDot");
2376 Teuchos::RCP<const Teuchos::Comm<int> > comm =
2377 map.is_null () ? Teuchos::null : map->getComm ();
2378 if (comm.is_null ()) {
2379 return Kokkos::ArithTraits<dot_type>::zero ();
2382 using LO = LocalOrdinal;
2386 const LO lclNumRows =
static_cast<LO
> (std::min (x.
getLocalLength (),
2388 const Kokkos::pair<LO, LO> rowRng (0, lclNumRows);
2389 dot_type lclDot = Kokkos::ArithTraits<dot_type>::zero ();
2390 dot_type gblDot = Kokkos::ArithTraits<dot_type>::zero ();
2397 auto x_1d = Kokkos::subview (x_2d, rowRng, 0);
2399 auto y_1d = Kokkos::subview (y_2d, rowRng, 0);
2400 lclDot = KokkosBlas::dot (x_1d, y_1d);
2403 using Teuchos::outArg;
2404 using Teuchos::REDUCE_SUM;
2405 using Teuchos::reduceAll;
2406 reduceAll<int, dot_type> (*comm, REDUCE_SUM, lclDot, outArg (gblDot));
2418 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2422 const Teuchos::ArrayView<dot_type>& dots)
const
2424 const char tfecfFuncName[] =
"dot: ";
2427 const size_t numVecs = this->getNumVectors ();
2428 const size_t lclNumRows = this->getLocalLength ();
2429 const size_t numDots =
static_cast<size_t> (dots.size ());
2439 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2441 "MultiVectors do not have the same local length. "
2442 "this->getLocalLength() = " << lclNumRows <<
" != "
2444 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2446 "MultiVectors must have the same number of columns (vectors). "
2447 "this->getNumVectors() = " << numVecs <<
" != "
2449 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2450 (numDots != numVecs, std::runtime_error,
2451 "The output array 'dots' must have the same number of entries as the "
2452 "number of columns (vectors) in *this and A. dots.extent(0) = " <<
2453 numDots <<
" != this->getNumVectors() = " << numVecs <<
".");
2456 const dot_type gblDot = multiVectorSingleColumnDot (*
this, A);
2460 this->dot (A, Kokkos::View<dot_type*, Kokkos::HostSpace>(dots.getRawPtr (), numDots));
2464 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2467 norm2 (
const Teuchos::ArrayView<mag_type>& norms)
const
2470 using ::Tpetra::Details::NORM_TWO;
2471 using ::Tpetra::Details::ProfilingRegion;
2472 ProfilingRegion region (
"Tpetra::MV::norm2 (host output)");
2475 MV& X =
const_cast<MV&
> (*this);
2476 multiVectorNormImpl (norms.getRawPtr (), X, NORM_TWO);
2479 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2482 norm2 (
const Kokkos::View<mag_type*, Kokkos::HostSpace>& norms)
const
2484 Teuchos::ArrayView<mag_type> norms_av (norms.data (), norms.extent (0));
2485 this->norm2 (norms_av);
2489 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2492 norm1 (
const Teuchos::ArrayView<mag_type>& norms)
const
2495 using ::Tpetra::Details::NORM_ONE;
2496 using ::Tpetra::Details::ProfilingRegion;
2497 ProfilingRegion region (
"Tpetra::MV::norm1 (host output)");
2500 MV& X =
const_cast<MV&
> (*this);
2501 multiVectorNormImpl (norms.data (), X, NORM_ONE);
2504 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2507 norm1 (
const Kokkos::View<mag_type*, Kokkos::HostSpace>& norms)
const
2509 Teuchos::ArrayView<mag_type> norms_av (norms.data (), norms.extent (0));
2510 this->norm1 (norms_av);
2513 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2516 normInf (
const Teuchos::ArrayView<mag_type>& norms)
const
2519 using ::Tpetra::Details::NORM_INF;
2520 using ::Tpetra::Details::ProfilingRegion;
2521 ProfilingRegion region (
"Tpetra::MV::normInf (host output)");
2524 MV& X =
const_cast<MV&
> (*this);
2525 multiVectorNormImpl (norms.getRawPtr (), X, NORM_INF);
2528 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2531 normInf (
const Kokkos::View<mag_type*, Kokkos::HostSpace>& norms)
const
2533 Teuchos::ArrayView<mag_type> norms_av (norms.data (), norms.extent (0));
2534 this->normInf (norms_av);
2537 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2540 meanValue (
const Teuchos::ArrayView<impl_scalar_type>& means)
const
2544 using Kokkos::subview;
2545 using Teuchos::Comm;
2547 using Teuchos::reduceAll;
2548 using Teuchos::REDUCE_SUM;
2549 typedef Kokkos::ArithTraits<impl_scalar_type> ATS;
2551 const size_t lclNumRows = this->getLocalLength ();
2552 const size_t numVecs = this->getNumVectors ();
2553 const size_t numMeans =
static_cast<size_t> (means.size ());
2555 TEUCHOS_TEST_FOR_EXCEPTION(
2556 numMeans != numVecs, std::runtime_error,
2557 "Tpetra::MultiVector::meanValue: means.size() = " << numMeans
2558 <<
" != this->getNumVectors() = " << numVecs <<
".");
2560 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2561 const std::pair<size_t, size_t> colRng (0, numVecs);
2566 typedef Kokkos::View<impl_scalar_type*, device_type> local_view_type;
2568 typename local_view_type::HostMirror::array_layout,
2570 Kokkos::MemoryTraits<Kokkos::Unmanaged> > host_local_view_type;
2571 host_local_view_type meansOut (means.getRawPtr (), numMeans);
2573 RCP<const Comm<int> > comm = this->getMap ().is_null () ? Teuchos::null :
2574 this->getMap ()->getComm ();
2577 const bool useHostVersion = this->need_sync_device ();
2578 if (useHostVersion) {
2580 auto X_lcl = subview (getLocalViewHost(Access::ReadOnly),
2581 rowRng, Kokkos::ALL ());
2583 Kokkos::View<impl_scalar_type*, Kokkos::HostSpace> lclSums (
"MV::meanValue tmp", numVecs);
2584 if (isConstantStride ()) {
2585 KokkosBlas::sum (lclSums, X_lcl);
2588 for (
size_t j = 0; j < numVecs; ++j) {
2589 const size_t col = whichVectors_[j];
2590 KokkosBlas::sum (subview (lclSums, j), subview (X_lcl, ALL (), col));
2597 if (! comm.is_null () && this->isDistributed ()) {
2598 reduceAll (*comm, REDUCE_SUM, static_cast<int> (numVecs),
2599 lclSums.data (), meansOut.data ());
2608 auto X_lcl = subview (this->getLocalViewDevice(Access::ReadOnly),
2609 rowRng, Kokkos::ALL ());
2612 Kokkos::View<impl_scalar_type*, Kokkos::HostSpace> lclSums (
"MV::meanValue tmp", numVecs);
2613 if (isConstantStride ()) {
2614 KokkosBlas::sum (lclSums, X_lcl);
2617 for (
size_t j = 0; j < numVecs; ++j) {
2618 const size_t col = whichVectors_[j];
2619 KokkosBlas::sum (subview (lclSums, j), subview (X_lcl, ALL (), col));
2629 exec_space_instance.fence();
2635 if (! comm.is_null () && this->isDistributed ()) {
2636 reduceAll (*comm, REDUCE_SUM, static_cast<int> (numVecs),
2637 lclSums.data (), meansOut.data ());
2648 const impl_scalar_type OneOverN =
2649 ATS::one () /
static_cast<mag_type> (this->getGlobalLength ());
2650 for (
size_t k = 0; k < numMeans; ++k) {
2651 meansOut(k) = meansOut(k) * OneOverN;
2656 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2662 typedef Kokkos::ArithTraits<IST> ATS;
2663 typedef Kokkos::Random_XorShift64_Pool<typename device_type::execution_space> pool_type;
2664 typedef typename pool_type::generator_type generator_type;
2666 const IST max = Kokkos::rand<generator_type, IST>::max ();
2667 const IST min = ATS::is_signed ? IST (-max) : ATS::zero ();
2669 this->randomize (static_cast<Scalar> (min), static_cast<Scalar> (max));
2673 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2679 typedef Tpetra::Details::Static_Random_XorShift64_Pool<typename device_type::execution_space> tpetra_pool_type;
2680 typedef Kokkos::Random_XorShift64_Pool<typename device_type::execution_space> pool_type;
2683 if(!tpetra_pool_type::isSet())
2684 tpetra_pool_type::resetPool(this->getMap()->getComm()->getRank());
2686 pool_type & rand_pool = tpetra_pool_type::getPool();
2687 const IST max =
static_cast<IST
> (maxVal);
2688 const IST min =
static_cast<IST
> (minVal);
2690 auto thisView = this->getLocalViewDevice(Access::OverwriteAll);
2692 if (isConstantStride ()) {
2693 Kokkos::fill_random (thisView, rand_pool, min, max);
2696 const size_t numVecs = getNumVectors ();
2697 for (
size_t k = 0; k < numVecs; ++k) {
2698 const size_t col = whichVectors_[k];
2699 auto X_k = Kokkos::subview (thisView, Kokkos::ALL (), col);
2700 Kokkos::fill_random (X_k, rand_pool, min, max);
2705 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2710 using ::Tpetra::Details::ProfilingRegion;
2711 using ::Tpetra::Details::Blas::fill;
2712 using DES =
typename dual_view_type::t_dev::execution_space;
2713 using HES =
typename dual_view_type::t_host::execution_space;
2714 using LO = LocalOrdinal;
2715 ProfilingRegion region (
"Tpetra::MultiVector::putScalar");
2720 const LO lclNumRows =
static_cast<LO
> (this->getLocalLength ());
2721 const LO numVecs =
static_cast<LO
> (this->getNumVectors ());
2727 const bool runOnHost = runKernelOnHost(*
this);
2729 auto X = this->getLocalViewDevice(Access::OverwriteAll);
2730 if (this->isConstantStride ()) {
2731 fill (DES (), X, theAlpha, lclNumRows, numVecs);
2734 fill (DES (), X, theAlpha, lclNumRows, numVecs,
2735 this->whichVectors_.getRawPtr ());
2739 auto X = this->getLocalViewHost(Access::OverwriteAll);
2740 if (this->isConstantStride ()) {
2741 fill (HES (), X, theAlpha, lclNumRows, numVecs);
2744 fill (HES (), X, theAlpha, lclNumRows, numVecs,
2745 this->whichVectors_.getRawPtr ());
2751 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2756 using Teuchos::ArrayRCP;
2757 using Teuchos::Comm;
2760 using LO = LocalOrdinal;
2761 using GO = GlobalOrdinal;
2767 TEUCHOS_TEST_FOR_EXCEPTION(
2768 ! this->isConstantStride (), std::logic_error,
2769 "Tpetra::MultiVector::replaceMap: This method does not currently work "
2770 "if the MultiVector is a column view of another MultiVector (that is, if "
2771 "isConstantStride() == false).");
2801 if (this->getMap ().is_null ()) {
2806 TEUCHOS_TEST_FOR_EXCEPTION(
2807 newMap.is_null (), std::invalid_argument,
2808 "Tpetra::MultiVector::replaceMap: both current and new Maps are null. "
2809 "This probably means that the input Map is incorrect.");
2813 const size_t newNumRows = newMap->getLocalNumElements ();
2814 const size_t origNumRows = view_.extent (0);
2815 const size_t numCols = this->getNumVectors ();
2817 if (origNumRows != newNumRows || view_.extent (1) != numCols) {
2818 view_ = allocDualView<ST, LO, GO, Node> (newNumRows, numCols);
2821 else if (newMap.is_null ()) {
2824 const size_t newNumRows =
static_cast<size_t> (0);
2825 const size_t numCols = this->getNumVectors ();
2826 view_ = allocDualView<ST, LO, GO, Node> (newNumRows, numCols);
2829 this->map_ = newMap;
2832 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2840 const IST theAlpha =
static_cast<IST
> (alpha);
2841 if (theAlpha == Kokkos::ArithTraits<IST>::one ()) {
2844 const size_t lclNumRows = getLocalLength ();
2845 const size_t numVecs = getNumVectors ();
2846 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2847 const std::pair<size_t, size_t> colRng (0, numVecs);
2855 const bool useHostVersion = need_sync_device ();
2856 if (useHostVersion) {
2857 auto Y_lcl = Kokkos::subview (getLocalViewHost(Access::ReadWrite), rowRng, ALL ());
2858 if (isConstantStride ()) {
2859 KokkosBlas::scal (Y_lcl, theAlpha, Y_lcl);
2862 for (
size_t k = 0; k < numVecs; ++k) {
2863 const size_t Y_col = whichVectors_[k];
2864 auto Y_k = Kokkos::subview (Y_lcl, ALL (), Y_col);
2865 KokkosBlas::scal (Y_k, theAlpha, Y_k);
2870 auto Y_lcl = Kokkos::subview (getLocalViewDevice(Access::ReadWrite), rowRng, ALL ());
2871 if (isConstantStride ()) {
2872 KokkosBlas::scal (Y_lcl, theAlpha, Y_lcl);
2875 for (
size_t k = 0; k < numVecs; ++k) {
2876 const size_t Y_col = isConstantStride () ? k : whichVectors_[k];
2877 auto Y_k = Kokkos::subview (Y_lcl, ALL (), Y_col);
2878 KokkosBlas::scal (Y_k, theAlpha, Y_k);
2885 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2888 scale (
const Teuchos::ArrayView<const Scalar>& alphas)
2890 const size_t numVecs = this->getNumVectors ();
2891 const size_t numAlphas =
static_cast<size_t> (alphas.size ());
2892 TEUCHOS_TEST_FOR_EXCEPTION(
2893 numAlphas != numVecs, std::invalid_argument,
"Tpetra::MultiVector::"
2894 "scale: alphas.size() = " << numAlphas <<
" != this->getNumVectors() = "
2898 using k_alphas_type = Kokkos::DualView<impl_scalar_type*, device_type>;
2899 k_alphas_type k_alphas (
"alphas::tmp", numAlphas);
2900 k_alphas.modify_host ();
2901 for (
size_t i = 0; i < numAlphas; ++i) {
2902 k_alphas.view_host()(i) = static_cast<impl_scalar_type> (alphas[i]);
2904 k_alphas.sync_device ();
2906 this->scale (k_alphas.view_device ());
2909 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2912 scale (
const Kokkos::View<const impl_scalar_type*, device_type>& alphas)
2915 using Kokkos::subview;
2917 const size_t lclNumRows = this->getLocalLength ();
2918 const size_t numVecs = this->getNumVectors ();
2919 TEUCHOS_TEST_FOR_EXCEPTION(
2920 static_cast<size_t> (alphas.extent (0)) != numVecs,
2921 std::invalid_argument,
"Tpetra::MultiVector::scale(alphas): "
2922 "alphas.extent(0) = " << alphas.extent (0)
2923 <<
" != this->getNumVectors () = " << numVecs <<
".");
2924 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2925 const std::pair<size_t, size_t> colRng (0, numVecs);
2935 const bool useHostVersion = this->need_sync_device ();
2936 if (useHostVersion) {
2939 auto alphas_h = Kokkos::create_mirror_view (alphas);
2943 auto Y_lcl = subview (this->getLocalViewHost(Access::ReadWrite), rowRng, ALL ());
2944 if (isConstantStride ()) {
2945 KokkosBlas::scal (Y_lcl, alphas_h, Y_lcl);
2948 for (
size_t k = 0; k < numVecs; ++k) {
2949 const size_t Y_col = this->isConstantStride () ? k :
2950 this->whichVectors_[k];
2951 auto Y_k = subview (Y_lcl, ALL (), Y_col);
2954 KokkosBlas::scal (Y_k, alphas_h(k), Y_k);
2959 auto Y_lcl = subview (this->getLocalViewDevice(Access::ReadWrite), rowRng, ALL ());
2960 if (isConstantStride ()) {
2961 KokkosBlas::scal (Y_lcl, alphas, Y_lcl);
2968 auto alphas_h = Kokkos::create_mirror_view (alphas);
2972 for (
size_t k = 0; k < numVecs; ++k) {
2973 const size_t Y_col = this->isConstantStride () ? k :
2974 this->whichVectors_[k];
2975 auto Y_k = subview (Y_lcl, ALL (), Y_col);
2976 KokkosBlas::scal (Y_k, alphas_h(k), Y_k);
2982 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2989 using Kokkos::subview;
2990 const char tfecfFuncName[] =
"scale: ";
2992 const size_t lclNumRows = getLocalLength ();
2993 const size_t numVecs = getNumVectors ();
2995 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2997 "this->getLocalLength() = " << lclNumRows <<
" != A.getLocalLength() = "
2999 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3001 "this->getNumVectors() = " << numVecs <<
" != A.getNumVectors() = "
3005 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
3006 const std::pair<size_t, size_t> colRng (0, numVecs);
3008 auto Y_lcl_orig = this->getLocalViewDevice(Access::ReadWrite);
3010 auto Y_lcl = subview (Y_lcl_orig, rowRng, ALL ());
3011 auto X_lcl = subview (X_lcl_orig, rowRng, ALL ());
3014 KokkosBlas::scal (Y_lcl, theAlpha, X_lcl);
3018 for (
size_t k = 0; k < numVecs; ++k) {
3019 const size_t Y_col = this->isConstantStride () ? k : this->whichVectors_[k];
3021 auto Y_k = subview (Y_lcl, ALL (), Y_col);
3022 auto X_k = subview (X_lcl, ALL (), X_col);
3024 KokkosBlas::scal (Y_k, theAlpha, X_k);
3031 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3036 const char tfecfFuncName[] =
"reciprocal: ";
3038 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3040 "MultiVectors do not have the same local length. "
3041 "this->getLocalLength() = " << getLocalLength ()
3043 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3044 A.
getNumVectors () != this->getNumVectors (), std::runtime_error,
3045 ": MultiVectors do not have the same number of columns (vectors). "
3046 "this->getNumVectors() = " << getNumVectors ()
3047 <<
" != A.getNumVectors() = " << A.
getNumVectors () <<
".");
3049 const size_t numVecs = getNumVectors ();
3051 auto this_view_dev = this->getLocalViewDevice(Access::ReadWrite);
3055 KokkosBlas::reciprocal (this_view_dev, A_view_dev);
3059 using Kokkos::subview;
3060 for (
size_t k = 0; k < numVecs; ++k) {
3061 const size_t this_col = isConstantStride () ? k : whichVectors_[k];
3062 auto vector_k = subview (this_view_dev, ALL (), this_col);
3063 const size_t A_col = isConstantStride () ? k : A.
whichVectors_[k];
3064 auto vector_Ak = subview (A_view_dev, ALL (), A_col);
3065 KokkosBlas::reciprocal (vector_k, vector_Ak);
3070 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3075 const char tfecfFuncName[] =
"abs";
3077 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3079 ": MultiVectors do not have the same local length. "
3080 "this->getLocalLength() = " << getLocalLength ()
3082 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3083 A.
getNumVectors () != this->getNumVectors (), std::runtime_error,
3084 ": MultiVectors do not have the same number of columns (vectors). "
3085 "this->getNumVectors() = " << getNumVectors ()
3086 <<
" != A.getNumVectors() = " << A.
getNumVectors () <<
".");
3087 const size_t numVecs = getNumVectors ();
3089 auto this_view_dev = this->getLocalViewDevice(Access::ReadWrite);
3093 KokkosBlas::abs (this_view_dev, A_view_dev);
3097 using Kokkos::subview;
3099 for (
size_t k=0; k < numVecs; ++k) {
3100 const size_t this_col = isConstantStride () ? k : whichVectors_[k];
3101 auto vector_k = subview (this_view_dev, ALL (), this_col);
3102 const size_t A_col = isConstantStride () ? k : A.
whichVectors_[k];
3103 auto vector_Ak = subview (A_view_dev, ALL (), A_col);
3104 KokkosBlas::abs (vector_k, vector_Ak);
3109 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3116 const char tfecfFuncName[] =
"update: ";
3117 using Kokkos::subview;
3122 const size_t lclNumRows = getLocalLength ();
3123 const size_t numVecs = getNumVectors ();
3126 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3128 "this->getLocalLength() = " << lclNumRows <<
" != A.getLocalLength() = "
3130 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3132 "this->getNumVectors() = " << numVecs <<
" != A.getNumVectors() = "
3138 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
3139 const std::pair<size_t, size_t> colRng (0, numVecs);
3141 auto Y_lcl_orig = this->getLocalViewDevice(Access::ReadWrite);
3142 auto Y_lcl = subview (Y_lcl_orig, rowRng, Kokkos::ALL ());
3144 auto X_lcl = subview (X_lcl_orig, rowRng, Kokkos::ALL ());
3148 KokkosBlas::axpby (theAlpha, X_lcl, theBeta, Y_lcl);
3152 for (
size_t k = 0; k < numVecs; ++k) {
3153 const size_t Y_col = this->isConstantStride () ? k : this->whichVectors_[k];
3155 auto Y_k = subview (Y_lcl, ALL (), Y_col);
3156 auto X_k = subview (X_lcl, ALL (), X_col);
3158 KokkosBlas::axpby (theAlpha, X_k, theBeta, Y_k);
3163 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3170 const Scalar& gamma)
3173 using Kokkos::subview;
3175 const char tfecfFuncName[] =
"update(alpha,A,beta,B,gamma): ";
3179 const size_t lclNumRows = this->getLocalLength ();
3180 const size_t numVecs = getNumVectors ();
3183 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3185 "The input MultiVector A has " << A.
getLocalLength () <<
" local "
3186 "row(s), but this MultiVector has " << lclNumRows <<
" local "
3188 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3190 "The input MultiVector B has " << B.
getLocalLength () <<
" local "
3191 "row(s), but this MultiVector has " << lclNumRows <<
" local "
3193 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3195 "The input MultiVector A has " << A.
getNumVectors () <<
" column(s), "
3196 "but this MultiVector has " << numVecs <<
" column(s).");
3197 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3199 "The input MultiVector B has " << B.
getNumVectors () <<
" column(s), "
3200 "but this MultiVector has " << numVecs <<
" column(s).");
3207 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
3208 const std::pair<size_t, size_t> colRng (0, numVecs);
3213 auto C_lcl = subview (this->getLocalViewDevice(Access::ReadWrite), rowRng, ALL ());
3218 KokkosBlas::update (theAlpha, A_lcl, theBeta, B_lcl, theGamma, C_lcl);
3223 for (
size_t k = 0; k < numVecs; ++k) {
3224 const size_t this_col = isConstantStride () ? k : whichVectors_[k];
3227 KokkosBlas::update (theAlpha, subview (A_lcl, rowRng, A_col),
3228 theBeta, subview (B_lcl, rowRng, B_col),
3229 theGamma, subview (C_lcl, rowRng, this_col));
3234 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3235 Teuchos::ArrayRCP<const Scalar>
3241 const char tfecfFuncName[] =
"getData: ";
3248 auto hostView = getLocalViewHost(Access::ReadOnly);
3249 const size_t col = isConstantStride () ? j : whichVectors_[j];
3250 auto hostView_j = Kokkos::subview (hostView, ALL (), col);
3251 Teuchos::ArrayRCP<const IST> dataAsArcp =
3252 Kokkos::Compat::persistingView (hostView_j, 0, getLocalLength ());
3254 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3255 (static_cast<size_t> (hostView_j.extent (0)) <
3256 static_cast<size_t> (dataAsArcp.size ()), std::logic_error,
3257 "hostView_j.extent(0) = " << hostView_j.extent (0)
3258 <<
" < dataAsArcp.size() = " << dataAsArcp.size () <<
". "
3259 "Please report this bug to the Tpetra developers.");
3261 return Teuchos::arcp_reinterpret_cast<
const Scalar> (dataAsArcp);
3264 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3265 Teuchos::ArrayRCP<Scalar>
3270 using Kokkos::subview;
3272 const char tfecfFuncName[] =
"getDataNonConst: ";
3274 auto hostView = getLocalViewHost(Access::ReadWrite);
3275 const size_t col = isConstantStride () ? j : whichVectors_[j];
3276 auto hostView_j = subview (hostView, ALL (), col);
3277 Teuchos::ArrayRCP<IST> dataAsArcp =
3278 Kokkos::Compat::persistingView (hostView_j, 0, getLocalLength ());
3280 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3281 (static_cast<size_t> (hostView_j.extent (0)) <
3282 static_cast<size_t> (dataAsArcp.size ()), std::logic_error,
3283 "hostView_j.extent(0) = " << hostView_j.extent (0)
3284 <<
" < dataAsArcp.size() = " << dataAsArcp.size () <<
". "
3285 "Please report this bug to the Tpetra developers.");
3287 return Teuchos::arcp_reinterpret_cast<Scalar> (dataAsArcp);
3290 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3291 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3293 subCopy (
const Teuchos::ArrayView<const size_t>& cols)
const
3300 bool contiguous =
true;
3301 const size_t numCopyVecs =
static_cast<size_t> (cols.size ());
3302 for (
size_t j = 1; j < numCopyVecs; ++j) {
3303 if (cols[j] != cols[j-1] + static_cast<size_t> (1)) {
3308 if (contiguous && numCopyVecs > 0) {
3309 return this->subCopy (Teuchos::Range1D (cols[0], cols[numCopyVecs-1]));
3312 RCP<const MV> X_sub = this->subView (cols);
3313 RCP<MV> Y (
new MV (this->getMap (), numCopyVecs,
false));
3319 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3320 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3327 RCP<const MV> X_sub = this->subView (colRng);
3328 RCP<MV> Y (
new MV (this->getMap (), static_cast<size_t> (colRng.size ()),
false));
3333 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3337 return view_.origExtent(0);
3340 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3344 return view_.origExtent(1);
3347 template <
class Scalar,
class LO,
class GO,
class Node>
3350 const Teuchos::RCP<const map_type>& subMap,
3351 const local_ordinal_type rowOffset) :
3355 using Kokkos::subview;
3356 using Teuchos::outArg;
3359 using Teuchos::reduceAll;
3360 using Teuchos::REDUCE_MIN;
3363 const char prefix[] =
"Tpetra::MultiVector constructor (offsetView): ";
3364 const char suffix[] =
"Please report this bug to the Tpetra developers.";
3367 std::unique_ptr<std::ostringstream> errStrm;
3374 const auto comm = subMap->getComm ();
3375 TEUCHOS_ASSERT( ! comm.is_null () );
3376 const int myRank = comm->getRank ();
3378 const LO lclNumRowsBefore =
static_cast<LO
> (X.
getLocalLength ());
3380 const LO newNumRows =
static_cast<LO
> (subMap->getLocalNumElements ());
3382 std::ostringstream os;
3383 os <<
"Proc " << myRank <<
": " << prefix
3384 <<
"X: {lclNumRows: " << lclNumRowsBefore
3386 <<
", numCols: " << numCols <<
"}, "
3387 <<
"subMap: {lclNumRows: " << newNumRows <<
"}" << endl;
3388 std::cerr << os.str ();
3393 const bool tooManyElts =
3396 errStrm = std::unique_ptr<std::ostringstream> (
new std::ostringstream);
3397 *errStrm <<
" Proc " << myRank <<
": subMap->getLocalNumElements() (="
3398 << newNumRows <<
") + rowOffset (=" << rowOffset
3402 TEUCHOS_TEST_FOR_EXCEPTION
3403 (! debug && tooManyElts, std::invalid_argument,
3404 prefix << errStrm->str () << suffix);
3408 reduceAll<int, int> (*comm, REDUCE_MIN, lclGood, outArg (gblGood));
3410 std::ostringstream gblErrStrm;
3411 const std::string myErrStr =
3412 errStrm.get () !=
nullptr ? errStrm->str () : std::string (
"");
3413 ::Tpetra::Details::gathervPrint (gblErrStrm, myErrStr, *comm);
3414 TEUCHOS_TEST_FOR_EXCEPTION
3415 (
true, std::invalid_argument, gblErrStrm.str ());
3419 using range_type = std::pair<LO, LO>;
3420 const range_type origRowRng
3421 (rowOffset, static_cast<LO> (X.
view_.origExtent (0)));
3422 const range_type rowRng
3423 (rowOffset, rowOffset + newNumRows);
3425 wrapped_dual_view_type newView = takeSubview (X.
view_, rowRng, ALL ());
3432 if (newView.extent (0) == 0 &&
3433 newView.extent (1) != X.
view_.extent (1)) {
3435 allocDualView<Scalar, LO, GO, Node> (0, X.
getNumVectors ());
3439 MV (subMap, newView) :
3443 const LO lclNumRowsRet =
static_cast<LO
> (subViewMV.getLocalLength ());
3444 const LO numColsRet =
static_cast<LO
> (subViewMV.getNumVectors ());
3445 if (newNumRows != lclNumRowsRet || numCols != numColsRet) {
3447 if (errStrm.get () ==
nullptr) {
3448 errStrm = std::unique_ptr<std::ostringstream> (
new std::ostringstream);
3450 *errStrm <<
" Proc " << myRank <<
3451 ": subMap.getLocalNumElements(): " << newNumRows <<
3452 ", subViewMV.getLocalLength(): " << lclNumRowsRet <<
3453 ", X.getNumVectors(): " << numCols <<
3454 ", subViewMV.getNumVectors(): " << numColsRet << endl;
3456 reduceAll<int, int> (*comm, REDUCE_MIN, lclGood, outArg (gblGood));
3458 std::ostringstream gblErrStrm;
3460 gblErrStrm << prefix <<
"Returned MultiVector has the wrong local "
3461 "dimensions on one or more processes:" << endl;
3463 const std::string myErrStr =
3464 errStrm.get () !=
nullptr ? errStrm->str () : std::string (
"");
3465 ::Tpetra::Details::gathervPrint (gblErrStrm, myErrStr, *comm);
3466 gblErrStrm << suffix << endl;
3467 TEUCHOS_TEST_FOR_EXCEPTION
3468 (
true, std::invalid_argument, gblErrStrm.str ());
3473 std::ostringstream os;
3474 os <<
"Proc " << myRank <<
": " << prefix <<
"Call op=" << endl;
3475 std::cerr << os.str ();
3481 std::ostringstream os;
3482 os <<
"Proc " << myRank <<
": " << prefix <<
"Done!" << endl;
3483 std::cerr << os.str ();
3487 template <
class Scalar,
class LO,
class GO,
class Node>
3490 const map_type& subMap,
3491 const size_t rowOffset) :
3492 MultiVector (X, Teuchos::RCP<const map_type> (new map_type (subMap)),
3496 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3497 Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3500 const size_t offset)
const
3503 return Teuchos::rcp (
new MV (*
this, *subMap, offset));
3506 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3507 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3510 const size_t offset)
3513 return Teuchos::rcp (
new MV (*
this, *subMap, offset));
3516 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3517 Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3519 subView (
const Teuchos::ArrayView<const size_t>& cols)
const
3521 using Teuchos::Array;
3525 const size_t numViewCols =
static_cast<size_t> (cols.size ());
3526 TEUCHOS_TEST_FOR_EXCEPTION(
3527 numViewCols < 1, std::runtime_error,
"Tpetra::MultiVector::subView"
3528 "(const Teuchos::ArrayView<const size_t>&): The input array cols must "
3529 "contain at least one entry, but cols.size() = " << cols.size ()
3534 bool contiguous =
true;
3535 for (
size_t j = 1; j < numViewCols; ++j) {
3536 if (cols[j] != cols[j-1] + static_cast<size_t> (1)) {
3542 if (numViewCols == 0) {
3544 return rcp (
new MV (this->getMap (), numViewCols));
3547 return this->subView (Teuchos::Range1D (cols[0], cols[numViewCols-1]));
3551 if (isConstantStride ()) {
3552 return rcp (
new MV (this->getMap (), view_, cols));
3555 Array<size_t> newcols (cols.size ());
3556 for (
size_t j = 0; j < numViewCols; ++j) {
3557 newcols[j] = whichVectors_[cols[j]];
3559 return rcp (
new MV (this->getMap (), view_, newcols ()));
3564 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3565 Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3569 using ::Tpetra::Details::Behavior;
3571 using Kokkos::subview;
3572 using Teuchos::Array;
3576 const char tfecfFuncName[] =
"subView(Range1D): ";
3578 const size_t lclNumRows = this->getLocalLength ();
3579 const size_t numVecs = this->getNumVectors ();
3583 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3584 static_cast<size_t> (colRng.size ()) > numVecs, std::runtime_error,
3585 "colRng.size() = " << colRng.size () <<
" > this->getNumVectors() = "
3587 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3588 numVecs != 0 && colRng.size () != 0 &&
3589 (colRng.lbound () <
static_cast<Teuchos::Ordinal
> (0) ||
3590 static_cast<size_t> (colRng.ubound ()) >= numVecs),
3591 std::invalid_argument,
"Nonempty input range [" << colRng.lbound () <<
3592 "," << colRng.ubound () <<
"] exceeds the valid range of column indices "
3593 "[0, " << numVecs <<
"].");
3595 RCP<const MV> X_ret;
3605 const std::pair<size_t, size_t> rows (0, lclNumRows);
3606 if (colRng.size () == 0) {
3607 const std::pair<size_t, size_t> cols (0, 0);
3608 wrapped_dual_view_type X_sub = takeSubview (this->view_, ALL (), cols);
3609 X_ret = rcp (
new MV (this->getMap (), X_sub));
3613 if (isConstantStride ()) {
3614 const std::pair<size_t, size_t> cols (colRng.lbound (),
3615 colRng.ubound () + 1);
3616 wrapped_dual_view_type X_sub = takeSubview (this->view_, ALL (), cols);
3617 X_ret = rcp (
new MV (this->getMap (), X_sub));
3620 if (static_cast<size_t> (colRng.size ()) == static_cast<size_t> (1)) {
3623 const std::pair<size_t, size_t> col (whichVectors_[0] + colRng.lbound (),
3624 whichVectors_[0] + colRng.ubound () + 1);
3625 wrapped_dual_view_type X_sub = takeSubview (view_, ALL (), col);
3626 X_ret = rcp (
new MV (this->getMap (), X_sub));
3629 Array<size_t> which (whichVectors_.begin () + colRng.lbound (),
3630 whichVectors_.begin () + colRng.ubound () + 1);
3631 X_ret = rcp (
new MV (this->getMap (), view_, which));
3636 const bool debug = Behavior::debug ();
3638 using Teuchos::Comm;
3639 using Teuchos::outArg;
3640 using Teuchos::REDUCE_MIN;
3641 using Teuchos::reduceAll;
3643 RCP<const Comm<int> > comm = this->getMap ().is_null () ?
3644 Teuchos::null : this->getMap ()->getComm ();
3645 if (! comm.is_null ()) {
3649 if (X_ret.is_null ()) {
3652 reduceAll<int, int> (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess));
3653 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3654 (lclSuccess != 1, std::logic_error,
"X_ret (the subview of this "
3655 "MultiVector; the return value of this method) is null on some MPI "
3656 "process in this MultiVector's communicator. This should never "
3657 "happen. Please report this bug to the Tpetra developers.");
3658 if (! X_ret.is_null () &&
3659 X_ret->getNumVectors () !=
static_cast<size_t> (colRng.size ())) {
3662 reduceAll<int, int> (*comm, REDUCE_MIN, lclSuccess,
3663 outArg (gblSuccess));
3664 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3665 (lclSuccess != 1, std::logic_error,
"X_ret->getNumVectors() != "
3666 "colRng.size(), on at least one MPI process in this MultiVector's "
3667 "communicator. This should never happen. "
3668 "Please report this bug to the Tpetra developers.");
3675 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3676 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3681 return Teuchos::rcp_const_cast<MV> (this->subView (cols));
3685 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3686 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3691 return Teuchos::rcp_const_cast<MV> (this->subView (colRng));
3695 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3701 using Kokkos::subview;
3702 typedef std::pair<size_t, size_t> range_type;
3703 const char tfecfFuncName[] =
"MultiVector(const MultiVector&, const size_t): ";
3706 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3707 (j >= numCols, std::invalid_argument,
"Input index j (== " << j
3708 <<
") exceeds valid column index range [0, " << numCols <<
" - 1].");
3710 static_cast<size_t> (j) :
3712 this->
view_ = takeSubview (X.
view_, Kokkos::ALL (), range_type (jj, jj+1));
3723 const size_t newSize = X.
imports_.extent (0) / numCols;
3724 const size_t offset = jj*newSize;
3725 auto device_view = subview (X.
imports_.view_device(),
3726 range_type (offset, offset+newSize));
3727 auto host_view = subview (X.
imports_.view_host(),
3728 range_type (offset, offset+newSize));
3732 const size_t newSize = X.
exports_.extent (0) / numCols;
3733 const size_t offset = jj*newSize;
3734 auto device_view = subview (X.
exports_.view_device(),
3735 range_type (offset, offset+newSize));
3736 auto host_view = subview (X.
exports_.view_host(),
3737 range_type (offset, offset+newSize));
3748 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3749 Teuchos::RCP<const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3754 return Teuchos::rcp (
new V (*
this, j));
3758 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3759 Teuchos::RCP<Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3764 return Teuchos::rcp (
new V (*
this, j));
3768 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3771 get1dCopy (
const Teuchos::ArrayView<Scalar>& A,
const size_t LDA)
const
3774 using input_view_type = Kokkos::View<IST**, Kokkos::LayoutLeft,
3776 Kokkos::MemoryUnmanaged>;
3777 const char tfecfFuncName[] =
"get1dCopy: ";
3779 const size_t numRows = this->getLocalLength ();
3780 const size_t numCols = this->getNumVectors ();
3782 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3783 (LDA < numRows, std::runtime_error,
3784 "LDA = " << LDA <<
" < numRows = " << numRows <<
".");
3785 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3786 (numRows >
size_t (0) && numCols >
size_t (0) &&
3787 size_t (A.size ()) < LDA * (numCols - 1) + numRows,
3789 "A.size() = " << A.size () <<
", but its size must be at least "
3790 << (LDA * (numCols - 1) + numRows) <<
" to hold all the entries.");
3792 const std::pair<size_t, size_t> rowRange (0, numRows);
3793 const std::pair<size_t, size_t> colRange (0, numCols);
3795 input_view_type A_view_orig (reinterpret_cast<IST*> (A.getRawPtr ()),
3797 auto A_view = Kokkos::subview (A_view_orig, rowRange, colRange);
3810 if (this->need_sync_host() && this->need_sync_device()) {
3813 throw std::runtime_error(
"Tpetra::MultiVector: A non-const view is alive outside and we cannot give a copy where host or device view can be modified outside");
3816 const bool useHostView = view_.host_view_use_count() >= view_.device_view_use_count();
3817 if (this->isConstantStride ()) {
3819 auto srcView_host = this->getLocalViewHost(Access::ReadOnly);
3823 auto srcView_device = this->getLocalViewDevice(Access::ReadOnly);
3829 for (
size_t j = 0; j < numCols; ++j) {
3830 const size_t srcCol = this->whichVectors_[j];
3831 auto dstColView = Kokkos::subview (A_view, rowRange, j);
3834 auto srcView_host = this->getLocalViewHost(Access::ReadOnly);
3835 auto srcColView_host = Kokkos::subview (srcView_host, rowRange, srcCol);
3839 auto srcView_device = this->getLocalViewDevice(Access::ReadOnly);
3840 auto srcColView_device = Kokkos::subview (srcView_device, rowRange, srcCol);
3850 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3853 get2dCopy (
const Teuchos::ArrayView<
const Teuchos::ArrayView<Scalar> >& ArrayOfPtrs)
const
3856 const char tfecfFuncName[] =
"get2dCopy: ";
3857 const size_t numRows = this->getLocalLength ();
3858 const size_t numCols = this->getNumVectors ();
3860 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3861 static_cast<size_t> (ArrayOfPtrs.size ()) != numCols,
3862 std::runtime_error,
"Input array of pointers must contain as many "
3863 "entries (arrays) as the MultiVector has columns. ArrayOfPtrs.size() = "
3864 << ArrayOfPtrs.size () <<
" != getNumVectors() = " << numCols <<
".");
3866 if (numRows != 0 && numCols != 0) {
3868 for (
size_t j = 0; j < numCols; ++j) {
3869 const size_t dstLen =
static_cast<size_t> (ArrayOfPtrs[j].size ());
3870 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3871 dstLen < numRows, std::invalid_argument,
"Array j = " << j <<
" of "
3872 "the input array of arrays is not long enough to fit all entries in "
3873 "that column of the MultiVector. ArrayOfPtrs[j].size() = " << dstLen
3874 <<
" < getLocalLength() = " << numRows <<
".");
3878 for (
size_t j = 0; j < numCols; ++j) {
3879 Teuchos::RCP<const V> X_j = this->getVector (j);
3880 const size_t LDA =
static_cast<size_t> (ArrayOfPtrs[j].size ());
3881 X_j->get1dCopy (ArrayOfPtrs[j], LDA);
3887 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3888 Teuchos::ArrayRCP<const Scalar>
3892 if (getLocalLength () == 0 || getNumVectors () == 0) {
3893 return Teuchos::null;
3895 TEUCHOS_TEST_FOR_EXCEPTION(
3896 ! isConstantStride (), std::runtime_error,
"Tpetra::MultiVector::"
3897 "get1dView: This MultiVector does not have constant stride, so it is "
3898 "not possible to view its data as a single array. You may check "
3899 "whether a MultiVector has constant stride by calling "
3900 "isConstantStride().");
3904 auto X_lcl = getLocalViewHost(Access::ReadOnly);
3905 Teuchos::ArrayRCP<const impl_scalar_type> dataAsArcp =
3906 Kokkos::Compat::persistingView (X_lcl);
3907 return Teuchos::arcp_reinterpret_cast<
const Scalar> (dataAsArcp);
3911 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3912 Teuchos::ArrayRCP<Scalar>
3916 if (this->getLocalLength () == 0 || this->getNumVectors () == 0) {
3917 return Teuchos::null;
3920 TEUCHOS_TEST_FOR_EXCEPTION
3921 (! isConstantStride (), std::runtime_error,
"Tpetra::MultiVector::"
3922 "get1dViewNonConst: This MultiVector does not have constant stride, "
3923 "so it is not possible to view its data as a single array. You may "
3924 "check whether a MultiVector has constant stride by calling "
3925 "isConstantStride().");
3926 auto X_lcl = getLocalViewHost(Access::ReadWrite);
3927 Teuchos::ArrayRCP<impl_scalar_type> dataAsArcp =
3928 Kokkos::Compat::persistingView (X_lcl);
3929 return Teuchos::arcp_reinterpret_cast<Scalar> (dataAsArcp);
3933 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3934 Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> >
3938 auto X_lcl = getLocalViewHost(Access::ReadWrite);
3944 const size_t myNumRows = this->getLocalLength ();
3945 const size_t numCols = this->getNumVectors ();
3946 const Kokkos::pair<size_t, size_t> rowRange (0, myNumRows);
3948 Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> > views (numCols);
3949 for (
size_t j = 0; j < numCols; ++j) {
3950 const size_t col = this->isConstantStride () ? j : this->whichVectors_[j];
3951 auto X_lcl_j = Kokkos::subview (X_lcl, rowRange, col);
3952 Teuchos::ArrayRCP<impl_scalar_type> X_lcl_j_arcp =
3953 Kokkos::Compat::persistingView (X_lcl_j);
3954 views[j] = Teuchos::arcp_reinterpret_cast<Scalar> (X_lcl_j_arcp);
3959 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3964 return view_.getHostView(s);
3967 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3972 return view_.getHostView(s);
3975 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3980 return view_.getHostView(s);
3983 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3988 return view_.getDeviceView(s);
3991 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3996 return view_.getDeviceView(s);
3999 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4004 return view_.getDeviceView(s);
4007 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4014 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4015 Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> >
4022 auto X_lcl = getLocalViewHost(Access::ReadOnly);
4028 const size_t myNumRows = this->getLocalLength ();
4029 const size_t numCols = this->getNumVectors ();
4030 const Kokkos::pair<size_t, size_t> rowRange (0, myNumRows);
4032 Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> > views (numCols);
4033 for (
size_t j = 0; j < numCols; ++j) {
4034 const size_t col = this->isConstantStride () ? j : this->whichVectors_[j];
4035 auto X_lcl_j = Kokkos::subview (X_lcl, rowRange, col);
4036 Teuchos::ArrayRCP<const impl_scalar_type> X_lcl_j_arcp =
4037 Kokkos::Compat::persistingView (X_lcl_j);
4038 views[j] = Teuchos::arcp_reinterpret_cast<
const Scalar> (X_lcl_j_arcp);
4043 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4047 Teuchos::ETransp transB,
4048 const Scalar& alpha,
4053 using ::Tpetra::Details::ProfilingRegion;
4054 using Teuchos::CONJ_TRANS;
4055 using Teuchos::NO_TRANS;
4056 using Teuchos::TRANS;
4059 using Teuchos::rcpFromRef;
4061 using ATS = Kokkos::ArithTraits<impl_scalar_type>;
4063 using STS = Teuchos::ScalarTraits<Scalar>;
4065 const char tfecfFuncName[] =
"multiply: ";
4066 ProfilingRegion region (
"Tpetra::MV::multiply");
4098 const bool C_is_local = ! this->isDistributed ();
4108 auto myMap = this->getMap ();
4109 if (! myMap.is_null () && ! myMap->getComm ().is_null ()) {
4110 using Teuchos::REDUCE_MIN;
4111 using Teuchos::reduceAll;
4112 using Teuchos::outArg;
4114 auto comm = myMap->getComm ();
4115 const size_t A_nrows =
4117 const size_t A_ncols =
4119 const size_t B_nrows =
4121 const size_t B_ncols =
4124 const bool lclBad = this->getLocalLength () != A_nrows ||
4125 this->getNumVectors () != B_ncols || A_ncols != B_nrows;
4127 const int myRank = comm->getRank ();
4128 std::ostringstream errStrm;
4129 if (this->getLocalLength () != A_nrows) {
4130 errStrm <<
"Proc " << myRank <<
": this->getLocalLength()="
4131 << this->getLocalLength () <<
" != A_nrows=" << A_nrows
4132 <<
"." << std::endl;
4134 if (this->getNumVectors () != B_ncols) {
4135 errStrm <<
"Proc " << myRank <<
": this->getNumVectors()="
4136 << this->getNumVectors () <<
" != B_ncols=" << B_ncols
4137 <<
"." << std::endl;
4139 if (A_ncols != B_nrows) {
4140 errStrm <<
"Proc " << myRank <<
": A_ncols="
4141 << A_ncols <<
" != B_nrows=" << B_nrows
4142 <<
"." << std::endl;
4145 const int lclGood = lclBad ? 0 : 1;
4147 reduceAll<int, int> (*comm, REDUCE_MIN, lclGood, outArg (gblGood));
4149 std::ostringstream os;
4150 ::Tpetra::Details::gathervPrint (os, errStrm.str (), *comm);
4152 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4153 (
true, std::runtime_error,
"Inconsistent local dimensions on at "
4154 "least one process in this object's communicator." << std::endl
4156 <<
"C(" << (C_is_local ?
"local" :
"distr") <<
") = "
4158 << (transA == Teuchos::TRANS ?
"^T" :
4159 (transA == Teuchos::CONJ_TRANS ?
"^H" :
""))
4160 <<
"(" << (A_is_local ?
"local" :
"distr") <<
") * "
4162 << (transA == Teuchos::TRANS ?
"^T" :
4163 (transA == Teuchos::CONJ_TRANS ?
"^H" :
""))
4164 <<
"(" << (B_is_local ?
"local" :
"distr") <<
")." << std::endl
4165 <<
"Global dimensions: C(" << this->getGlobalLength () <<
", "
4175 const bool Case1 = C_is_local && A_is_local && B_is_local;
4177 const bool Case2 = C_is_local && ! A_is_local && ! B_is_local &&
4178 transA != NO_TRANS &&
4181 const bool Case3 = ! C_is_local && ! A_is_local && B_is_local &&
4185 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4186 (! Case1 && ! Case2 && ! Case3, std::runtime_error,
4187 "Multiplication of op(A) and op(B) into *this is not a "
4188 "supported use case.");
4190 if (beta != STS::zero () && Case2) {
4196 const int myRank = this->getMap ()->getComm ()->getRank ();
4198 beta_local = ATS::zero ();
4207 if (! isConstantStride ()) {
4208 C_tmp = rcp (
new MV (*
this, Teuchos::Copy));
4210 C_tmp = rcp (
this,
false);
4213 RCP<const MV> A_tmp;
4215 A_tmp = rcp (
new MV (A, Teuchos::Copy));
4217 A_tmp = rcpFromRef (A);
4220 RCP<const MV> B_tmp;
4222 B_tmp = rcp (
new MV (B, Teuchos::Copy));
4224 B_tmp = rcpFromRef (B);
4227 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4228 (! C_tmp->isConstantStride () || ! B_tmp->isConstantStride () ||
4229 ! A_tmp->isConstantStride (), std::logic_error,
4230 "Failed to make temporary constant-stride copies of MultiVectors.");
4233 const LO A_lclNumRows = A_tmp->getLocalLength ();
4234 const LO A_numVecs = A_tmp->getNumVectors ();
4235 auto A_lcl = A_tmp->getLocalViewDevice(Access::ReadOnly);
4236 auto A_sub = Kokkos::subview (A_lcl,
4237 std::make_pair (LO (0), A_lclNumRows),
4238 std::make_pair (LO (0), A_numVecs));
4241 const LO B_lclNumRows = B_tmp->getLocalLength ();
4242 const LO B_numVecs = B_tmp->getNumVectors ();
4243 auto B_lcl = B_tmp->getLocalViewDevice(Access::ReadOnly);
4244 auto B_sub = Kokkos::subview (B_lcl,
4245 std::make_pair (LO (0), B_lclNumRows),
4246 std::make_pair (LO (0), B_numVecs));
4248 const LO C_lclNumRows = C_tmp->getLocalLength ();
4249 const LO C_numVecs = C_tmp->getNumVectors ();
4251 auto C_lcl = C_tmp->getLocalViewDevice(Access::ReadWrite);
4252 auto C_sub = Kokkos::subview (C_lcl,
4253 std::make_pair (LO (0), C_lclNumRows),
4254 std::make_pair (LO (0), C_numVecs));
4255 const char ctransA = (transA == Teuchos::NO_TRANS ?
'N' :
4256 (transA == Teuchos::TRANS ?
'T' :
'C'));
4257 const char ctransB = (transB == Teuchos::NO_TRANS ?
'N' :
4258 (transB == Teuchos::TRANS ?
'T' :
'C'));
4261 ProfilingRegion regionGemm (
"Tpetra::MV::multiply-call-gemm");
4263 KokkosBlas::gemm (&ctransA, &ctransB, alpha_IST, A_sub, B_sub,
4267 if (! isConstantStride ()) {
4272 A_tmp = Teuchos::null;
4273 B_tmp = Teuchos::null;
4281 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4290 using Kokkos::subview;
4291 const char tfecfFuncName[] =
"elementWiseMultiply: ";
4293 const size_t lclNumRows = this->getLocalLength ();
4294 const size_t numVecs = this->getNumVectors ();
4296 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4298 std::runtime_error,
"MultiVectors do not have the same local length.");
4299 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4300 numVecs != B.
getNumVectors (), std::runtime_error,
"this->getNumVectors"
4301 "() = " << numVecs <<
" != B.getNumVectors() = " << B.
getNumVectors ()
4304 auto this_view = this->getLocalViewDevice(Access::ReadWrite);
4314 KokkosBlas::mult (scalarThis,
4317 subview (A_view, ALL (), 0),
4321 for (
size_t j = 0; j < numVecs; ++j) {
4322 const size_t C_col = isConstantStride () ? j : whichVectors_[j];
4324 KokkosBlas::mult (scalarThis,
4325 subview (this_view, ALL (), C_col),
4327 subview (A_view, ALL (), 0),
4328 subview (B_view, ALL (), B_col));
4333 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4339 using ::Tpetra::Details::ProfilingRegion;
4340 ProfilingRegion region (
"Tpetra::MV::reduce");
4342 const auto map = this->getMap ();
4343 if (map.get () ==
nullptr) {
4346 const auto comm = map->getComm ();
4347 if (comm.get () ==
nullptr) {
4353 const bool changed_on_device = this->need_sync_host ();
4354 if (changed_on_device) {
4358 Kokkos::fence(
"MultiVector::reduce");
4359 auto X_lcl = this->getLocalViewDevice(Access::ReadWrite);
4363 auto X_lcl = this->getLocalViewHost(Access::ReadWrite);
4368 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4376 const LocalOrdinal minLocalIndex = this->getMap()->getMinLocalIndex();
4377 const LocalOrdinal maxLocalIndex = this->getMap()->getMaxLocalIndex();
4378 TEUCHOS_TEST_FOR_EXCEPTION(
4379 lclRow < minLocalIndex || lclRow > maxLocalIndex,
4381 "Tpetra::MultiVector::replaceLocalValue: row index " << lclRow
4382 <<
" is invalid. The range of valid row indices on this process "
4383 << this->getMap()->getComm()->getRank() <<
" is [" << minLocalIndex
4384 <<
", " << maxLocalIndex <<
"].");
4385 TEUCHOS_TEST_FOR_EXCEPTION(
4386 vectorIndexOutOfRange(col),
4388 "Tpetra::MultiVector::replaceLocalValue: vector index " << col
4389 <<
" of the multivector is invalid.");
4392 auto view = this->getLocalViewHost(Access::ReadWrite);
4393 const size_t colInd = isConstantStride () ? col : whichVectors_[col];
4394 view (lclRow, colInd) = ScalarValue;
4398 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4407 const LocalOrdinal minLocalIndex = this->getMap()->getMinLocalIndex();
4408 const LocalOrdinal maxLocalIndex = this->getMap()->getMaxLocalIndex();
4409 TEUCHOS_TEST_FOR_EXCEPTION(
4410 lclRow < minLocalIndex || lclRow > maxLocalIndex,
4412 "Tpetra::MultiVector::sumIntoLocalValue: row index " << lclRow
4413 <<
" is invalid. The range of valid row indices on this process "
4414 << this->getMap()->getComm()->getRank() <<
" is [" << minLocalIndex
4415 <<
", " << maxLocalIndex <<
"].");
4416 TEUCHOS_TEST_FOR_EXCEPTION(
4417 vectorIndexOutOfRange(col),
4419 "Tpetra::MultiVector::sumIntoLocalValue: vector index " << col
4420 <<
" of the multivector is invalid.");
4423 const size_t colInd = isConstantStride () ? col : whichVectors_[col];
4425 auto view = this->getLocalViewHost(Access::ReadWrite);
4427 Kokkos::atomic_add (& (view(lclRow, colInd)), value);
4430 view(lclRow, colInd) += value;
4435 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4444 const LocalOrdinal lclRow = this->map_->getLocalElement (gblRow);
4447 const char tfecfFuncName[] =
"replaceGlobalValue: ";
4448 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4449 (lclRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid (),
4451 "Global row index " << gblRow <<
" is not present on this process "
4452 << this->getMap ()->getComm ()->getRank () <<
".");
4453 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4454 (this->vectorIndexOutOfRange (col), std::runtime_error,
4455 "Vector index " << col <<
" of the MultiVector is invalid.");
4458 this->replaceLocalValue (lclRow, col, ScalarValue);
4461 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4471 const LocalOrdinal lclRow = this->map_->getLocalElement (globalRow);
4474 TEUCHOS_TEST_FOR_EXCEPTION(
4475 lclRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid (),
4477 "Tpetra::MultiVector::sumIntoGlobalValue: Global row index " << globalRow
4478 <<
" is not present on this process "
4479 << this->getMap ()->getComm ()->getRank () <<
".");
4480 TEUCHOS_TEST_FOR_EXCEPTION(
4481 vectorIndexOutOfRange(col),
4483 "Tpetra::MultiVector::sumIntoGlobalValue: Vector index " << col
4484 <<
" of the multivector is invalid.");
4487 this->sumIntoLocalValue (lclRow, col, value, atomic);
4490 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4492 Teuchos::ArrayRCP<T>
4498 typename dual_view_type::array_layout,
4500 const size_t col = isConstantStride () ? j : whichVectors_[j];
4501 col_dual_view_type X_col =
4502 Kokkos::subview (view_, Kokkos::ALL (), col);
4503 return Kokkos::Compat::persistingView (X_col.view_device());
4506 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4513 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4520 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4525 using Teuchos::TypeNameTraits;
4527 std::ostringstream out;
4528 out <<
"\"" << className <<
"\": {";
4529 out <<
"Template parameters: {Scalar: " << TypeNameTraits<Scalar>::name ()
4530 <<
", LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name ()
4531 <<
", GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name ()
4532 <<
", Node" << Node::name ()
4534 if (this->getObjectLabel () !=
"") {
4535 out <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
4537 out <<
", numRows: " << this->getGlobalLength ();
4538 if (className !=
"Tpetra::Vector") {
4539 out <<
", numCols: " << this->getNumVectors ()
4540 <<
", isConstantStride: " << this->isConstantStride ();
4542 if (this->isConstantStride ()) {
4543 out <<
", columnStride: " << this->getStride ();
4550 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4555 return this->descriptionImpl (
"Tpetra::MultiVector");
4560 template<
typename ViewType>
4561 void print_vector(Teuchos::FancyOStream & out,
const char* prefix,
const ViewType& v)
4564 static_assert(Kokkos::SpaceAccessibility<Kokkos::HostSpace, typename ViewType::memory_space>::accessible,
4565 "Tpetra::MultiVector::localDescribeToString: Details::print_vector should be given a host-accessible view.");
4566 static_assert(ViewType::rank == 2,
4567 "Tpetra::MultiVector::localDescribeToString: Details::print_vector should be given a rank-2 view.");
4570 out <<
"Values("<<prefix<<
"): " << std::endl
4572 const size_t numRows = v.extent(0);
4573 const size_t numCols = v.extent(1);
4575 for (
size_t i = 0; i < numRows; ++i) {
4577 if (i + 1 < numRows) {
4583 for (
size_t i = 0; i < numRows; ++i) {
4584 for (
size_t j = 0; j < numCols; ++j) {
4586 if (j + 1 < numCols) {
4590 if (i + 1 < numRows) {
4599 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4604 typedef LocalOrdinal LO;
4607 if (vl <= Teuchos::VERB_LOW) {
4608 return std::string ();
4610 auto map = this->getMap ();
4611 if (map.is_null ()) {
4612 return std::string ();
4614 auto outStringP = Teuchos::rcp (
new std::ostringstream ());
4615 auto outp = Teuchos::getFancyOStream (outStringP);
4616 Teuchos::FancyOStream& out = *outp;
4617 auto comm = map->getComm ();
4618 const int myRank = comm->getRank ();
4619 const int numProcs = comm->getSize ();
4621 out <<
"Process " << myRank <<
" of " << numProcs <<
":" << endl;
4622 Teuchos::OSTab tab1 (out);
4625 const LO lclNumRows =
static_cast<LO
> (this->getLocalLength ());
4626 out <<
"Local number of rows: " << lclNumRows << endl;
4628 if (vl > Teuchos::VERB_MEDIUM) {
4631 if (this->getNumVectors () !=
static_cast<size_t> (1)) {
4632 out <<
"isConstantStride: " << this->isConstantStride () << endl;
4634 if (this->isConstantStride ()) {
4635 out <<
"Column stride: " << this->getStride () << endl;
4638 if (vl > Teuchos::VERB_HIGH && lclNumRows > 0) {
4646 auto X_dev = view_.getDeviceCopy();
4647 auto X_host = view_.getHostCopy();
4649 if(X_dev.data() == X_host.data()) {
4651 Details::print_vector(out,
"unified",X_host);
4654 Details::print_vector(out,
"host",X_host);
4655 Details::print_vector(out,
"dev",X_dev);
4660 return outStringP->str ();
4663 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4667 const std::string& className,
4668 const Teuchos::EVerbosityLevel verbLevel)
const
4670 using Teuchos::TypeNameTraits;
4671 using Teuchos::VERB_DEFAULT;
4672 using Teuchos::VERB_NONE;
4673 using Teuchos::VERB_LOW;
4675 const Teuchos::EVerbosityLevel vl =
4676 (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
4678 if (vl == VERB_NONE) {
4685 auto map = this->getMap ();
4686 if (map.is_null ()) {
4689 auto comm = map->getComm ();
4690 if (comm.is_null ()) {
4694 const int myRank = comm->getRank ();
4703 Teuchos::RCP<Teuchos::OSTab> tab0, tab1;
4707 tab0 = Teuchos::rcp (
new Teuchos::OSTab (out));
4708 out <<
"\"" << className <<
"\":" << endl;
4709 tab1 = Teuchos::rcp (
new Teuchos::OSTab (out));
4711 out <<
"Template parameters:" << endl;
4712 Teuchos::OSTab tab2 (out);
4713 out <<
"Scalar: " << TypeNameTraits<Scalar>::name () << endl
4714 <<
"LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name () << endl
4715 <<
"GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name () << endl
4716 <<
"Node: " << Node::name () << endl;
4718 if (this->getObjectLabel () !=
"") {
4719 out <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
4721 out <<
"Global number of rows: " << this->getGlobalLength () << endl;
4722 if (className !=
"Tpetra::Vector") {
4723 out <<
"Number of columns: " << this->getNumVectors () << endl;
4730 if (vl > VERB_LOW) {
4731 const std::string lclStr = this->localDescribeToString (vl);
4732 ::Tpetra::Details::gathervPrint (out, lclStr, *comm);
4736 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4740 const Teuchos::EVerbosityLevel verbLevel)
const
4742 this->describeImpl (out,
"Tpetra::MultiVector", verbLevel);
4745 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4750 replaceMap (newMap);
4753 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4758 using ::Tpetra::Details::localDeepCopy;
4759 const char prefix[] =
"Tpetra::MultiVector::assign: ";
4761 TEUCHOS_TEST_FOR_EXCEPTION
4763 this->getNumVectors () != src.
getNumVectors (), std::invalid_argument,
4764 prefix <<
"Global dimensions of the two Tpetra::MultiVector "
4765 "objects do not match. src has dimensions [" << src.
getGlobalLength ()
4766 <<
"," << src.
getNumVectors () <<
"], and *this has dimensions ["
4767 << this->getGlobalLength () <<
"," << this->getNumVectors () <<
"].");
4769 TEUCHOS_TEST_FOR_EXCEPTION
4770 (this->getLocalLength () != src.
getLocalLength (), std::invalid_argument,
4771 prefix <<
"The local row counts of the two Tpetra::MultiVector "
4772 "objects do not match. src has " << src.
getLocalLength () <<
" row(s) "
4773 <<
" and *this has " << this->getLocalLength () <<
" row(s).");
4788 if (src_last_updated_on_host) {
4789 localDeepCopy (this->getLocalViewHost(Access::ReadWrite),
4791 this->isConstantStride (),
4793 this->whichVectors_ (),
4797 localDeepCopy (this->getLocalViewDevice(Access::ReadWrite),
4799 this->isConstantStride (),
4801 this->whichVectors_ (),
4806 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4808 Teuchos::RCP<MultiVector<T, LocalOrdinal, GlobalOrdinal, Node> >
4815 this->getNumVectors(),
4821 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4826 using ::Tpetra::Details::PackTraits;
4828 const size_t l1 = this->getLocalLength();
4830 if ((l1!=l2) || (this->getNumVectors() != vec.
getNumVectors())) {
4837 template <
class ST,
class LO,
class GO,
class NT>
4840 std::swap(mv.
map_, this->map_);
4841 std::swap(mv.
view_, this->view_);
4845 #ifdef HAVE_TPETRACORE_TEUCHOSNUMERICS
4846 template <
class ST,
class LO,
class GO,
class NT>
4849 const Teuchos::SerialDenseMatrix<int, ST>& src)
4851 using ::Tpetra::Details::localDeepCopy;
4853 using IST =
typename MV::impl_scalar_type;
4854 using input_view_type =
4855 Kokkos::View<
const IST**, Kokkos::LayoutLeft,
4856 Kokkos::HostSpace, Kokkos::MemoryUnmanaged>;
4857 using pair_type = std::pair<LO, LO>;
4859 const LO numRows =
static_cast<LO
> (src.numRows ());
4860 const LO numCols =
static_cast<LO
> (src.numCols ());
4862 TEUCHOS_TEST_FOR_EXCEPTION
4865 std::invalid_argument,
"Tpetra::deep_copy: On Process "
4866 << dst.
getMap ()->getComm ()->getRank () <<
", dst is "
4868 <<
", but src is " << numRows <<
" x " << numCols <<
".");
4870 const IST* src_raw =
reinterpret_cast<const IST*
> (src.values ());
4871 input_view_type src_orig (src_raw, src.stride (), numCols);
4872 input_view_type src_view =
4873 Kokkos::subview (src_orig, pair_type (0, numRows), Kokkos::ALL ());
4875 constexpr
bool src_isConstantStride =
true;
4876 Teuchos::ArrayView<const size_t> srcWhichVectors (
nullptr, 0);
4880 src_isConstantStride,
4881 getMultiVectorWhichVectors (dst),
4885 template <
class ST,
class LO,
class GO,
class NT>
4887 deep_copy (Teuchos::SerialDenseMatrix<int, ST>& dst,
4890 using ::Tpetra::Details::localDeepCopy;
4892 using IST =
typename MV::impl_scalar_type;
4893 using output_view_type =
4894 Kokkos::View<IST**, Kokkos::LayoutLeft,
4895 Kokkos::HostSpace, Kokkos::MemoryUnmanaged>;
4896 using pair_type = std::pair<LO, LO>;
4898 const LO numRows =
static_cast<LO
> (dst.numRows ());
4899 const LO numCols =
static_cast<LO
> (dst.numCols ());
4901 TEUCHOS_TEST_FOR_EXCEPTION
4904 std::invalid_argument,
"Tpetra::deep_copy: On Process "
4905 << src.
getMap ()->getComm ()->getRank () <<
", src is "
4907 <<
", but dst is " << numRows <<
" x " << numCols <<
".");
4909 IST* dst_raw =
reinterpret_cast<IST*
> (dst.values ());
4910 output_view_type dst_orig (dst_raw, dst.stride (), numCols);
4912 Kokkos::subview (dst_orig, pair_type (0, numRows), Kokkos::ALL ());
4914 constexpr
bool dst_isConstantStride =
true;
4915 Teuchos::ArrayView<const size_t> dstWhichVectors (
nullptr, 0);
4918 localDeepCopy (dst_view,
4920 dst_isConstantStride,
4923 getMultiVectorWhichVectors (src));
4925 #endif // HAVE_TPETRACORE_TEUCHOSNUMERICS
4927 template <
class Scalar,
class LO,
class GO,
class NT>
4928 Teuchos::RCP<MultiVector<Scalar, LO, GO, NT> >
4933 return Teuchos::rcp (
new MV (map, numVectors));
4936 template <
class ST,
class LO,
class GO,
class NT>
4954 #ifdef HAVE_TPETRACORE_TEUCHOSNUMERICS
4955 # define TPETRA_MULTIVECTOR_INSTANT(SCALAR,LO,GO,NODE) \
4956 template class MultiVector< SCALAR , LO , GO , NODE >; \
4957 template MultiVector< SCALAR , LO , GO , NODE > createCopy( const MultiVector< SCALAR , LO , GO , NODE >& src); \
4958 template Teuchos::RCP<MultiVector< SCALAR , LO , GO , NODE > > createMultiVector (const Teuchos::RCP<const Map<LO, GO, NODE> >& map, size_t numVectors); \
4959 template void deep_copy (MultiVector<SCALAR, LO, GO, NODE>& dst, const Teuchos::SerialDenseMatrix<int, SCALAR>& src); \
4960 template void deep_copy (Teuchos::SerialDenseMatrix<int, SCALAR>& dst, const MultiVector<SCALAR, LO, GO, NODE>& src);
4963 # define TPETRA_MULTIVECTOR_INSTANT(SCALAR,LO,GO,NODE) \
4964 template class MultiVector< SCALAR , LO , GO , NODE >; \
4965 template MultiVector< SCALAR , LO , GO , NODE > createCopy( const MultiVector< SCALAR , LO , GO , NODE >& src);
4967 #endif // HAVE_TPETRACORE_TEUCHOSNUMERICS
4970 #define TPETRA_MULTIVECTOR_CONVERT_INSTANT(SO,SI,LO,GO,NODE) \
4972 template Teuchos::RCP< MultiVector< SO , LO , GO , NODE > > \
4973 MultiVector< SI , LO , GO , NODE >::convert< SO > () const;
4976 #endif // TPETRA_MULTIVECTOR_DEF_HPP
typename map_type::local_ordinal_type local_ordinal_type
The type of local indices that this class uses.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
typename device_type::execution_space execution_space
Type of the (new) Kokkos execution space.
Kokkos::DualView< size_t *, buffer_device_type > numExportPacketsPerLID_
Number of packets to send for each send operation.
Teuchos::RCP< const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > offsetView(const Teuchos::RCP< const map_type > &subMap, const size_t offset) const
Return a const view of a subset of rows.
Declaration and definition of Tpetra::Details::Blas::fill, an implementation detail of Tpetra::MultiV...
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
typename Kokkos::ArithTraits< impl_scalar_type >::mag_type mag_type
Type of a norm result.
size_t getNumVectors() const
Number of columns in the multivector.
size_t getLocalLength() const
Local number of rows on the calling process.
Declaration of a function that prints strings from each process.
bool isConstantStride() const
Whether this multivector has constant stride between columns.
MultiVector< ST, LO, GO, NT > createCopy(const MultiVector< ST, LO, GO, NT > &src)
Return a deep copy of the given MultiVector.
One or more distributed dense vectors.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
static size_t multivectorKernelLocationThreshold()
the threshold for transitioning from device to host
bool isDistributed() const
Whether this is a globally distributed object.
void removeEmptyProcessesInPlace(Teuchos::RCP< DistObjectType > &input, const Teuchos::RCP< const Map< typename DistObjectType::local_ordinal_type, typename DistObjectType::global_ordinal_type, typename DistObjectType::node_type > > &newMap)
Remove processes which contain no elements in this object's Map.
static bool debug()
Whether Tpetra is in debug mode.
Teuchos::Array< size_t > whichVectors_
Indices of columns this multivector is viewing.
Declaration of functions for checking whether a given pointer is accessible from a given Kokkos execu...
int local_ordinal_type
Default value of Scalar template parameter.
bool need_sync_device() const
Whether this MultiVector needs synchronization to the device.
typename Kokkos::ArithTraits< Scalar >::val_type impl_scalar_type
The type used internally in place of Scalar.
size_t global_size_t
Global size_t object.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
dual_view_type::t_host::const_type getLocalViewHost(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on host. This requires that ther...
Kokkos::DualView< size_t *, buffer_device_type > numImportPacketsPerLID_
Number of packets to receive for each receive operation.
Insert new values that don't currently exist.
Kokkos::DualView< packet_type *, buffer_device_type > imports_
Buffer into which packed data are imported (received from other processes).
Kokkos::DualView< packet_type *, buffer_device_type > exports_
Buffer from which packed data are exported (sent to other processes).
Kokkos::DualView< T *, DT > getDualViewCopyFromArrayView(const Teuchos::ArrayView< const T > &x_av, const char label[], const bool leaveOnHost)
Get a 1-D Kokkos::DualView which is a deep copy of the input Teuchos::ArrayView (which views host mem...
wrapped_dual_view_type view_
The Kokkos::DualView containing the MultiVector's data.
void assign(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &src)
Copy the contents of src into *this (deep copy).
static void allReduceView(const OutputViewType &output, const InputViewType &input, const Teuchos::Comm< int > &comm)
All-reduce from input Kokkos::View to output Kokkos::View.
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > createMultiVector(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &map, const size_t numVectors)
Nonmember MultiVector "constructor": Create a MultiVector from a given Map.
Functions for initializing and finalizing Tpetra.
static bool verbose()
Whether Tpetra is in verbose mode.
CombineMode
Rule for combining data in an Import or Export.
Teuchos::RCP< const map_type > map_
The Map over which this object is distributed.
bool reallocDualViewIfNeeded(Kokkos::DualView< ValueType *, DeviceType > &dv, const size_t newSize, const char newLabel[], const size_t tooBigFactor=2, const bool needFenceBeforeRealloc=true)
Reallocate the DualView in/out argument, if needed.
Declaration of Tpetra::Details::isInterComm.
bool need_sync_host() const
Whether this MultiVector needs synchronization to the host.
Replace old value with maximum of magnitudes of old and new values.
Abstract base class for objects that can be the source of an Import or Export operation.
Declaration and definition of Tpetra::Details::reallocDualViewIfNeeded, an implementation detail of T...
Kokkos::DualView< impl_scalar_type **, Kokkos::LayoutLeft, device_type > dual_view_type
Kokkos::DualView specialization used by this class.
Replace existing values with new values.
dual_view_type::t_dev::const_type getLocalViewDevice(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on device. This requires that th...
std::string combineModeToString(const CombineMode combineMode)
Human-readable string representation of the given CombineMode.
Teuchos::RCP< const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > getVector(const size_t j) const
Return a Vector which is a const view of column j.
MultiVector()
Default constructor: makes a MultiVector with no rows or columns.
void start()
Start the deep_copy counter.
A parallel distribution of indices over processes.
A distributed dense vector.
Stand-alone utility functions and macros.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
Declaration and definition of Tpetra::Details::normImpl, which is an implementation detail of Tpetra:...
void normImpl(MagnitudeType norms[], const Kokkos::View< const ValueType **, ArrayLayout, DeviceType > &X, const EWhichNorm whichNorm, const Teuchos::ArrayView< const size_t > &whichVecs, const bool isConstantStride, const bool isDistributed, const Teuchos::Comm< int > *comm)
Implementation of MultiVector norms.
Base class for distributed Tpetra objects that support data redistribution.
global_size_t getGlobalLength() const
Global number of rows in the multivector.
EWhichNorm
Input argument for normImpl() (which see).
Accumulate new values into existing values (may not be supported in all classes)
size_t getOrigNumLocalRows() const
"Original" number of rows in the (local) data.
All-reduce a 1-D or 2-D Kokkos::View.
Declaration and definition of Tpetra::Details::lclDot, an implementation detail of Tpetra::MultiVecto...
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.