10 #ifndef TPETRA_MULTIVECTOR_DEF_HPP
11 #define TPETRA_MULTIVECTOR_DEF_HPP
23 #include "Tpetra_Vector.hpp"
35 #include "Tpetra_Details_Random.hpp"
36 #ifdef HAVE_TPETRACORE_TEUCHOSNUMERICS
37 #include "Teuchos_SerialDenseMatrix.hpp"
38 #endif // HAVE_TPETRACORE_TEUCHOSNUMERICS
39 #include "Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels.hpp"
40 #include "KokkosCompat_View.hpp"
41 #include "KokkosBlas.hpp"
42 #include "KokkosKernels_Utils.hpp"
43 #include "Kokkos_Random.hpp"
44 #if KOKKOS_VERSION >= 40799
45 #include "KokkosKernels_ArithTraits.hpp"
47 #include "Kokkos_ArithTraits.hpp"
52 #ifdef HAVE_TPETRA_INST_FLOAT128
55 template <
class Generator>
56 struct rand<Generator, __float128> {
57 static KOKKOS_INLINE_FUNCTION __float128 max() {
58 return static_cast<__float128
>(1.0);
60 static KOKKOS_INLINE_FUNCTION __float128
61 draw(Generator& gen) {
64 const __float128 scalingFactor =
65 static_cast<__float128
>(std::numeric_limits<double>::min()) /
66 static_cast<__float128>(2.0);
67 const __float128 higherOrderTerm =
static_cast<__float128
>(gen.drand());
68 const __float128 lowerOrderTerm =
69 static_cast<__float128
>(gen.drand()) * scalingFactor;
70 return higherOrderTerm + lowerOrderTerm;
72 static KOKKOS_INLINE_FUNCTION __float128
73 draw(Generator& gen,
const __float128& range) {
75 const __float128 scalingFactor =
76 static_cast<__float128
>(std::numeric_limits<double>::min()) /
77 static_cast<__float128>(2.0);
78 const __float128 higherOrderTerm =
79 static_cast<__float128
>(gen.drand(range));
80 const __float128 lowerOrderTerm =
81 static_cast<__float128
>(gen.drand(range)) * scalingFactor;
82 return higherOrderTerm + lowerOrderTerm;
84 static KOKKOS_INLINE_FUNCTION __float128
85 draw(Generator& gen,
const __float128&
start,
const __float128& end) {
87 const __float128 scalingFactor =
88 static_cast<__float128
>(std::numeric_limits<double>::min()) /
89 static_cast<__float128>(2.0);
90 const __float128 higherOrderTerm =
91 static_cast<__float128
>(gen.drand(start, end));
92 const __float128 lowerOrderTerm =
93 static_cast<__float128
>(gen.drand(start, end)) * scalingFactor;
94 return higherOrderTerm + lowerOrderTerm;
98 #endif // HAVE_TPETRA_INST_FLOAT128
116 template <
class ST,
class LO,
class GO,
class NT>
118 allocDualView(
const size_t lclNumRows,
119 const size_t numCols,
120 const bool zeroOut =
true,
121 const bool allowPadding =
false) {
122 using Kokkos::AllowPadding;
123 using Kokkos::view_alloc;
124 using Kokkos::WithoutInitializing;
125 using ::Tpetra::Details::Behavior;
128 typedef typename dual_view_type::t_dev dev_view_type;
134 const std::string label(
"MV::DualView");
135 const bool debug = Behavior::debug();
145 dev_view_type d_view;
148 d_view = dev_view_type(view_alloc(label, AllowPadding),
149 lclNumRows, numCols);
151 d_view = dev_view_type(view_alloc(label),
152 lclNumRows, numCols);
156 d_view = dev_view_type(view_alloc(label,
159 lclNumRows, numCols);
161 d_view = dev_view_type(view_alloc(label, WithoutInitializing),
162 lclNumRows, numCols);
173 #if KOKKOS_VERSION >= 40799
174 const ST nan = KokkosKernels::ArithTraits<ST>::nan();
176 const ST nan = Kokkos::ArithTraits<ST>::nan();
178 KokkosBlas::fill(d_view, nan);
182 TEUCHOS_TEST_FOR_EXCEPTION(static_cast<size_t>(d_view.extent(0)) != lclNumRows ||
183 static_cast<size_t>(d_view.extent(1)) != numCols,
185 "allocDualView: d_view's dimensions actual dimensions do not match "
186 "requested dimensions. d_view is "
187 << d_view.extent(0) <<
" x " << d_view.extent(1) <<
"; requested " << lclNumRows <<
" x " << numCols
188 <<
". Please report this bug to the Tpetra developers.");
191 return wrapped_dual_view_type(d_view);
198 template <
class T,
class ExecSpace>
199 struct MakeUnmanagedView {
209 typedef typename std::conditional<
210 Kokkos::SpaceAccessibility<
211 typename ExecSpace::memory_space,
212 Kokkos::HostSpace>::accessible,
213 typename ExecSpace::device_type,
214 typename Kokkos::DualView<T*, ExecSpace>::host_mirror_space>::type host_exec_space;
215 typedef Kokkos::LayoutLeft array_layout;
216 typedef Kokkos::View<T*, array_layout, host_exec_space,
217 Kokkos::MemoryUnmanaged>
220 static view_type getView(
const Teuchos::ArrayView<T>& x_in) {
221 const size_t numEnt =
static_cast<size_t>(x_in.size());
225 return view_type(x_in.getRawPtr(), numEnt);
230 template <
class WrappedDualViewType>
232 takeSubview(
const WrappedDualViewType& X,
233 const std::pair<size_t, size_t>& rowRng,
234 const Kokkos::ALL_t& colRng)
238 return WrappedDualViewType(X, rowRng, colRng);
241 template <
class WrappedDualViewType>
243 takeSubview(
const WrappedDualViewType& X,
244 const Kokkos::ALL_t& rowRng,
245 const std::pair<size_t, size_t>& colRng) {
246 using DualViewType =
typename WrappedDualViewType::DVT;
249 if (X.extent(0) == 0 && X.extent(1) != 0) {
250 return WrappedDualViewType(DualViewType(
"MV::DualView", 0, colRng.second - colRng.first));
252 return WrappedDualViewType(X, rowRng, colRng);
256 template <
class WrappedDualViewType>
258 takeSubview(
const WrappedDualViewType& X,
259 const std::pair<size_t, size_t>& rowRng,
260 const std::pair<size_t, size_t>& colRng) {
261 using DualViewType =
typename WrappedDualViewType::DVT;
271 if (X.extent(0) == 0 && X.extent(1) != 0) {
272 return WrappedDualViewType(DualViewType(
"MV::DualView", 0, colRng.second - colRng.first));
274 return WrappedDualViewType(X, rowRng, colRng);
278 template <
class WrappedOrNotDualViewType>
280 getDualViewStride(
const WrappedOrNotDualViewType& dv) {
285 size_t strides[WrappedOrNotDualViewType::t_dev::rank + 1];
287 const size_t LDA = strides[1];
288 const size_t numRows = dv.extent(0);
291 return (numRows == 0) ? size_t(1) : numRows;
297 template <
class ViewType>
299 getViewStride(
const ViewType& view) {
300 const size_t LDA = view.stride(1);
301 const size_t numRows = view.extent(0);
304 return (numRows == 0) ? size_t(1) : numRows;
310 template <
class impl_scalar_type,
class buffer_device_type>
311 bool runKernelOnHost(
312 Kokkos::DualView<impl_scalar_type*, buffer_device_type> imports) {
313 if (!imports.need_sync_device()) {
317 size_t localLengthThreshold =
319 return imports.extent(0) <= localLengthThreshold;
323 template <
class SC,
class LO,
class GO,
class NT>
324 bool runKernelOnHost(const ::Tpetra::MultiVector<SC, LO, GO, NT>& X) {
325 if (!X.need_sync_device()) {
329 size_t localLengthThreshold =
331 return X.getLocalLength() <= localLengthThreshold;
335 template <
class SC,
class LO,
class GO,
class NT>
336 void multiVectorNormImpl(typename ::Tpetra::MultiVector<SC, LO, GO, NT>::mag_type norms[],
339 using namespace Tpetra;
342 using val_type =
typename MV::impl_scalar_type;
343 using mag_type =
typename MV::mag_type;
344 using dual_view_type =
typename MV::dual_view_type;
347 auto comm = map.is_null() ?
nullptr : map->getComm().getRawPtr();
348 auto whichVecs = getMultiVectorWhichVectors(X);
352 const bool runOnHost = runKernelOnHost(X);
354 using view_type =
typename dual_view_type::t_host;
355 using array_layout =
typename view_type::array_layout;
356 using device_type =
typename view_type::device_type;
359 normImpl<val_type, array_layout, device_type,
360 mag_type>(norms, X_lcl, whichNorm, whichVecs,
361 isConstantStride, isDistributed, comm);
363 using view_type =
typename dual_view_type::t_dev;
364 using array_layout =
typename view_type::array_layout;
365 using device_type =
typename view_type::device_type;
368 normImpl<val_type, array_layout, device_type,
369 mag_type>(norms, X_lcl, whichNorm, whichVecs,
370 isConstantStride, isDistributed, comm);
378 template <
typename DstView,
typename SrcView>
379 struct AddAssignFunctor {
382 AddAssignFunctor(DstView& tgt_, SrcView& src_)
386 KOKKOS_INLINE_FUNCTION
void
387 operator()(
const size_t k)
const { tgt(k) += src(k); }
394 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
397 return (VectorIndex < 1 && VectorIndex != 0) || VectorIndex >= getNumVectors();
400 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
405 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
408 const size_t numVecs,
415 view_ = allocDualView<Scalar, LocalOrdinal, GlobalOrdinal, Node>(lclNumRows, numVecs, zeroOut);
418 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
421 const Teuchos::DataAccess copyOrView)
423 , view_(source.view_)
424 , whichVectors_(source.whichVectors_) {
426 const char tfecfFuncName[] =
427 "MultiVector(const MultiVector&, "
428 "const Teuchos::DataAccess): ";
432 if (copyOrView == Teuchos::Copy) {
436 this->
view_ = cpy.view_;
438 }
else if (copyOrView == Teuchos::View) {
440 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
441 true, std::invalid_argument,
442 "Second argument 'copyOrView' has an "
444 << copyOrView <<
". Valid values include "
446 << Teuchos::Copy <<
" and Teuchos::View = "
447 << Teuchos::View <<
".");
451 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
454 const dual_view_type& view)
456 , view_(wrapped_dual_view_type(view)) {
457 const char tfecfFuncName[] =
"MultiVector(Map,DualView): ";
458 const size_t lclNumRows_map = map.is_null() ? size_t(0) : map->getLocalNumElements();
459 const size_t lclNumRows_view = view.extent(0);
460 const size_t LDA = getDualViewStride(
view_);
462 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(LDA < lclNumRows_map || lclNumRows_map != lclNumRows_view,
463 std::invalid_argument,
464 "Kokkos::DualView does not match Map. "
465 "map->getLocalNumElements() = "
467 <<
", view.extent(0) = " << lclNumRows_view
468 <<
", and getStride() = " << LDA <<
".");
470 using ::Tpetra::Details::Behavior;
471 const bool debug = Behavior::debug();
473 using ::Tpetra::Details::checkGlobalDualViewValidity;
474 std::ostringstream gblErrStrm;
475 const bool verbose = Behavior::verbose();
476 const auto comm = map.is_null() ? Teuchos::null : map->getComm();
477 const bool gblValid =
478 checkGlobalDualViewValidity(&gblErrStrm, view, verbose,
480 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!gblValid, std::runtime_error, gblErrStrm.str());
484 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
487 const wrapped_dual_view_type& view)
490 const char tfecfFuncName[] =
"MultiVector(Map,DualView): ";
491 const size_t lclNumRows_map = map.is_null() ? size_t(0) : map->getLocalNumElements();
492 const size_t lclNumRows_view = view.extent(0);
493 const size_t LDA = getDualViewStride(view);
495 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(LDA < lclNumRows_map || lclNumRows_map != lclNumRows_view,
496 std::invalid_argument,
497 "Kokkos::DualView does not match Map. "
498 "map->getLocalNumElements() = "
500 <<
", view.extent(0) = " << lclNumRows_view
501 <<
", and getStride() = " << LDA <<
".");
503 using ::Tpetra::Details::Behavior;
504 const bool debug = Behavior::debug();
506 using ::Tpetra::Details::checkGlobalWrappedDualViewValidity;
507 std::ostringstream gblErrStrm;
508 const bool verbose = Behavior::verbose();
509 const auto comm = map.is_null() ? Teuchos::null : map->getComm();
510 const bool gblValid =
511 checkGlobalWrappedDualViewValidity(&gblErrStrm, view, verbose,
513 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!gblValid, std::runtime_error, gblErrStrm.str());
517 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
520 const typename dual_view_type::t_dev& d_view)
522 using Teuchos::ArrayRCP;
524 const char tfecfFuncName[] =
"MultiVector(map,d_view): ";
528 const size_t LDA = getViewStride(d_view);
529 const size_t lclNumRows = map->getLocalNumElements();
530 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(LDA < lclNumRows, std::invalid_argument,
531 "Map does not match "
532 "Kokkos::View. map->getLocalNumElements() = "
534 <<
", View's column stride = " << LDA
535 <<
", and View's extent(0) = " << d_view.extent(0) <<
".");
537 auto h_view = Kokkos::create_mirror_view(d_view);
539 view_ = wrapped_dual_view_type(dual_view);
541 using ::Tpetra::Details::Behavior;
542 const bool debug = Behavior::debug();
544 using ::Tpetra::Details::checkGlobalWrappedDualViewValidity;
545 std::ostringstream gblErrStrm;
546 const bool verbose = Behavior::verbose();
547 const auto comm = map.is_null() ? Teuchos::null : map->getComm();
548 const bool gblValid =
549 checkGlobalWrappedDualViewValidity(&gblErrStrm,
view_, verbose,
551 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!gblValid, std::runtime_error, gblErrStrm.str());
556 dual_view.modify_device();
559 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
562 const dual_view_type& view,
563 const dual_view_type& origView)
565 , view_(wrapped_dual_view_type(view, origView)) {
566 const char tfecfFuncName[] =
"MultiVector(map,view,origView): ";
568 const size_t LDA = getDualViewStride(origView);
570 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
571 LDA < lclNumRows, std::invalid_argument,
572 "The input Kokkos::DualView's "
573 "column stride LDA = "
574 << LDA <<
" < getLocalLength() = " << lclNumRows
575 <<
". This may also mean that the input origView's first dimension (number "
577 << origView.extent(0) <<
") does not not match the number "
578 "of entries in the Map on the calling process.");
580 using ::Tpetra::Details::Behavior;
581 const bool debug = Behavior::debug();
583 using ::Tpetra::Details::checkGlobalDualViewValidity;
584 std::ostringstream gblErrStrm;
585 const bool verbose = Behavior::verbose();
586 const auto comm = map.is_null() ? Teuchos::null : map->getComm();
587 const bool gblValid_0 =
588 checkGlobalDualViewValidity(&gblErrStrm, view, verbose,
590 const bool gblValid_1 =
591 checkGlobalDualViewValidity(&gblErrStrm, origView, verbose,
593 const bool gblValid = gblValid_0 && gblValid_1;
594 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!gblValid, std::runtime_error, gblErrStrm.str());
598 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
601 const dual_view_type& view,
602 const Teuchos::ArrayView<const size_t>& whichVectors)
605 , whichVectors_(whichVectors.begin(), whichVectors.end()) {
607 using Kokkos::subview;
608 const char tfecfFuncName[] =
"MultiVector(map,view,whichVectors): ";
610 using ::Tpetra::Details::Behavior;
611 const bool debug = Behavior::debug();
613 using ::Tpetra::Details::checkGlobalDualViewValidity;
614 std::ostringstream gblErrStrm;
615 const bool verbose = Behavior::verbose();
616 const auto comm = map.is_null() ? Teuchos::null : map->getComm();
617 const bool gblValid =
618 checkGlobalDualViewValidity(&gblErrStrm, view, verbose,
620 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!gblValid, std::runtime_error, gblErrStrm.str());
623 const size_t lclNumRows = map.is_null() ? size_t(0) : map->getLocalNumElements();
630 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
631 view.extent(1) != 0 &&
static_cast<size_t>(view.extent(0)) < lclNumRows,
632 std::invalid_argument,
"view.extent(0) = " << view.extent(0) <<
" < map->getLocalNumElements() = " << lclNumRows <<
".");
633 if (whichVectors.size() != 0) {
634 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
635 view.extent(1) != 0 && view.extent(1) == 0,
636 std::invalid_argument,
637 "view.extent(1) = 0, but whichVectors.size()"
639 << whichVectors.size() <<
" > 0.");
640 size_t maxColInd = 0;
641 typedef Teuchos::ArrayView<const size_t>::size_type size_type;
642 for (size_type k = 0; k < whichVectors.size(); ++k) {
643 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
644 whichVectors[k] == Teuchos::OrdinalTraits<size_t>::invalid(),
645 std::invalid_argument,
"whichVectors[" << k <<
"] = "
646 "Teuchos::OrdinalTraits<size_t>::invalid().");
647 maxColInd = std::max(maxColInd, whichVectors[k]);
649 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
650 view.extent(1) != 0 &&
static_cast<size_t>(view.extent(1)) <= maxColInd,
651 std::invalid_argument,
"view.extent(1) = " << view.extent(1) <<
" <= max(whichVectors) = " << maxColInd <<
".");
656 const size_t LDA = getDualViewStride(view);
657 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(LDA < lclNumRows, std::invalid_argument,
658 "LDA = " << LDA <<
" < this->getLocalLength() = " << lclNumRows <<
".");
660 if (whichVectors.size() == 1) {
670 const std::pair<size_t, size_t> colRng(whichVectors[0],
671 whichVectors[0] + 1);
678 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
681 const wrapped_dual_view_type& view,
682 const Teuchos::ArrayView<const size_t>& whichVectors)
685 , whichVectors_(whichVectors.begin(), whichVectors.end()) {
687 using Kokkos::subview;
688 const char tfecfFuncName[] =
"MultiVector(map,view,whichVectors): ";
690 using ::Tpetra::Details::Behavior;
691 const bool debug = Behavior::debug();
693 using ::Tpetra::Details::checkGlobalWrappedDualViewValidity;
694 std::ostringstream gblErrStrm;
695 const bool verbose = Behavior::verbose();
696 const auto comm = map.is_null() ? Teuchos::null : map->getComm();
697 const bool gblValid =
698 checkGlobalWrappedDualViewValidity(&gblErrStrm, view, verbose,
700 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!gblValid, std::runtime_error, gblErrStrm.str());
703 const size_t lclNumRows = map.is_null() ? size_t(0) : map->getLocalNumElements();
710 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
711 view.extent(1) != 0 &&
static_cast<size_t>(view.extent(0)) < lclNumRows,
712 std::invalid_argument,
"view.extent(0) = " << view.extent(0) <<
" < map->getLocalNumElements() = " << lclNumRows <<
".");
713 if (whichVectors.size() != 0) {
714 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
715 view.extent(1) != 0 && view.extent(1) == 0,
716 std::invalid_argument,
717 "view.extent(1) = 0, but whichVectors.size()"
719 << whichVectors.size() <<
" > 0.");
720 size_t maxColInd = 0;
721 typedef Teuchos::ArrayView<const size_t>::size_type size_type;
722 for (size_type k = 0; k < whichVectors.size(); ++k) {
723 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
724 whichVectors[k] == Teuchos::OrdinalTraits<size_t>::invalid(),
725 std::invalid_argument,
"whichVectors[" << k <<
"] = "
726 "Teuchos::OrdinalTraits<size_t>::invalid().");
727 maxColInd = std::max(maxColInd, whichVectors[k]);
729 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
730 view.extent(1) != 0 &&
static_cast<size_t>(view.extent(1)) <= maxColInd,
731 std::invalid_argument,
"view.extent(1) = " << view.extent(1) <<
" <= max(whichVectors) = " << maxColInd <<
".");
736 const size_t LDA = getDualViewStride(view);
737 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(LDA < lclNumRows, std::invalid_argument,
738 "LDA = " << LDA <<
" < this->getLocalLength() = " << lclNumRows <<
".");
740 if (whichVectors.size() == 1) {
750 const std::pair<size_t, size_t> colRng(whichVectors[0],
751 whichVectors[0] + 1);
758 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
761 const dual_view_type& view,
762 const dual_view_type& origView,
763 const Teuchos::ArrayView<const size_t>& whichVectors)
765 , view_(wrapped_dual_view_type(view, origView))
766 , whichVectors_(whichVectors.begin(), whichVectors.end()) {
768 using Kokkos::subview;
769 const char tfecfFuncName[] =
"MultiVector(map,view,origView,whichVectors): ";
771 using ::Tpetra::Details::Behavior;
772 const bool debug = Behavior::debug();
774 using ::Tpetra::Details::checkGlobalDualViewValidity;
775 std::ostringstream gblErrStrm;
776 const bool verbose = Behavior::verbose();
777 const auto comm = map.is_null() ? Teuchos::null : map->getComm();
778 const bool gblValid_0 =
779 checkGlobalDualViewValidity(&gblErrStrm, view, verbose,
781 const bool gblValid_1 =
782 checkGlobalDualViewValidity(&gblErrStrm, origView, verbose,
784 const bool gblValid = gblValid_0 && gblValid_1;
785 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!gblValid, std::runtime_error, gblErrStrm.str());
795 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
796 view.extent(1) != 0 &&
static_cast<size_t>(view.extent(0)) < lclNumRows,
797 std::invalid_argument,
"view.extent(0) = " << view.extent(0) <<
" < map->getLocalNumElements() = " << lclNumRows <<
".");
798 if (whichVectors.size() != 0) {
799 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
800 view.extent(1) != 0 && view.extent(1) == 0,
801 std::invalid_argument,
802 "view.extent(1) = 0, but whichVectors.size()"
804 << whichVectors.size() <<
" > 0.");
805 size_t maxColInd = 0;
806 typedef Teuchos::ArrayView<const size_t>::size_type size_type;
807 for (size_type k = 0; k < whichVectors.size(); ++k) {
808 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
809 whichVectors[k] == Teuchos::OrdinalTraits<size_t>::invalid(),
810 std::invalid_argument,
"whichVectors[" << k <<
"] = "
811 "Teuchos::OrdinalTraits<size_t>::invalid().");
812 maxColInd = std::max(maxColInd, whichVectors[k]);
814 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
815 view.extent(1) != 0 &&
static_cast<size_t>(view.extent(1)) <= maxColInd,
816 std::invalid_argument,
"view.extent(1) = " << view.extent(1) <<
" <= max(whichVectors) = " << maxColInd <<
".");
821 const size_t LDA = getDualViewStride(origView);
822 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(LDA < lclNumRows, std::invalid_argument,
823 "Map and DualView origView "
824 "do not match. LDA = "
825 << LDA <<
" < this->getLocalLength() = " << lclNumRows <<
". origView.extent(0) = " << origView.extent(0)
826 <<
", origView.stride(1) = " << origView.view_device().stride(1) <<
".");
828 if (whichVectors.size() == 1) {
837 const std::pair<size_t, size_t> colRng(whichVectors[0],
838 whichVectors[0] + 1);
846 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
849 const Teuchos::ArrayView<const Scalar>& data,
851 const size_t numVecs)
853 typedef LocalOrdinal LO;
854 typedef GlobalOrdinal GO;
855 const char tfecfFuncName[] =
"MultiVector(map,data,LDA,numVecs): ";
861 const size_t lclNumRows =
862 map.is_null() ? size_t(0) : map->getLocalNumElements();
863 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(LDA < lclNumRows, std::invalid_argument,
"LDA = " << LDA <<
" < "
864 "map->getLocalNumElements() = "
865 << lclNumRows <<
".");
867 const size_t minNumEntries = LDA * (numVecs - 1) + lclNumRows;
868 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(data.size()) < minNumEntries,
869 std::invalid_argument,
870 "Input Teuchos::ArrayView does not have enough "
871 "entries, given the input Map and number of vectors in the MultiVector."
873 << data.size() <<
" < (LDA*(numVecs-1)) + "
874 "map->getLocalNumElements () = "
875 << minNumEntries <<
".");
878 this->
view_ = allocDualView<Scalar, LO, GO, Node>(lclNumRows, numVecs);
890 Kokkos::MemoryUnmanaged>
891 X_in_orig(X_in_raw, LDA, numVecs);
892 const Kokkos::pair<size_t, size_t> rowRng(0, lclNumRows);
893 auto X_in = Kokkos::subview(X_in_orig, rowRng, Kokkos::ALL());
898 const size_t outStride =
899 X_out.extent(1) == 0 ? size_t(1) : X_out.stride(1);
900 if (LDA == outStride) {
906 typedef decltype(Kokkos::subview(X_out, Kokkos::ALL(), 0))
908 typedef decltype(Kokkos::subview(X_in, Kokkos::ALL(), 0))
910 for (
size_t j = 0; j < numVecs; ++j) {
911 out_col_view_type X_out_j = Kokkos::subview(X_out, Kokkos::ALL(), j);
912 in_col_view_type X_in_j = Kokkos::subview(X_in, Kokkos::ALL(), j);
919 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
921 MultiVector(
const Teuchos::RCP<const map_type>& map,
922 const Teuchos::ArrayView<
const Teuchos::ArrayView<const Scalar> >& ArrayOfPtrs,
923 const size_t numVecs)
926 typedef LocalOrdinal LO;
927 typedef GlobalOrdinal GO;
928 const char tfecfFuncName[] =
"MultiVector(map,ArrayOfPtrs,numVecs): ";
931 const size_t lclNumRows =
932 map.is_null() ? size_t(0) : map->getLocalNumElements();
933 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(numVecs < 1 || numVecs != static_cast<size_t>(ArrayOfPtrs.size()),
934 std::runtime_error,
"Either numVecs (= " << numVecs <<
") < 1, or "
935 "ArrayOfPtrs.size() (= "
936 << ArrayOfPtrs.size() <<
") != numVecs.");
937 for (
size_t j = 0; j < numVecs; ++j) {
938 Teuchos::ArrayView<const Scalar> X_j_av = ArrayOfPtrs[j];
939 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
940 static_cast<size_t>(X_j_av.size()) < lclNumRows,
941 std::invalid_argument,
"ArrayOfPtrs[" << j <<
"].size() = " << X_j_av.size() <<
" < map->getLocalNumElements() = " << lclNumRows <<
".");
944 view_ = allocDualView<Scalar, LO, GO, Node>(lclNumRows, numVecs);
949 using array_layout =
typename decltype(X_out)::array_layout;
950 using input_col_view_type =
typename Kokkos::View<
const IST*,
953 Kokkos::MemoryUnmanaged>;
955 const std::pair<size_t, size_t> rowRng(0, lclNumRows);
956 for (
size_t j = 0; j < numVecs; ++j) {
957 Teuchos::ArrayView<const IST> X_j_av =
958 Teuchos::av_reinterpret_cast<
const IST>(ArrayOfPtrs[j]);
959 input_col_view_type X_j_in(X_j_av.getRawPtr(), lclNumRows);
960 auto X_j_out = Kokkos::subview(X_out, rowRng, j);
966 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
969 return whichVectors_.empty();
972 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
976 if (this->getMap().is_null()) {
977 return static_cast<size_t>(0);
979 return this->getMap()->getLocalNumElements();
983 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
987 if (this->getMap().is_null()) {
988 return static_cast<size_t>(0);
990 return this->getMap()->getGlobalNumElements();
994 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
998 return isConstantStride() ? getDualViewStride(view_) : size_t(0);
1001 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1005 auto thisData = view_.getDualView().view_host().data();
1006 auto otherData = other.
view_.getDualView().view_host().data();
1007 size_t thisSpan = view_.getDualView().view_host().span();
1008 size_t otherSpan = other.
view_.getDualView().view_host().span();
1009 return (otherData <= thisData && thisData < otherData + otherSpan) || (thisData <= otherData && otherData < thisData + thisSpan);
1012 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1019 const MV* src =
dynamic_cast<const MV*
>(&sourceObj);
1020 if (src ==
nullptr) {
1033 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1037 return this->getNumVectors();
1040 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1043 const size_t numSameIDs,
1044 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs,
1045 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs,
1047 const execution_space& space) {
1048 using Kokkos::Compat::create_const_view;
1049 using KokkosRefactor::Details::permute_array_multi_column;
1050 using KokkosRefactor::Details::permute_array_multi_column_variable_stride;
1052 using ::Tpetra::Details::Behavior;
1054 using ::Tpetra::Details::ProfilingRegion;
1058 MV& sourceMV =
const_cast<MV&
>(
dynamic_cast<const MV&
>(sourceObj));
1061 if (importsAreAliased() && (this->constantNumberOfPackets() != 0) && Behavior::enableGranularTransfers()) {
1066 copyOnHost = !Behavior::assumeMpiIsGPUAware();
1069 copyOnHost = runKernelOnHost(sourceMV);
1071 const char longFuncNameHost[] =
"Tpetra::MultiVector::copyAndPermute[Host]";
1072 const char longFuncNameDevice[] =
"Tpetra::MultiVector::copyAndPermute[Device]";
1073 const char tfecfFuncName[] =
"copyAndPermute: ";
1074 ProfilingRegion regionCAP(copyOnHost ? longFuncNameHost : longFuncNameDevice);
1076 const bool verbose = Behavior::verbose();
1077 std::unique_ptr<std::string> prefix;
1079 auto map = this->getMap();
1080 auto comm = map.is_null() ? Teuchos::null : map->getComm();
1081 const int myRank = comm.is_null() ? -1 : comm->getRank();
1082 std::ostringstream os;
1083 os <<
"Proc " << myRank <<
": MV::copyAndPermute: ";
1084 prefix = std::unique_ptr<std::string>(
new std::string(os.str()));
1085 os <<
"Start" << endl;
1086 std::cerr << os.str();
1089 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(permuteToLIDs.extent(0) != permuteFromLIDs.extent(0),
1090 std::logic_error,
"permuteToLIDs.extent(0) = " << permuteToLIDs.extent(0) <<
" != permuteFromLIDs.extent(0) = " << permuteFromLIDs.extent(0) <<
".");
1091 const size_t numCols = this->getNumVectors();
1095 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(sourceMV.need_sync_device() && sourceMV.need_sync_host(),
1097 "Input MultiVector needs sync to both host "
1100 std::ostringstream os;
1101 os << *prefix <<
"copyOnHost=" << (copyOnHost ?
"true" :
"false") << endl;
1102 std::cerr << os.str();
1106 std::ostringstream os;
1107 os << *prefix <<
"Copy first " << numSameIDs <<
" elements" << endl;
1108 std::cerr << os.str();
1133 if (numSameIDs > 0) {
1134 const std::pair<size_t, size_t> rows(0, numSameIDs);
1136 ProfilingRegion regionC(
"Tpetra::MultiVector::copy[Host]");
1137 auto tgt_h = this->getLocalViewHost(Access::ReadWrite);
1138 auto src_h = sourceMV.getLocalViewHost(Access::ReadOnly);
1140 for (
size_t j = 0; j < numCols; ++j) {
1141 const size_t tgtCol = isConstantStride() ? j : whichVectors_[j];
1142 const size_t srcCol =
1143 sourceMV.isConstantStride() ? j : sourceMV.whichVectors_[j];
1145 auto tgt_j = Kokkos::subview(tgt_h, rows, tgtCol);
1146 auto src_j = Kokkos::subview(src_h, rows, srcCol);
1150 Kokkos::RangePolicy<execution_space, size_t>;
1151 range_t rp(space, 0, numSameIDs);
1152 Tpetra::Details::AddAssignFunctor<decltype(tgt_j), decltype(src_j)>
1154 Kokkos::parallel_for(rp, aaf);
1159 space.fence(
"Tpetra::MultiVector::copyAndPermute-1");
1163 ProfilingRegion regionC(
"Tpetra::MultiVector::copy[Device]");
1164 auto tgt_d = this->getLocalViewDevice(Access::ReadWrite);
1165 auto src_d = sourceMV.getLocalViewDevice(Access::ReadOnly);
1167 for (
size_t j = 0; j < numCols; ++j) {
1168 const size_t tgtCol = isConstantStride() ? j : whichVectors_[j];
1169 const size_t srcCol =
1170 sourceMV.isConstantStride() ? j : sourceMV.whichVectors_[j];
1172 auto tgt_j = Kokkos::subview(tgt_d, rows, tgtCol);
1173 auto src_j = Kokkos::subview(src_d, rows, srcCol);
1177 Kokkos::RangePolicy<execution_space, size_t>;
1178 range_t rp(space, 0, numSameIDs);
1179 Tpetra::Details::AddAssignFunctor<decltype(tgt_j), decltype(src_j)>
1181 Kokkos::parallel_for(rp, aaf);
1186 space.fence(
"Tpetra::MultiVector::copyAndPermute-2");
1202 if (permuteFromLIDs.extent(0) == 0 ||
1203 permuteToLIDs.extent(0) == 0) {
1205 std::ostringstream os;
1206 os << *prefix <<
"No permutations. Done!" << endl;
1207 std::cerr << os.str();
1211 ProfilingRegion regionP(
"Tpetra::MultiVector::permute");
1214 std::ostringstream os;
1215 os << *prefix <<
"Permute" << endl;
1216 std::cerr << os.str();
1221 const bool nonConstStride =
1222 !this->isConstantStride() || !sourceMV.isConstantStride();
1225 std::ostringstream os;
1226 os << *prefix <<
"nonConstStride="
1227 << (nonConstStride ?
"true" :
"false") << endl;
1228 std::cerr << os.str();
1235 Kokkos::DualView<const size_t*, device_type> srcWhichVecs;
1236 Kokkos::DualView<const size_t*, device_type> tgtWhichVecs;
1237 if (nonConstStride) {
1238 if (this->whichVectors_.size() == 0) {
1239 Kokkos::DualView<size_t*, device_type> tmpTgt(
"tgtWhichVecs", numCols);
1240 tmpTgt.modify_host();
1241 for (
size_t j = 0; j < numCols; ++j) {
1242 tmpTgt.view_host()(j) = j;
1245 tmpTgt.sync_device();
1247 tgtWhichVecs = tmpTgt;
1249 Teuchos::ArrayView<const size_t> tgtWhichVecsT = this->whichVectors_();
1251 getDualViewCopyFromArrayView<size_t, device_type>(tgtWhichVecsT,
1255 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(tgtWhichVecs.extent(0)) !=
1256 this->getNumVectors(),
1257 std::logic_error,
"tgtWhichVecs.extent(0) = " << tgtWhichVecs.extent(0) <<
" != this->getNumVectors() = " << this->getNumVectors() <<
".");
1259 if (sourceMV.whichVectors_.size() == 0) {
1260 Kokkos::DualView<size_t*, device_type> tmpSrc(
"srcWhichVecs", numCols);
1261 tmpSrc.modify_host();
1262 for (
size_t j = 0; j < numCols; ++j) {
1263 tmpSrc.view_host()(j) = j;
1266 tmpSrc.sync_device();
1268 srcWhichVecs = tmpSrc;
1270 Teuchos::ArrayView<const size_t> srcWhichVecsT =
1271 sourceMV.whichVectors_();
1273 getDualViewCopyFromArrayView<size_t, device_type>(srcWhichVecsT,
1277 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(srcWhichVecs.extent(0)) !=
1278 sourceMV.getNumVectors(),
1280 "srcWhichVecs.extent(0) = " << srcWhichVecs.extent(0)
1281 <<
" != sourceMV.getNumVectors() = " << sourceMV.getNumVectors()
1287 std::ostringstream os;
1288 os << *prefix <<
"Get permute LIDs on host" << std::endl;
1289 std::cerr << os.str();
1291 auto tgt_h = this->getLocalViewHost(Access::ReadWrite);
1292 auto src_h = sourceMV.getLocalViewHost(Access::ReadOnly);
1294 TEUCHOS_ASSERT(!permuteToLIDs.need_sync_host());
1295 auto permuteToLIDs_h = create_const_view(permuteToLIDs.view_host());
1296 TEUCHOS_ASSERT(!permuteFromLIDs.need_sync_host());
1297 auto permuteFromLIDs_h =
1298 create_const_view(permuteFromLIDs.view_host());
1301 std::ostringstream os;
1302 os << *prefix <<
"Permute on host" << endl;
1303 std::cerr << os.str();
1305 if (nonConstStride) {
1308 auto tgtWhichVecs_h =
1309 create_const_view(tgtWhichVecs.view_host());
1310 auto srcWhichVecs_h =
1311 create_const_view(srcWhichVecs.view_host());
1313 using op_type = KokkosRefactor::Details::AddOp;
1314 permute_array_multi_column_variable_stride(tgt_h, src_h,
1318 srcWhichVecs_h, numCols,
1321 using op_type = KokkosRefactor::Details::InsertOp;
1322 permute_array_multi_column_variable_stride(tgt_h, src_h,
1326 srcWhichVecs_h, numCols,
1331 using op_type = KokkosRefactor::Details::AddOp;
1332 permute_array_multi_column(tgt_h, src_h, permuteToLIDs_h,
1333 permuteFromLIDs_h, numCols, op_type());
1335 using op_type = KokkosRefactor::Details::InsertOp;
1336 permute_array_multi_column(tgt_h, src_h, permuteToLIDs_h,
1337 permuteFromLIDs_h, numCols, op_type());
1342 std::ostringstream os;
1343 os << *prefix <<
"Get permute LIDs on device" << endl;
1344 std::cerr << os.str();
1346 auto tgt_d = this->getLocalViewDevice(Access::ReadWrite);
1347 auto src_d = sourceMV.getLocalViewDevice(Access::ReadOnly);
1349 TEUCHOS_ASSERT(!permuteToLIDs.need_sync_device());
1350 auto permuteToLIDs_d = create_const_view(permuteToLIDs.view_device());
1351 TEUCHOS_ASSERT(!permuteFromLIDs.need_sync_device());
1352 auto permuteFromLIDs_d =
1353 create_const_view(permuteFromLIDs.view_device());
1356 std::ostringstream os;
1357 os << *prefix <<
"Permute on device" << endl;
1358 std::cerr << os.str();
1360 if (nonConstStride) {
1363 auto tgtWhichVecs_d = create_const_view(tgtWhichVecs.view_device());
1364 auto srcWhichVecs_d = create_const_view(srcWhichVecs.view_device());
1366 using op_type = KokkosRefactor::Details::AddOp;
1367 permute_array_multi_column_variable_stride(space, tgt_d, src_d,
1371 srcWhichVecs_d, numCols,
1374 using op_type = KokkosRefactor::Details::InsertOp;
1375 permute_array_multi_column_variable_stride(space, tgt_d, src_d,
1379 srcWhichVecs_d, numCols,
1384 using op_type = KokkosRefactor::Details::AddOp;
1385 permute_array_multi_column(space, tgt_d, src_d, permuteToLIDs_d,
1386 permuteFromLIDs_d, numCols, op_type());
1388 using op_type = KokkosRefactor::Details::InsertOp;
1389 permute_array_multi_column(space, tgt_d, src_d, permuteToLIDs_d,
1390 permuteFromLIDs_d, numCols, op_type());
1396 std::ostringstream os;
1397 os << *prefix <<
"Done!" << endl;
1398 std::cerr << os.str();
1402 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1405 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs,
1406 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs,
1408 copyAndPermute(sourceObj, numSameIDs, permuteToLIDs, permuteFromLIDs, CM,
1412 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1415 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
1416 Kokkos::DualView<impl_scalar_type*, buffer_device_type>& exports,
1417 Kokkos::DualView<size_t*, buffer_device_type> ,
1418 size_t& constantNumPackets,
1419 const execution_space& space) {
1420 using Kokkos::Compat::create_const_view;
1421 using Kokkos::Compat::getKokkosViewDeepCopy;
1423 using ::Tpetra::Details::Behavior;
1424 using ::Tpetra::Details::ProfilingRegion;
1429 MV& sourceMV =
const_cast<MV&
>(
dynamic_cast<const MV&
>(sourceObj));
1431 const bool packOnHost = runKernelOnHost(sourceMV);
1432 const char longFuncNameHost[] =
"Tpetra::MultiVector::packAndPrepare[Host]";
1433 const char longFuncNameDevice[] =
"Tpetra::MultiVector::packAndPrepare[Device]";
1434 const char tfecfFuncName[] =
"packAndPrepare: ";
1435 ProfilingRegion regionPAP(packOnHost ? longFuncNameHost : longFuncNameDevice);
1443 const bool debugCheckIndices = Behavior::debug();
1448 const bool printDebugOutput = Behavior::verbose();
1450 std::unique_ptr<std::string> prefix;
1451 if (printDebugOutput) {
1452 auto map = this->getMap();
1453 auto comm = map.is_null() ? Teuchos::null : map->getComm();
1454 const int myRank = comm.is_null() ? -1 : comm->getRank();
1455 std::ostringstream os;
1456 os <<
"Proc " << myRank <<
": MV::packAndPrepare: ";
1457 prefix = std::unique_ptr<std::string>(
new std::string(os.str()));
1458 os <<
"Start" << endl;
1459 std::cerr << os.str();
1462 const size_t numCols = sourceMV.getNumVectors();
1467 constantNumPackets = numCols;
1471 if (exportLIDs.extent(0) == 0) {
1472 if (printDebugOutput) {
1473 std::ostringstream os;
1474 os << *prefix <<
"No exports on this proc, DONE" << std::endl;
1475 std::cerr << os.str();
1495 const size_t numExportLIDs = exportLIDs.extent(0);
1496 const size_t newExportsSize = numCols * numExportLIDs;
1497 if (printDebugOutput) {
1498 std::ostringstream os;
1499 os << *prefix <<
"realloc: "
1500 <<
"numExportLIDs: " << numExportLIDs
1501 <<
", exports.extent(0): " << exports.extent(0)
1502 <<
", newExportsSize: " << newExportsSize << std::endl;
1503 std::cerr << os.str();
1509 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(sourceMV.need_sync_device() && sourceMV.need_sync_host(),
1511 "Input MultiVector needs sync to both host "
1513 if (printDebugOutput) {
1514 std::ostringstream os;
1515 os << *prefix <<
"packOnHost=" << (packOnHost ?
"true" :
"false") << endl;
1516 std::cerr << os.str();
1528 exports.clear_sync_state();
1529 exports.modify_host();
1536 exports.clear_sync_state();
1537 exports.modify_device();
1553 if (sourceMV.isConstantStride()) {
1554 using KokkosRefactor::Details::pack_array_single_column;
1555 if (printDebugOutput) {
1556 std::ostringstream os;
1557 os << *prefix <<
"Pack numCols=1 const stride" << endl;
1558 std::cerr << os.str();
1561 auto src_host = sourceMV.getLocalViewHost(Access::ReadOnly);
1562 pack_array_single_column(exports.view_host(),
1564 exportLIDs.view_host(),
1568 auto src_dev = sourceMV.getLocalViewDevice(Access::ReadOnly);
1569 pack_array_single_column(exports.view_device(),
1571 exportLIDs.view_device(),
1577 using KokkosRefactor::Details::pack_array_single_column;
1578 if (printDebugOutput) {
1579 std::ostringstream os;
1580 os << *prefix <<
"Pack numCols=1 nonconst stride" << endl;
1581 std::cerr << os.str();
1584 auto src_host = sourceMV.getLocalViewHost(Access::ReadOnly);
1585 pack_array_single_column(exports.view_host(),
1587 exportLIDs.view_host(),
1588 sourceMV.whichVectors_[0],
1591 auto src_dev = sourceMV.getLocalViewDevice(Access::ReadOnly);
1592 pack_array_single_column(exports.view_device(),
1594 exportLIDs.view_device(),
1595 sourceMV.whichVectors_[0],
1601 if (sourceMV.isConstantStride()) {
1602 using KokkosRefactor::Details::pack_array_multi_column;
1603 if (printDebugOutput) {
1604 std::ostringstream os;
1605 os << *prefix <<
"Pack numCols=" << numCols <<
" const stride" << endl;
1606 std::cerr << os.str();
1609 auto src_host = sourceMV.getLocalViewHost(Access::ReadOnly);
1610 pack_array_multi_column(exports.view_host(),
1612 exportLIDs.view_host(),
1616 auto src_dev = sourceMV.getLocalViewDevice(Access::ReadOnly);
1617 pack_array_multi_column(exports.view_device(),
1619 exportLIDs.view_device(),
1625 using KokkosRefactor::Details::pack_array_multi_column_variable_stride;
1626 if (printDebugOutput) {
1627 std::ostringstream os;
1628 os << *prefix <<
"Pack numCols=" << numCols <<
" nonconst stride"
1630 std::cerr << os.str();
1635 using IST = impl_scalar_type;
1636 using DV = Kokkos::DualView<IST*, device_type>;
1637 using HES =
typename DV::t_host::execution_space;
1638 using DES =
typename DV::t_dev::execution_space;
1639 Teuchos::ArrayView<const size_t> whichVecs = sourceMV.whichVectors_();
1641 auto src_host = sourceMV.getLocalViewHost(Access::ReadOnly);
1642 pack_array_multi_column_variable_stride(exports.view_host(),
1644 exportLIDs.view_host(),
1645 getKokkosViewDeepCopy<HES>(whichVecs),
1649 auto src_dev = sourceMV.getLocalViewDevice(Access::ReadOnly);
1650 pack_array_multi_column_variable_stride(exports.view_device(),
1652 exportLIDs.view_device(),
1653 getKokkosViewDeepCopy<DES>(whichVecs),
1655 debugCheckIndices, space);
1660 if (printDebugOutput) {
1661 std::ostringstream os;
1662 os << *prefix <<
"Done!" << endl;
1663 std::cerr << os.str();
1667 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1670 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
1671 Kokkos::DualView<impl_scalar_type*, buffer_device_type>& exports,
1672 Kokkos::DualView<size_t*, buffer_device_type> numExportPacketsPerLID,
1673 size_t& constantNumPackets) {
1674 packAndPrepare(sourceObj, exportLIDs, exports, numExportPacketsPerLID, constantNumPackets, execution_space());
1677 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1679 typename std::enable_if<std::is_same<typename Tpetra::Details::DefaultTypes::CommBufferMemorySpace<typename NO::execution_space>::type,
1680 typename NO::device_type::memory_space>::value,
1685 const std::string* prefix,
1686 const bool areRemoteLIDsContiguous,
1694 std::ostringstream os;
1695 os << *prefix <<
"Realloc (if needed) imports_ from "
1696 << this->imports_.extent(0) <<
" to " << newSize << std::endl;
1697 std::cerr << os.str();
1700 bool reallocated =
false;
1702 using IST = impl_scalar_type;
1703 using DV = Kokkos::DualView<IST*, Kokkos::LayoutLeft, buffer_device_type>;
1715 if ((std::is_same_v<typename dual_view_type::t_dev::device_type, typename dual_view_type::t_host::device_type> ||
1716 (Details::Behavior::assumeMpiIsGPUAware() && !this->need_sync_device()) ||
1717 (!Details::Behavior::assumeMpiIsGPUAware() && !this->need_sync_host())) &&
1718 areRemoteLIDsContiguous &&
1720 (getNumVectors() == 1) &&
1722 size_t offset = getLocalLength() - newSize;
1723 reallocated = this->imports_.view_device().data() != view_.getDualView().view_device().data() + offset;
1725 typedef std::pair<size_t, size_t> range_type;
1726 this->imports_ = DV(view_.getDualView(),
1727 range_type(offset, getLocalLength()),
1731 std::ostringstream os;
1732 os << *prefix <<
"Aliased imports_ to MV.view_" << std::endl;
1733 std::cerr << os.str();
1743 std::ostringstream os;
1744 os << *prefix <<
"Finished realloc'ing unaliased_imports_" << std::endl;
1745 std::cerr << os.str();
1747 this->imports_ = this->unaliased_imports_;
1752 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1754 typename std::enable_if<!std::is_same<typename Tpetra::Details::DefaultTypes::CommBufferMemorySpace<typename NO::execution_space>::type,
1755 typename NO::device_type::memory_space>::value,
1760 const std::string* prefix,
1761 const bool areRemoteLIDsContiguous,
1766 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1770 const std::string* prefix,
1771 const bool areRemoteLIDsContiguous,
1774 return reallocImportsIfNeededImpl<Node>(newSize, verbose, prefix, areRemoteLIDsContiguous, CM);
1777 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1780 return (this->imports_.view_device().data() + this->imports_.view_device().extent(0) ==
1781 view_.getDualView().view_device().data() + view_.getDualView().view_device().extent(0));
1784 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1786 unpackAndCombine(
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& importLIDs,
1787 Kokkos::DualView<impl_scalar_type*, buffer_device_type> imports,
1788 Kokkos::DualView<size_t*, buffer_device_type> ,
1789 const size_t constantNumPackets,
1791 const execution_space& space) {
1792 using Kokkos::Compat::getKokkosViewDeepCopy;
1793 using KokkosRefactor::Details::unpack_array_multi_column;
1794 using KokkosRefactor::Details::unpack_array_multi_column_variable_stride;
1796 using ::Tpetra::Details::Behavior;
1797 using ::Tpetra::Details::ProfilingRegion;
1799 const bool unpackOnHost = runKernelOnHost(imports);
1801 const char longFuncNameHost[] =
"Tpetra::MultiVector::unpackAndCombine[Host]";
1802 const char longFuncNameDevice[] =
"Tpetra::MultiVector::unpackAndCombine[Device]";
1803 const char* longFuncName = unpackOnHost ? longFuncNameHost : longFuncNameDevice;
1804 const char tfecfFuncName[] =
"unpackAndCombine: ";
1812 const bool debugCheckIndices = Behavior::debug();
1814 const bool printDebugOutput = Behavior::verbose();
1815 std::unique_ptr<std::string> prefix;
1816 if (printDebugOutput) {
1817 auto map = this->getMap();
1818 auto comm = map.is_null() ? Teuchos::null : map->getComm();
1819 const int myRank = comm.is_null() ? -1 : comm->getRank();
1820 std::ostringstream os;
1821 os <<
"Proc " << myRank <<
": " << longFuncName <<
": ";
1822 prefix = std::unique_ptr<std::string>(
new std::string(os.str()));
1823 os <<
"Start" << endl;
1824 std::cerr << os.str();
1828 if (importLIDs.extent(0) == 0) {
1829 if (printDebugOutput) {
1830 std::ostringstream os;
1831 os << *prefix <<
"No imports. Done!" << endl;
1832 std::cerr << os.str();
1838 if (importsAreAliased()) {
1839 if (printDebugOutput) {
1840 std::ostringstream os;
1841 os << *prefix <<
"Skipping unpack, since imports_ is aliased to MV.view_. Done!" << endl;
1842 std::cerr << os.str();
1847 ProfilingRegion regionUAC(longFuncName);
1849 const size_t numVecs = getNumVectors();
1850 if (debugCheckIndices) {
1851 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(imports.extent(0)) !=
1852 numVecs * importLIDs.extent(0),
1854 "imports.extent(0) = " << imports.extent(0)
1855 <<
" != getNumVectors() * importLIDs.extent(0) = " << numVecs
1856 <<
" * " << importLIDs.extent(0) <<
" = "
1857 << numVecs * importLIDs.extent(0) <<
".");
1859 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(constantNumPackets == static_cast<size_t>(0), std::runtime_error,
1860 "constantNumPackets input argument must be nonzero.");
1862 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(numVecs) !=
1863 static_cast<size_t>(constantNumPackets),
1864 std::runtime_error,
"constantNumPackets must equal numVecs.");
1873 if (this->imports_.need_sync_host()) this->imports_.sync_host();
1875 if (this->imports_.need_sync_device()) this->imports_.sync_device();
1878 if (printDebugOutput) {
1879 std::ostringstream os;
1880 os << *prefix <<
"unpackOnHost=" << (unpackOnHost ?
"true" :
"false")
1882 std::cerr << os.str();
1887 auto imports_d = imports.view_device();
1888 auto imports_h = imports.view_host();
1889 auto importLIDs_d = importLIDs.view_device();
1890 auto importLIDs_h = importLIDs.view_host();
1892 Kokkos::DualView<size_t*, device_type> whichVecs;
1893 if (!isConstantStride()) {
1894 Kokkos::View<
const size_t*, Kokkos::HostSpace,
1895 Kokkos::MemoryUnmanaged>
1896 whichVecsIn(whichVectors_.getRawPtr(),
1898 whichVecs = Kokkos::DualView<size_t*, device_type>(
"whichVecs", numVecs);
1900 whichVecs.modify_host();
1904 whichVecs.modify_device();
1909 auto whichVecs_d = whichVecs.view_device();
1910 auto whichVecs_h = whichVecs.view_host();
1920 if (numVecs > 0 && importLIDs.extent(0) > 0) {
1921 using dev_exec_space =
typename dual_view_type::t_dev::execution_space;
1922 using host_exec_space =
typename dual_view_type::t_host::execution_space;
1925 const bool use_atomic_updates = unpackOnHost ? host_exec_space().concurrency() != 1 : dev_exec_space().concurrency() != 1;
1927 if (printDebugOutput) {
1928 std::ostringstream os;
1930 std::cerr << os.str();
1937 using op_type = KokkosRefactor::Details::InsertOp;
1938 if (isConstantStride()) {
1940 auto X_h = this->getLocalViewHost(Access::ReadWrite);
1941 unpack_array_multi_column(host_exec_space(),
1942 X_h, imports_h, importLIDs_h,
1948 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
1949 unpack_array_multi_column(space,
1950 X_d, imports_d, importLIDs_d,
1957 auto X_h = this->getLocalViewHost(Access::ReadWrite);
1958 unpack_array_multi_column_variable_stride(host_exec_space(),
1967 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
1968 unpack_array_multi_column_variable_stride(space,
1979 using op_type = KokkosRefactor::Details::AddOp;
1980 if (isConstantStride()) {
1982 auto X_h = this->getLocalViewHost(Access::ReadWrite);
1983 unpack_array_multi_column(host_exec_space(),
1984 X_h, imports_h, importLIDs_h,
1989 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
1990 unpack_array_multi_column(space,
1991 X_d, imports_d, importLIDs_d,
1998 auto X_h = this->getLocalViewHost(Access::ReadWrite);
1999 unpack_array_multi_column_variable_stride(host_exec_space(),
2008 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
2009 unpack_array_multi_column_variable_stride(space,
2019 }
else if (CM ==
ABSMAX) {
2020 using op_type = KokkosRefactor::Details::AbsMaxOp;
2021 if (isConstantStride()) {
2023 auto X_h = this->getLocalViewHost(Access::ReadWrite);
2024 unpack_array_multi_column(host_exec_space(),
2025 X_h, imports_h, importLIDs_h,
2030 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
2031 unpack_array_multi_column(space,
2032 X_d, imports_d, importLIDs_d,
2039 auto X_h = this->getLocalViewHost(Access::ReadWrite);
2040 unpack_array_multi_column_variable_stride(host_exec_space(),
2049 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
2050 unpack_array_multi_column_variable_stride(space,
2061 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
true, std::logic_error,
"Invalid CombineMode");
2064 if (printDebugOutput) {
2065 std::ostringstream os;
2066 os << *prefix <<
"Nothing to unpack" << endl;
2067 std::cerr << os.str();
2071 if (printDebugOutput) {
2072 std::ostringstream os;
2073 os << *prefix <<
"Done!" << endl;
2074 std::cerr << os.str();
2078 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2080 unpackAndCombine(
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& importLIDs,
2081 Kokkos::DualView<impl_scalar_type*, buffer_device_type> imports,
2082 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
2083 const size_t constantNumPackets,
2085 unpackAndCombine(importLIDs, imports, numPacketsPerLID, constantNumPackets, CM, execution_space());
2088 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2092 if (isConstantStride()) {
2093 return static_cast<size_t>(view_.extent(1));
2095 return static_cast<size_t>(whichVectors_.size());
2102 void gblDotImpl(
const RV& dotsOut,
2103 const Teuchos::RCP<
const Teuchos::Comm<int> >& comm,
2104 const bool distributed) {
2105 using Teuchos::REDUCE_MAX;
2106 using Teuchos::REDUCE_SUM;
2107 using Teuchos::reduceAll;
2108 typedef typename RV::non_const_value_type dot_type;
2110 const size_t numVecs = dotsOut.extent(0);
2125 if (distributed && !comm.is_null()) {
2128 const int nv =
static_cast<int>(numVecs);
2129 const bool commIsInterComm = ::Tpetra::Details::isInterComm(*comm);
2131 if (commIsInterComm) {
2135 typename RV::non_const_type lclDots(Kokkos::ViewAllocateWithoutInitializing(
"tmp"), numVecs);
2138 const dot_type*
const lclSum = lclDots.data();
2139 dot_type*
const gblSum = dotsOut.data();
2140 reduceAll<int, dot_type>(*comm, REDUCE_SUM, nv, lclSum, gblSum);
2142 dot_type*
const inout = dotsOut.data();
2143 reduceAll<int, dot_type>(*comm, REDUCE_SUM, nv, inout, inout);
2149 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2152 const Kokkos::View<dot_type*, Kokkos::HostSpace>& dots)
const {
2153 using Kokkos::subview;
2154 using Teuchos::Comm;
2155 using Teuchos::null;
2157 using ::Tpetra::Details::Behavior;
2159 typedef Kokkos::View<dot_type*, Kokkos::HostSpace> RV;
2160 typedef typename dual_view_type::t_dev_const XMV;
2161 const char tfecfFuncName[] =
"Tpetra::MultiVector::dot: ";
2165 const size_t numVecs = this->getNumVectors();
2169 const size_t lclNumRows = this->getLocalLength();
2170 const size_t numDots =
static_cast<size_t>(dots.extent(0));
2171 const bool debug = Behavior::debug();
2174 const bool compat = this->getMap()->isCompatible(*(A.
getMap()));
2175 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!compat, std::invalid_argument,
2176 "'*this' MultiVector is not "
2177 "compatible with the input MultiVector A. We only test for this "
2188 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2190 "MultiVectors do not have the same local length. "
2191 "this->getLocalLength() = "
2192 << lclNumRows <<
" != "
2193 "A.getLocalLength() = "
2195 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2197 "MultiVectors must have the same number of columns (vectors). "
2198 "this->getNumVectors() = "
2199 << numVecs <<
" != "
2200 "A.getNumVectors() = "
2202 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2203 numDots != numVecs, std::runtime_error,
2204 "The output array 'dots' must have the same number of entries as the "
2205 "number of columns (vectors) in *this and A. dots.extent(0) = "
2206 << numDots <<
" != this->getNumVectors() = " << numVecs <<
".");
2208 const std::pair<size_t, size_t> colRng(0, numVecs);
2209 RV dotsOut = subview(dots, colRng);
2210 RCP<const Comm<int> > comm = this->getMap().is_null() ? null : this->getMap()->getComm();
2212 auto thisView = this->getLocalViewDevice(Access::ReadOnly);
2215 ::Tpetra::Details::lclDot<RV, XMV>(dotsOut, thisView, A_view, lclNumRows, numVecs,
2216 this->whichVectors_.getRawPtr(),
2227 exec_space_instance.fence(
"Tpetra::MultiVector::dot");
2229 gblDotImpl(dotsOut, comm, this->isDistributed());
2233 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2237 using ::Tpetra::Details::ProfilingRegion;
2239 using dot_type =
typename MV::dot_type;
2240 ProfilingRegion region(
"Tpetra::multiVectorSingleColumnDot");
2243 Teuchos::RCP<const Teuchos::Comm<int> > comm =
2244 map.is_null() ? Teuchos::null : map->getComm();
2245 if (comm.is_null()) {
2246 #if KOKKOS_VERSION >= 40799
2247 return KokkosKernels::ArithTraits<dot_type>::zero();
2249 return Kokkos::ArithTraits<dot_type>::zero();
2252 using LO = LocalOrdinal;
2256 const LO lclNumRows =
static_cast<LO
>(std::min(x.
getLocalLength(),
2258 const Kokkos::pair<LO, LO> rowRng(0, lclNumRows);
2259 #if KOKKOS_VERSION >= 40799
2260 dot_type lclDot = KokkosKernels::ArithTraits<dot_type>::zero();
2262 dot_type lclDot = Kokkos::ArithTraits<dot_type>::zero();
2264 #if KOKKOS_VERSION >= 40799
2265 dot_type gblDot = KokkosKernels::ArithTraits<dot_type>::zero();
2267 dot_type gblDot = Kokkos::ArithTraits<dot_type>::zero();
2275 auto x_1d = Kokkos::subview(x_2d, rowRng, 0);
2277 auto y_1d = Kokkos::subview(y_2d, rowRng, 0);
2278 lclDot = KokkosBlas::dot(x_1d, y_1d);
2281 using Teuchos::outArg;
2282 using Teuchos::REDUCE_SUM;
2283 using Teuchos::reduceAll;
2284 reduceAll<int, dot_type>(*comm, REDUCE_SUM, lclDot, outArg(gblDot));
2293 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2296 const Teuchos::ArrayView<dot_type>& dots)
const {
2297 const char tfecfFuncName[] =
"dot: ";
2300 const size_t numVecs = this->getNumVectors();
2301 const size_t lclNumRows = this->getLocalLength();
2302 const size_t numDots =
static_cast<size_t>(dots.size());
2312 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(lclNumRows != A.
getLocalLength(), std::runtime_error,
2313 "MultiVectors do not have the same local length. "
2314 "this->getLocalLength() = "
2315 << lclNumRows <<
" != "
2316 "A.getLocalLength() = "
2318 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(numVecs != A.
getNumVectors(), std::runtime_error,
2319 "MultiVectors must have the same number of columns (vectors). "
2320 "this->getNumVectors() = "
2321 << numVecs <<
" != "
2322 "A.getNumVectors() = "
2324 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(numDots != numVecs, std::runtime_error,
2325 "The output array 'dots' must have the same number of entries as the "
2326 "number of columns (vectors) in *this and A. dots.extent(0) = "
2327 << numDots <<
" != this->getNumVectors() = " << numVecs <<
".");
2330 const dot_type gblDot = multiVectorSingleColumnDot(*
this, A);
2333 this->dot(A, Kokkos::View<dot_type*, Kokkos::HostSpace>(dots.getRawPtr(), numDots));
2337 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2339 norm2(
const Teuchos::ArrayView<mag_type>& norms)
const {
2341 using ::Tpetra::Details::NORM_TWO;
2342 using ::Tpetra::Details::ProfilingRegion;
2343 ProfilingRegion region(
"Tpetra::MV::norm2 (host output)");
2346 MV& X =
const_cast<MV&
>(*this);
2347 multiVectorNormImpl(norms.getRawPtr(), X, NORM_TWO);
2350 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2352 norm2(
const Kokkos::View<mag_type*, Kokkos::HostSpace>& norms)
const {
2353 Teuchos::ArrayView<mag_type> norms_av(norms.data(), norms.extent(0));
2354 this->norm2(norms_av);
2357 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2359 norm1(
const Teuchos::ArrayView<mag_type>& norms)
const {
2361 using ::Tpetra::Details::NORM_ONE;
2362 using ::Tpetra::Details::ProfilingRegion;
2363 ProfilingRegion region(
"Tpetra::MV::norm1 (host output)");
2366 MV& X =
const_cast<MV&
>(*this);
2367 multiVectorNormImpl(norms.data(), X, NORM_ONE);
2370 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2372 norm1(
const Kokkos::View<mag_type*, Kokkos::HostSpace>& norms)
const {
2373 Teuchos::ArrayView<mag_type> norms_av(norms.data(), norms.extent(0));
2374 this->norm1(norms_av);
2377 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2379 normInf(
const Teuchos::ArrayView<mag_type>& norms)
const {
2381 using ::Tpetra::Details::NORM_INF;
2382 using ::Tpetra::Details::ProfilingRegion;
2383 ProfilingRegion region(
"Tpetra::MV::normInf (host output)");
2386 MV& X =
const_cast<MV&
>(*this);
2387 multiVectorNormImpl(norms.getRawPtr(), X, NORM_INF);
2390 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2392 normInf(
const Kokkos::View<mag_type*, Kokkos::HostSpace>& norms)
const {
2393 Teuchos::ArrayView<mag_type> norms_av(norms.data(), norms.extent(0));
2394 this->normInf(norms_av);
2397 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2399 meanValue(
const Teuchos::ArrayView<impl_scalar_type>& means)
const {
2402 using Kokkos::subview;
2403 using Teuchos::Comm;
2405 using Teuchos::REDUCE_SUM;
2406 using Teuchos::reduceAll;
2407 #if KOKKOS_VERSION >= 40799
2408 typedef KokkosKernels::ArithTraits<impl_scalar_type> ATS;
2410 typedef Kokkos::ArithTraits<impl_scalar_type> ATS;
2413 const size_t lclNumRows = this->getLocalLength();
2414 const size_t numVecs = this->getNumVectors();
2415 const size_t numMeans =
static_cast<size_t>(means.size());
2417 TEUCHOS_TEST_FOR_EXCEPTION(
2418 numMeans != numVecs, std::runtime_error,
2419 "Tpetra::MultiVector::meanValue: means.size() = " << numMeans
2420 <<
" != this->getNumVectors() = " << numVecs <<
".");
2422 const std::pair<size_t, size_t> rowRng(0, lclNumRows);
2423 const std::pair<size_t, size_t> colRng(0, numVecs);
2428 typedef Kokkos::View<impl_scalar_type*, device_type> local_view_type;
2430 typename local_view_type::host_mirror_type::array_layout,
2432 Kokkos::MemoryTraits<Kokkos::Unmanaged> >
2433 host_local_view_type;
2434 host_local_view_type meansOut(means.getRawPtr(), numMeans);
2436 RCP<const Comm<int> > comm = this->getMap().is_null() ? Teuchos::null : this->getMap()->getComm();
2439 const bool useHostVersion = this->need_sync_device();
2440 if (useHostVersion) {
2442 auto X_lcl = subview(getLocalViewHost(Access::ReadOnly),
2443 rowRng, Kokkos::ALL());
2445 Kokkos::View<impl_scalar_type*, Kokkos::HostSpace> lclSums(
"MV::meanValue tmp", numVecs);
2446 if (isConstantStride()) {
2447 KokkosBlas::sum(lclSums, X_lcl);
2449 for (
size_t j = 0; j < numVecs; ++j) {
2450 const size_t col = whichVectors_[j];
2451 KokkosBlas::sum(subview(lclSums, j), subview(X_lcl, ALL(), col));
2458 if (!comm.is_null() && this->isDistributed()) {
2459 reduceAll(*comm, REDUCE_SUM, static_cast<int>(numVecs),
2460 lclSums.data(), meansOut.data());
2467 auto X_lcl = subview(this->getLocalViewDevice(Access::ReadOnly),
2468 rowRng, Kokkos::ALL());
2471 Kokkos::View<impl_scalar_type*, Kokkos::HostSpace> lclSums(
"MV::meanValue tmp", numVecs);
2472 if (isConstantStride()) {
2473 KokkosBlas::sum(lclSums, X_lcl);
2475 for (
size_t j = 0; j < numVecs; ++j) {
2476 const size_t col = whichVectors_[j];
2477 KokkosBlas::sum(subview(lclSums, j), subview(X_lcl, ALL(), col));
2487 exec_space_instance.fence(
"Tpetra::MultiVector::mean");
2493 if (!comm.is_null() && this->isDistributed()) {
2494 reduceAll(*comm, REDUCE_SUM, static_cast<int>(numVecs),
2495 lclSums.data(), meansOut.data());
2505 const impl_scalar_type OneOverN =
2506 ATS::one() /
static_cast<mag_type>(this->getGlobalLength());
2507 for (
size_t k = 0; k < numMeans; ++k) {
2508 meansOut(k) = meansOut(k) * OneOverN;
2512 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2516 #if KOKKOS_VERSION >= 40799
2517 typedef KokkosKernels::ArithTraits<IST> ATS;
2519 typedef Kokkos::ArithTraits<IST> ATS;
2521 typedef Kokkos::Random_XorShift64_Pool<typename device_type::execution_space> pool_type;
2522 typedef typename pool_type::generator_type generator_type;
2524 const IST max = Kokkos::rand<generator_type, IST>::max();
2525 const IST min = ATS::is_signed ? IST(-max) : ATS::zero();
2527 this->randomize(static_cast<Scalar>(min), static_cast<Scalar>(max));
2530 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2534 typedef Tpetra::Details::Static_Random_XorShift64_Pool<typename device_type::execution_space> tpetra_pool_type;
2535 typedef Kokkos::Random_XorShift64_Pool<typename device_type::execution_space> pool_type;
2538 if (!tpetra_pool_type::isSet())
2539 tpetra_pool_type::resetPool(this->getMap()->getComm()->getRank());
2541 pool_type& rand_pool = tpetra_pool_type::getPool();
2542 const IST max =
static_cast<IST
>(maxVal);
2543 const IST min =
static_cast<IST
>(minVal);
2545 auto thisView = this->getLocalViewDevice(Access::OverwriteAll);
2547 if (isConstantStride()) {
2548 Kokkos::fill_random(thisView, rand_pool, min, max);
2550 const size_t numVecs = getNumVectors();
2551 for (
size_t k = 0; k < numVecs; ++k) {
2552 const size_t col = whichVectors_[k];
2553 auto X_k = Kokkos::subview(thisView, Kokkos::ALL(), col);
2554 Kokkos::fill_random(X_k, rand_pool, min, max);
2559 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2562 using ::Tpetra::Details::ProfilingRegion;
2563 using ::Tpetra::Details::Blas::fill;
2564 using DES =
typename dual_view_type::t_dev::execution_space;
2565 using HES =
typename dual_view_type::t_host::execution_space;
2566 using LO = LocalOrdinal;
2567 ProfilingRegion region(
"Tpetra::MultiVector::putScalar");
2572 const LO lclNumRows =
static_cast<LO
>(this->getLocalLength());
2573 const LO numVecs =
static_cast<LO
>(this->getNumVectors());
2579 const bool runOnHost = runKernelOnHost(*
this);
2581 auto X = this->getLocalViewDevice(Access::OverwriteAll);
2582 if (this->isConstantStride()) {
2583 fill(DES(), X, theAlpha, lclNumRows, numVecs);
2585 fill(DES(), X, theAlpha, lclNumRows, numVecs,
2586 this->whichVectors_.getRawPtr());
2589 auto X = this->getLocalViewHost(Access::OverwriteAll);
2590 if (this->isConstantStride()) {
2591 fill(HES(), X, theAlpha, lclNumRows, numVecs);
2593 fill(HES(), X, theAlpha, lclNumRows, numVecs,
2594 this->whichVectors_.getRawPtr());
2599 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2602 using Teuchos::ArrayRCP;
2603 using Teuchos::Comm;
2606 using LO = LocalOrdinal;
2607 using GO = GlobalOrdinal;
2613 TEUCHOS_TEST_FOR_EXCEPTION(
2614 !this->isConstantStride(), std::logic_error,
2615 "Tpetra::MultiVector::replaceMap: This method does not currently work "
2616 "if the MultiVector is a column view of another MultiVector (that is, if "
2617 "isConstantStride() == false).");
2647 if (this->getMap().is_null()) {
2652 TEUCHOS_TEST_FOR_EXCEPTION(
2653 newMap.is_null(), std::invalid_argument,
2654 "Tpetra::MultiVector::replaceMap: both current and new Maps are null. "
2655 "This probably means that the input Map is incorrect.");
2659 const size_t newNumRows = newMap->getLocalNumElements();
2660 const size_t origNumRows = view_.extent(0);
2661 const size_t numCols = this->getNumVectors();
2663 if (origNumRows != newNumRows || view_.extent(1) != numCols) {
2664 view_ = allocDualView<ST, LO, GO, Node>(newNumRows, numCols);
2666 }
else if (newMap.is_null()) {
2669 const size_t newNumRows =
static_cast<size_t>(0);
2670 const size_t numCols = this->getNumVectors();
2671 view_ = allocDualView<ST, LO, GO, Node>(newNumRows, numCols);
2674 this->map_ = newMap;
2677 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2683 const IST theAlpha =
static_cast<IST
>(alpha);
2684 #if KOKKOS_VERSION >= 40799
2685 if (theAlpha == KokkosKernels::ArithTraits<IST>::one()) {
2687 if (theAlpha == Kokkos::ArithTraits<IST>::one()) {
2691 const size_t lclNumRows = getLocalLength();
2692 const size_t numVecs = getNumVectors();
2693 const std::pair<size_t, size_t> rowRng(0, lclNumRows);
2694 const std::pair<size_t, size_t> colRng(0, numVecs);
2702 const bool useHostVersion = need_sync_device();
2703 if (useHostVersion) {
2704 auto Y_lcl = Kokkos::subview(getLocalViewHost(Access::ReadWrite), rowRng, ALL());
2705 if (isConstantStride()) {
2706 KokkosBlas::scal(Y_lcl, theAlpha, Y_lcl);
2708 for (
size_t k = 0; k < numVecs; ++k) {
2709 const size_t Y_col = whichVectors_[k];
2710 auto Y_k = Kokkos::subview(Y_lcl, ALL(), Y_col);
2711 KokkosBlas::scal(Y_k, theAlpha, Y_k);
2715 auto Y_lcl = Kokkos::subview(getLocalViewDevice(Access::ReadWrite), rowRng, ALL());
2716 if (isConstantStride()) {
2717 KokkosBlas::scal(Y_lcl, theAlpha, Y_lcl);
2719 for (
size_t k = 0; k < numVecs; ++k) {
2720 const size_t Y_col = isConstantStride() ? k : whichVectors_[k];
2721 auto Y_k = Kokkos::subview(Y_lcl, ALL(), Y_col);
2722 KokkosBlas::scal(Y_k, theAlpha, Y_k);
2728 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2730 scale(
const Teuchos::ArrayView<const Scalar>& alphas) {
2731 const size_t numVecs = this->getNumVectors();
2732 const size_t numAlphas =
static_cast<size_t>(alphas.size());
2733 TEUCHOS_TEST_FOR_EXCEPTION(
2734 numAlphas != numVecs, std::invalid_argument,
2735 "Tpetra::MultiVector::"
2736 "scale: alphas.size() = "
2737 << numAlphas <<
" != this->getNumVectors() = "
2741 using k_alphas_type = Kokkos::DualView<impl_scalar_type*, device_type>;
2742 k_alphas_type k_alphas(
"alphas::tmp", numAlphas);
2743 k_alphas.modify_host();
2744 for (
size_t i = 0; i < numAlphas; ++i) {
2745 k_alphas.view_host()(i) = static_cast<impl_scalar_type>(alphas[i]);
2747 k_alphas.sync_device();
2749 this->scale(k_alphas.view_device());
2752 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2754 scale(
const Kokkos::View<const impl_scalar_type*, device_type>& alphas) {
2756 using Kokkos::subview;
2758 const size_t lclNumRows = this->getLocalLength();
2759 const size_t numVecs = this->getNumVectors();
2760 TEUCHOS_TEST_FOR_EXCEPTION(
2761 static_cast<size_t>(alphas.extent(0)) != numVecs,
2762 std::invalid_argument,
2763 "Tpetra::MultiVector::scale(alphas): "
2764 "alphas.extent(0) = "
2766 <<
" != this->getNumVectors () = " << numVecs <<
".");
2767 const std::pair<size_t, size_t> rowRng(0, lclNumRows);
2768 const std::pair<size_t, size_t> colRng(0, numVecs);
2778 const bool useHostVersion = this->need_sync_device();
2779 if (useHostVersion) {
2782 auto alphas_h = Kokkos::create_mirror_view(alphas);
2786 auto Y_lcl = subview(this->getLocalViewHost(Access::ReadWrite), rowRng, ALL());
2787 if (isConstantStride()) {
2788 KokkosBlas::scal(Y_lcl, alphas_h, Y_lcl);
2790 for (
size_t k = 0; k < numVecs; ++k) {
2791 const size_t Y_col = this->isConstantStride() ? k : this->whichVectors_[k];
2792 auto Y_k = subview(Y_lcl, ALL(), Y_col);
2795 KokkosBlas::scal(Y_k, alphas_h(k), Y_k);
2799 auto Y_lcl = subview(this->getLocalViewDevice(Access::ReadWrite), rowRng, ALL());
2800 if (isConstantStride()) {
2801 KokkosBlas::scal(Y_lcl, alphas, Y_lcl);
2807 auto alphas_h = Kokkos::create_mirror_view(alphas);
2811 for (
size_t k = 0; k < numVecs; ++k) {
2812 const size_t Y_col = this->isConstantStride() ? k : this->whichVectors_[k];
2813 auto Y_k = subview(Y_lcl, ALL(), Y_col);
2814 KokkosBlas::scal(Y_k, alphas_h(k), Y_k);
2820 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2825 using Kokkos::subview;
2826 const char tfecfFuncName[] =
"scale: ";
2828 const size_t lclNumRows = getLocalLength();
2829 const size_t numVecs = getNumVectors();
2831 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2833 "this->getLocalLength() = " << lclNumRows <<
" != A.getLocalLength() = "
2835 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2837 "this->getNumVectors() = " << numVecs <<
" != A.getNumVectors() = "
2841 const std::pair<size_t, size_t> rowRng(0, lclNumRows);
2842 const std::pair<size_t, size_t> colRng(0, numVecs);
2844 auto Y_lcl_orig = this->getLocalViewDevice(Access::ReadWrite);
2846 auto Y_lcl = subview(Y_lcl_orig, rowRng, ALL());
2847 auto X_lcl = subview(X_lcl_orig, rowRng, ALL());
2850 KokkosBlas::scal(Y_lcl, theAlpha, X_lcl);
2853 for (
size_t k = 0; k < numVecs; ++k) {
2854 const size_t Y_col = this->isConstantStride() ? k : this->whichVectors_[k];
2856 auto Y_k = subview(Y_lcl, ALL(), Y_col);
2857 auto X_k = subview(X_lcl, ALL(), X_col);
2859 KokkosBlas::scal(Y_k, theAlpha, X_k);
2864 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2867 const char tfecfFuncName[] =
"reciprocal: ";
2869 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2871 "MultiVectors do not have the same local length. "
2872 "this->getLocalLength() = "
2875 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2876 A.
getNumVectors() != this->getNumVectors(), std::runtime_error,
2877 ": MultiVectors do not have the same number of columns (vectors). "
2878 "this->getNumVectors() = "
2882 const size_t numVecs = getNumVectors();
2884 auto this_view_dev = this->getLocalViewDevice(Access::ReadWrite);
2888 KokkosBlas::reciprocal(this_view_dev, A_view_dev);
2891 using Kokkos::subview;
2892 for (
size_t k = 0; k < numVecs; ++k) {
2893 const size_t this_col = isConstantStride() ? k : whichVectors_[k];
2894 auto vector_k = subview(this_view_dev, ALL(), this_col);
2895 const size_t A_col = isConstantStride() ? k : A.
whichVectors_[k];
2896 auto vector_Ak = subview(A_view_dev, ALL(), A_col);
2897 KokkosBlas::reciprocal(vector_k, vector_Ak);
2902 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2905 const char tfecfFuncName[] =
"abs";
2907 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2909 ": MultiVectors do not have the same local length. "
2910 "this->getLocalLength() = "
2913 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2914 A.
getNumVectors() != this->getNumVectors(), std::runtime_error,
2915 ": MultiVectors do not have the same number of columns (vectors). "
2916 "this->getNumVectors() = "
2919 const size_t numVecs = getNumVectors();
2921 auto this_view_dev = this->getLocalViewDevice(Access::ReadWrite);
2925 KokkosBlas::abs(this_view_dev, A_view_dev);
2928 using Kokkos::subview;
2930 for (
size_t k = 0; k < numVecs; ++k) {
2931 const size_t this_col = isConstantStride() ? k : whichVectors_[k];
2932 auto vector_k = subview(this_view_dev, ALL(), this_col);
2933 const size_t A_col = isConstantStride() ? k : A.
whichVectors_[k];
2934 auto vector_Ak = subview(A_view_dev, ALL(), A_col);
2935 KokkosBlas::abs(vector_k, vector_Ak);
2940 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2944 const Scalar& beta) {
2945 const char tfecfFuncName[] =
"update: ";
2947 using Kokkos::subview;
2951 const size_t lclNumRows = getLocalLength();
2952 const size_t numVecs = getNumVectors();
2955 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2957 "this->getLocalLength() = " << lclNumRows <<
" != A.getLocalLength() = "
2959 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2961 "this->getNumVectors() = " << numVecs <<
" != A.getNumVectors() = "
2967 const std::pair<size_t, size_t> rowRng(0, lclNumRows);
2968 const std::pair<size_t, size_t> colRng(0, numVecs);
2970 auto Y_lcl_orig = this->getLocalViewDevice(Access::ReadWrite);
2971 auto Y_lcl = subview(Y_lcl_orig, rowRng, Kokkos::ALL());
2973 auto X_lcl = subview(X_lcl_orig, rowRng, Kokkos::ALL());
2977 KokkosBlas::axpby(theAlpha, X_lcl, theBeta, Y_lcl);
2980 for (
size_t k = 0; k < numVecs; ++k) {
2981 const size_t Y_col = this->isConstantStride() ? k : this->whichVectors_[k];
2983 auto Y_k = subview(Y_lcl, ALL(), Y_col);
2984 auto X_k = subview(X_lcl, ALL(), X_col);
2986 KokkosBlas::axpby(theAlpha, X_k, theBeta, Y_k);
2991 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2997 const Scalar& gamma) {
2999 using Kokkos::subview;
3001 const char tfecfFuncName[] =
"update(alpha,A,beta,B,gamma): ";
3005 const size_t lclNumRows = this->getLocalLength();
3006 const size_t numVecs = getNumVectors();
3009 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3011 "The input MultiVector A has " << A.
getLocalLength() <<
" local "
3012 "row(s), but this MultiVector has "
3013 << lclNumRows <<
" local "
3015 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3017 "The input MultiVector B has " << B.
getLocalLength() <<
" local "
3018 "row(s), but this MultiVector has "
3019 << lclNumRows <<
" local "
3021 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3023 "The input MultiVector A has " << A.
getNumVectors() <<
" column(s), "
3024 "but this MultiVector has "
3025 << numVecs <<
" column(s).");
3026 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3028 "The input MultiVector B has " << B.
getNumVectors() <<
" column(s), "
3029 "but this MultiVector has "
3030 << numVecs <<
" column(s).");
3037 const std::pair<size_t, size_t> rowRng(0, lclNumRows);
3038 const std::pair<size_t, size_t> colRng(0, numVecs);
3043 auto C_lcl = subview(this->getLocalViewDevice(Access::ReadWrite), rowRng, ALL());
3048 KokkosBlas::update(theAlpha, A_lcl, theBeta, B_lcl, theGamma, C_lcl);
3052 for (
size_t k = 0; k < numVecs; ++k) {
3053 const size_t this_col = isConstantStride() ? k : whichVectors_[k];
3056 KokkosBlas::update(theAlpha, subview(A_lcl, rowRng, A_col),
3057 theBeta, subview(B_lcl, rowRng, B_col),
3058 theGamma, subview(C_lcl, rowRng, this_col));
3063 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3064 Teuchos::ArrayRCP<const Scalar>
3069 const char tfecfFuncName[] =
"getData: ";
3076 auto hostView = getLocalViewHost(Access::ReadOnly);
3077 const size_t col = isConstantStride() ? j : whichVectors_[j];
3078 auto hostView_j = Kokkos::subview(hostView, ALL(), col);
3079 Teuchos::ArrayRCP<const IST> dataAsArcp =
3080 Kokkos::Compat::persistingView(hostView_j, 0, getLocalLength());
3082 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(hostView_j.extent(0)) <
3083 static_cast<size_t>(dataAsArcp.size()),
3085 "hostView_j.extent(0) = " << hostView_j.extent(0)
3086 <<
" < dataAsArcp.size() = " << dataAsArcp.size() <<
". "
3087 "Please report this bug to the Tpetra developers.");
3089 return Teuchos::arcp_reinterpret_cast<
const Scalar>(dataAsArcp);
3092 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3093 Teuchos::ArrayRCP<Scalar>
3097 using Kokkos::subview;
3099 const char tfecfFuncName[] =
"getDataNonConst: ";
3101 auto hostView = getLocalViewHost(Access::ReadWrite);
3102 const size_t col = isConstantStride() ? j : whichVectors_[j];
3103 auto hostView_j = subview(hostView, ALL(), col);
3104 Teuchos::ArrayRCP<IST> dataAsArcp =
3105 Kokkos::Compat::persistingView(hostView_j, 0, getLocalLength());
3107 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(hostView_j.extent(0)) <
3108 static_cast<size_t>(dataAsArcp.size()),
3110 "hostView_j.extent(0) = " << hostView_j.extent(0)
3111 <<
" < dataAsArcp.size() = " << dataAsArcp.size() <<
". "
3112 "Please report this bug to the Tpetra developers.");
3114 return Teuchos::arcp_reinterpret_cast<Scalar>(dataAsArcp);
3117 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3118 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3120 subCopy(
const Teuchos::ArrayView<const size_t>& cols)
const {
3126 bool contiguous =
true;
3127 const size_t numCopyVecs =
static_cast<size_t>(cols.size());
3128 for (
size_t j = 1; j < numCopyVecs; ++j) {
3129 if (cols[j] != cols[j - 1] + static_cast<size_t>(1)) {
3134 if (contiguous && numCopyVecs > 0) {
3135 return this->subCopy(Teuchos::Range1D(cols[0], cols[numCopyVecs - 1]));
3137 RCP<const MV> X_sub = this->subView(cols);
3138 RCP<MV> Y(
new MV(this->getMap(), numCopyVecs,
false));
3144 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3145 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3151 RCP<const MV> X_sub = this->subView(colRng);
3152 RCP<MV> Y(
new MV(this->getMap(), static_cast<size_t>(colRng.size()),
false));
3157 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3161 return view_.origExtent(0);
3164 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3168 return view_.origExtent(1);
3171 template <
class Scalar,
class LO,
class GO,
class Node>
3174 const Teuchos::RCP<const map_type>& subMap,
3175 const local_ordinal_type rowOffset)
3176 : base_type(subMap) {
3178 using Kokkos::subview;
3180 using Teuchos::outArg;
3183 using Teuchos::REDUCE_MIN;
3184 using Teuchos::reduceAll;
3186 const char prefix[] =
"Tpetra::MultiVector constructor (offsetView): ";
3187 const char suffix[] =
"Please report this bug to the Tpetra developers.";
3190 std::unique_ptr<std::ostringstream> errStrm;
3197 const auto comm = subMap->getComm();
3198 TEUCHOS_ASSERT(!comm.is_null());
3199 const int myRank = comm->getRank();
3203 const LO newNumRows =
static_cast<LO
>(subMap->getLocalNumElements());
3205 std::ostringstream os;
3206 os <<
"Proc " << myRank <<
": " << prefix
3207 <<
"X: {lclNumRows: " << lclNumRowsBefore
3209 <<
", numCols: " << numCols <<
"}, "
3210 <<
"subMap: {lclNumRows: " << newNumRows <<
"}" << endl;
3211 std::cerr << os.str();
3216 const bool tooManyElts =
3219 errStrm = std::unique_ptr<std::ostringstream>(
new std::ostringstream);
3220 *errStrm <<
" Proc " << myRank <<
": subMap->getLocalNumElements() (="
3221 << newNumRows <<
") + rowOffset (=" << rowOffset
3225 TEUCHOS_TEST_FOR_EXCEPTION(!debug && tooManyElts, std::invalid_argument,
3226 prefix << errStrm->str() << suffix);
3230 reduceAll<int, int>(*comm, REDUCE_MIN, lclGood, outArg(gblGood));
3232 std::ostringstream gblErrStrm;
3233 const std::string myErrStr =
3234 errStrm.get() !=
nullptr ? errStrm->str() : std::string(
"");
3235 ::Tpetra::Details::gathervPrint(gblErrStrm, myErrStr, *comm);
3236 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::invalid_argument, gblErrStrm.str());
3240 using range_type = std::pair<LO, LO>;
3241 const range_type origRowRng(rowOffset, static_cast<LO>(X.
view_.origExtent(0)));
3242 const range_type rowRng(rowOffset, rowOffset + newNumRows);
3244 wrapped_dual_view_type newView = takeSubview(X.
view_, rowRng, ALL());
3251 if (newView.extent(0) == 0 &&
3252 newView.extent(1) != X.
view_.extent(1)) {
3260 const LO lclNumRowsRet =
static_cast<LO
>(subViewMV.getLocalLength());
3261 const LO numColsRet =
static_cast<LO
>(subViewMV.getNumVectors());
3262 if (newNumRows != lclNumRowsRet || numCols != numColsRet) {
3264 if (errStrm.get() ==
nullptr) {
3265 errStrm = std::unique_ptr<std::ostringstream>(
new std::ostringstream);
3267 *errStrm <<
" Proc " << myRank <<
": subMap.getLocalNumElements(): " << newNumRows <<
", subViewMV.getLocalLength(): " << lclNumRowsRet <<
", X.getNumVectors(): " << numCols <<
", subViewMV.getNumVectors(): " << numColsRet << endl;
3269 reduceAll<int, int>(*comm, REDUCE_MIN, lclGood, outArg(gblGood));
3271 std::ostringstream gblErrStrm;
3273 gblErrStrm << prefix <<
"Returned MultiVector has the wrong local "
3274 "dimensions on one or more processes:"
3277 const std::string myErrStr =
3278 errStrm.get() !=
nullptr ? errStrm->str() : std::string(
"");
3279 ::Tpetra::Details::gathervPrint(gblErrStrm, myErrStr, *comm);
3280 gblErrStrm << suffix << endl;
3281 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::invalid_argument, gblErrStrm.str());
3286 std::ostringstream os;
3287 os <<
"Proc " << myRank <<
": " << prefix <<
"Call op=" << endl;
3288 std::cerr << os.str();
3294 std::ostringstream os;
3295 os <<
"Proc " << myRank <<
": " << prefix <<
"Done!" << endl;
3296 std::cerr << os.str();
3300 template <
class Scalar,
class LO,
class GO,
class Node>
3303 const map_type& subMap,
3304 const size_t rowOffset)
3305 :
MultiVector(X, Teuchos::RCP<const map_type>(new map_type(subMap)),
3308 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3309 Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3312 const size_t offset)
const {
3314 return Teuchos::rcp(
new MV(*
this, *subMap, offset));
3317 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3318 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3321 const size_t offset) {
3323 return Teuchos::rcp(
new MV(*
this, *subMap, offset));
3326 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3327 Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3329 subView(
const Teuchos::ArrayView<const size_t>& cols)
const {
3330 using Teuchos::Array;
3334 const size_t numViewCols =
static_cast<size_t>(cols.size());
3335 TEUCHOS_TEST_FOR_EXCEPTION(
3336 numViewCols < 1, std::runtime_error,
3337 "Tpetra::MultiVector::subView"
3338 "(const Teuchos::ArrayView<const size_t>&): The input array cols must "
3339 "contain at least one entry, but cols.size() = "
3345 bool contiguous =
true;
3346 for (
size_t j = 1; j < numViewCols; ++j) {
3347 if (cols[j] != cols[j - 1] + static_cast<size_t>(1)) {
3353 if (numViewCols == 0) {
3355 return rcp(
new MV(this->getMap(), numViewCols));
3358 return this->subView(Teuchos::Range1D(cols[0], cols[numViewCols - 1]));
3362 if (isConstantStride()) {
3363 return rcp(
new MV(this->getMap(), view_, cols));
3365 Array<size_t> newcols(cols.size());
3366 for (
size_t j = 0; j < numViewCols; ++j) {
3367 newcols[j] = whichVectors_[cols[j]];
3369 return rcp(
new MV(this->getMap(), view_, newcols()));
3373 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3374 Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3378 using Kokkos::subview;
3379 using Teuchos::Array;
3382 using ::Tpetra::Details::Behavior;
3384 const char tfecfFuncName[] =
"subView(Range1D): ";
3386 const size_t lclNumRows = this->getLocalLength();
3387 const size_t numVecs = this->getNumVectors();
3391 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3392 static_cast<size_t>(colRng.size()) > numVecs, std::runtime_error,
3393 "colRng.size() = " << colRng.size() <<
" > this->getNumVectors() = "
3395 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3396 numVecs != 0 && colRng.size() != 0 &&
3397 (colRng.lbound() <
static_cast<Teuchos::Ordinal
>(0) ||
3398 static_cast<size_t>(colRng.ubound()) >= numVecs),
3399 std::invalid_argument,
"Nonempty input range [" << colRng.lbound() <<
"," << colRng.ubound() <<
"] exceeds the valid range of column indices "
3401 << numVecs <<
"].");
3403 RCP<const MV> X_ret;
3413 const std::pair<size_t, size_t> rows(0, lclNumRows);
3414 if (colRng.size() == 0) {
3415 const std::pair<size_t, size_t> cols(0, 0);
3416 wrapped_dual_view_type X_sub = takeSubview(this->view_, ALL(), cols);
3417 X_ret = rcp(
new MV(this->getMap(), X_sub));
3420 if (isConstantStride()) {
3421 const std::pair<size_t, size_t> cols(colRng.lbound(),
3422 colRng.ubound() + 1);
3423 wrapped_dual_view_type X_sub = takeSubview(this->view_, ALL(), cols);
3424 X_ret = rcp(
new MV(this->getMap(), X_sub));
3426 if (static_cast<size_t>(colRng.size()) == static_cast<size_t>(1)) {
3429 const std::pair<size_t, size_t> col(whichVectors_[0] + colRng.lbound(),
3430 whichVectors_[0] + colRng.ubound() + 1);
3431 wrapped_dual_view_type X_sub = takeSubview(view_, ALL(), col);
3432 X_ret = rcp(
new MV(this->getMap(), X_sub));
3434 Array<size_t> which(whichVectors_.begin() + colRng.lbound(),
3435 whichVectors_.begin() + colRng.ubound() + 1);
3436 X_ret = rcp(
new MV(this->getMap(), view_, which));
3441 const bool debug = Behavior::debug();
3443 using Teuchos::Comm;
3444 using Teuchos::outArg;
3445 using Teuchos::REDUCE_MIN;
3446 using Teuchos::reduceAll;
3448 RCP<const Comm<int> > comm = this->getMap().is_null() ? Teuchos::null : this->getMap()->getComm();
3449 if (!comm.is_null()) {
3453 if (X_ret.is_null()) {
3456 reduceAll<int, int>(*comm, REDUCE_MIN, lclSuccess, outArg(gblSuccess));
3457 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(lclSuccess != 1, std::logic_error,
3458 "X_ret (the subview of this "
3459 "MultiVector; the return value of this method) is null on some MPI "
3460 "process in this MultiVector's communicator. This should never "
3461 "happen. Please report this bug to the Tpetra developers.");
3462 if (!X_ret.is_null() &&
3463 X_ret->getNumVectors() !=
static_cast<size_t>(colRng.size())) {
3466 reduceAll<int, int>(*comm, REDUCE_MIN, lclSuccess,
3467 outArg(gblSuccess));
3468 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(lclSuccess != 1, std::logic_error,
3469 "X_ret->getNumVectors() != "
3470 "colRng.size(), on at least one MPI process in this MultiVector's "
3471 "communicator. This should never happen. "
3472 "Please report this bug to the Tpetra developers.");
3478 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3479 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3483 return Teuchos::rcp_const_cast<MV>(this->subView(cols));
3486 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3487 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3491 return Teuchos::rcp_const_cast<MV>(this->subView(colRng));
3494 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3499 using Kokkos::subview;
3500 typedef std::pair<size_t, size_t> range_type;
3501 const char tfecfFuncName[] =
"MultiVector(const MultiVector&, const size_t): ";
3504 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(j >= numCols, std::invalid_argument,
"Input index j (== " << j <<
") exceeds valid column index range [0, " << numCols <<
" - 1].");
3506 this->
view_ = takeSubview(X.
view_, Kokkos::ALL(), range_type(jj, jj + 1));
3517 const size_t newSize = X.
imports_.extent(0) / numCols;
3518 const size_t offset = jj * newSize;
3519 auto device_view = subview(X.
imports_.view_device(),
3520 range_type(offset, offset + newSize));
3521 auto host_view = subview(X.
imports_.view_host(),
3522 range_type(offset, offset + newSize));
3526 const size_t newSize = X.
exports_.extent(0) / numCols;
3527 const size_t offset = jj * newSize;
3528 auto device_view = subview(X.
exports_.view_device(),
3529 range_type(offset, offset + newSize));
3530 auto host_view = subview(X.
exports_.view_host(),
3531 range_type(offset, offset + newSize));
3541 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3542 Teuchos::RCP<const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3546 return Teuchos::rcp(
new V(*
this, j));
3549 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3550 Teuchos::RCP<Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3554 return Teuchos::rcp(
new V(*
this, j));
3557 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3559 get1dCopy(
const Teuchos::ArrayView<Scalar>& A,
const size_t LDA)
const {
3561 using input_view_type = Kokkos::View<IST**, Kokkos::LayoutLeft,
3563 Kokkos::MemoryUnmanaged>;
3564 const char tfecfFuncName[] =
"get1dCopy: ";
3566 const size_t numRows = this->getLocalLength();
3567 const size_t numCols = this->getNumVectors();
3569 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(LDA < numRows, std::runtime_error,
3570 "LDA = " << LDA <<
" < numRows = " << numRows <<
".");
3571 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(numRows >
size_t(0) && numCols >
size_t(0) &&
3572 size_t(A.size()) < LDA * (numCols - 1) + numRows,
3574 "A.size() = " << A.size() <<
", but its size must be at least "
3575 << (LDA * (numCols - 1) + numRows) <<
" to hold all the entries.");
3577 const std::pair<size_t, size_t> rowRange(0, numRows);
3578 const std::pair<size_t, size_t> colRange(0, numCols);
3580 input_view_type A_view_orig(reinterpret_cast<IST*>(A.getRawPtr()),
3582 auto A_view = Kokkos::subview(A_view_orig, rowRange, colRange);
3595 if (this->need_sync_host() && this->need_sync_device()) {
3598 throw std::runtime_error(
"Tpetra::MultiVector: A non-const view is alive outside and we cannot give a copy where host or device view can be modified outside");
3600 const bool useHostView = view_.host_view_use_count() >= view_.device_view_use_count();
3601 if (this->isConstantStride()) {
3603 auto srcView_host = this->getLocalViewHost(Access::ReadOnly);
3607 auto srcView_device = this->getLocalViewDevice(Access::ReadOnly);
3612 for (
size_t j = 0; j < numCols; ++j) {
3613 const size_t srcCol = this->whichVectors_[j];
3614 auto dstColView = Kokkos::subview(A_view, rowRange, j);
3617 auto srcView_host = this->getLocalViewHost(Access::ReadOnly);
3618 auto srcColView_host = Kokkos::subview(srcView_host, rowRange, srcCol);
3622 auto srcView_device = this->getLocalViewDevice(Access::ReadOnly);
3623 auto srcColView_device = Kokkos::subview(srcView_device, rowRange, srcCol);
3632 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3634 get2dCopy(
const Teuchos::ArrayView<
const Teuchos::ArrayView<Scalar> >& ArrayOfPtrs)
const {
3636 const char tfecfFuncName[] =
"get2dCopy: ";
3637 const size_t numRows = this->getLocalLength();
3638 const size_t numCols = this->getNumVectors();
3640 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3641 static_cast<size_t>(ArrayOfPtrs.size()) != numCols,
3643 "Input array of pointers must contain as many "
3644 "entries (arrays) as the MultiVector has columns. ArrayOfPtrs.size() = "
3645 << ArrayOfPtrs.size() <<
" != getNumVectors() = " << numCols <<
".");
3647 if (numRows != 0 && numCols != 0) {
3649 for (
size_t j = 0; j < numCols; ++j) {
3650 const size_t dstLen =
static_cast<size_t>(ArrayOfPtrs[j].size());
3651 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3652 dstLen < numRows, std::invalid_argument,
"Array j = " << j <<
" of "
3653 "the input array of arrays is not long enough to fit all entries in "
3654 "that column of the MultiVector. ArrayOfPtrs[j].size() = "
3655 << dstLen <<
" < getLocalLength() = " << numRows <<
".");
3659 for (
size_t j = 0; j < numCols; ++j) {
3660 Teuchos::RCP<const V> X_j = this->getVector(j);
3661 const size_t LDA =
static_cast<size_t>(ArrayOfPtrs[j].size());
3662 X_j->get1dCopy(ArrayOfPtrs[j], LDA);
3667 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3668 Teuchos::ArrayRCP<const Scalar>
3671 if (getLocalLength() == 0 || getNumVectors() == 0) {
3672 return Teuchos::null;
3674 TEUCHOS_TEST_FOR_EXCEPTION(
3675 !isConstantStride(), std::runtime_error,
3676 "Tpetra::MultiVector::"
3677 "get1dView: This MultiVector does not have constant stride, so it is "
3678 "not possible to view its data as a single array. You may check "
3679 "whether a MultiVector has constant stride by calling "
3680 "isConstantStride().");
3684 auto X_lcl = getLocalViewHost(Access::ReadOnly);
3685 Teuchos::ArrayRCP<const impl_scalar_type> dataAsArcp =
3686 Kokkos::Compat::persistingView(X_lcl);
3687 return Teuchos::arcp_reinterpret_cast<
const Scalar>(dataAsArcp);
3691 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3692 Teuchos::ArrayRCP<Scalar>
3695 if (this->getLocalLength() == 0 || this->getNumVectors() == 0) {
3696 return Teuchos::null;
3698 TEUCHOS_TEST_FOR_EXCEPTION(!isConstantStride(), std::runtime_error,
3699 "Tpetra::MultiVector::"
3700 "get1dViewNonConst: This MultiVector does not have constant stride, "
3701 "so it is not possible to view its data as a single array. You may "
3702 "check whether a MultiVector has constant stride by calling "
3703 "isConstantStride().");
3704 auto X_lcl = getLocalViewHost(Access::ReadWrite);
3705 Teuchos::ArrayRCP<impl_scalar_type> dataAsArcp =
3706 Kokkos::Compat::persistingView(X_lcl);
3707 return Teuchos::arcp_reinterpret_cast<Scalar>(dataAsArcp);
3711 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3712 Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> >
3715 auto X_lcl = getLocalViewHost(Access::ReadWrite);
3721 const size_t myNumRows = this->getLocalLength();
3722 const size_t numCols = this->getNumVectors();
3723 const Kokkos::pair<size_t, size_t> rowRange(0, myNumRows);
3725 Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> > views(numCols);
3726 for (
size_t j = 0; j < numCols; ++j) {
3727 const size_t col = this->isConstantStride() ? j : this->whichVectors_[j];
3728 auto X_lcl_j = Kokkos::subview(X_lcl, rowRange, col);
3729 Teuchos::ArrayRCP<impl_scalar_type> X_lcl_j_arcp =
3730 Kokkos::Compat::persistingView(X_lcl_j);
3731 views[j] = Teuchos::arcp_reinterpret_cast<Scalar>(X_lcl_j_arcp);
3736 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3740 return view_.getHostView(s);
3743 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3747 return view_.getHostView(s);
3750 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3754 return view_.getHostView(s);
3757 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3761 return view_.getDeviceView(s);
3764 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3768 return view_.getDeviceView(s);
3771 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3775 return view_.getDeviceView(s);
3778 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3785 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3786 Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> >
3792 auto X_lcl = getLocalViewHost(Access::ReadOnly);
3798 const size_t myNumRows = this->getLocalLength();
3799 const size_t numCols = this->getNumVectors();
3800 const Kokkos::pair<size_t, size_t> rowRange(0, myNumRows);
3802 Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> > views(numCols);
3803 for (
size_t j = 0; j < numCols; ++j) {
3804 const size_t col = this->isConstantStride() ? j : this->whichVectors_[j];
3805 auto X_lcl_j = Kokkos::subview(X_lcl, rowRange, col);
3806 Teuchos::ArrayRCP<const impl_scalar_type> X_lcl_j_arcp =
3807 Kokkos::Compat::persistingView(X_lcl_j);
3808 views[j] = Teuchos::arcp_reinterpret_cast<
const Scalar>(X_lcl_j_arcp);
3813 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3816 Teuchos::ETransp transB,
3817 const Scalar& alpha,
3820 const Scalar& beta) {
3822 using Teuchos::CONJ_TRANS;
3823 using Teuchos::NO_TRANS;
3826 using Teuchos::rcpFromRef;
3827 using Teuchos::TRANS;
3828 using ::Tpetra::Details::ProfilingRegion;
3829 #if KOKKOS_VERSION >= 40799
3830 using ATS = KokkosKernels::ArithTraits<impl_scalar_type>;
3832 using ATS = Kokkos::ArithTraits<impl_scalar_type>;
3835 using STS = Teuchos::ScalarTraits<Scalar>;
3837 const char tfecfFuncName[] =
"multiply: ";
3838 ProfilingRegion region(
"Tpetra::MV::multiply");
3870 const bool C_is_local = !this->isDistributed();
3880 auto myMap = this->getMap();
3881 if (!myMap.is_null() && !myMap->getComm().is_null()) {
3882 using Teuchos::outArg;
3883 using Teuchos::REDUCE_MIN;
3884 using Teuchos::reduceAll;
3886 auto comm = myMap->getComm();
3887 const size_t A_nrows =
3889 const size_t A_ncols =
3891 const size_t B_nrows =
3893 const size_t B_ncols =
3896 const bool lclBad = this->getLocalLength() != A_nrows ||
3897 this->getNumVectors() != B_ncols || A_ncols != B_nrows;
3899 const int myRank = comm->getRank();
3900 std::ostringstream errStrm;
3901 if (this->getLocalLength() != A_nrows) {
3902 errStrm <<
"Proc " << myRank <<
": this->getLocalLength()="
3903 << this->getLocalLength() <<
" != A_nrows=" << A_nrows
3904 <<
"." << std::endl;
3906 if (this->getNumVectors() != B_ncols) {
3907 errStrm <<
"Proc " << myRank <<
": this->getNumVectors()="
3908 << this->getNumVectors() <<
" != B_ncols=" << B_ncols
3909 <<
"." << std::endl;
3911 if (A_ncols != B_nrows) {
3912 errStrm <<
"Proc " << myRank <<
": A_ncols="
3913 << A_ncols <<
" != B_nrows=" << B_nrows
3914 <<
"." << std::endl;
3917 const int lclGood = lclBad ? 0 : 1;
3919 reduceAll<int, int>(*comm, REDUCE_MIN, lclGood, outArg(gblGood));
3921 std::ostringstream os;
3922 ::Tpetra::Details::gathervPrint(os, errStrm.str(), *comm);
3924 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
true, std::runtime_error,
3925 "Inconsistent local dimensions on at "
3926 "least one process in this object's communicator."
3929 <<
"C(" << (C_is_local ?
"local" :
"distr") <<
") = "
3931 << (transA == Teuchos::TRANS ?
"^T" : (transA == Teuchos::CONJ_TRANS ?
"^H" :
""))
3932 <<
"(" << (A_is_local ?
"local" :
"distr") <<
") * "
3934 << (transA == Teuchos::TRANS ?
"^T" : (transA == Teuchos::CONJ_TRANS ?
"^H" :
""))
3935 <<
"(" << (B_is_local ?
"local" :
"distr") <<
")." << std::endl
3936 <<
"Global dimensions: C(" << this->getGlobalLength() <<
", "
3946 const bool Case1 = C_is_local && A_is_local && B_is_local;
3948 const bool Case2 = C_is_local && !A_is_local && !B_is_local &&
3949 transA != NO_TRANS &&
3952 const bool Case3 = !C_is_local && !A_is_local && B_is_local &&
3956 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!Case1 && !Case2 && !Case3, std::runtime_error,
3957 "Multiplication of op(A) and op(B) into *this is not a "
3958 "supported use case.");
3960 if (beta != STS::zero() && Case2) {
3966 const int myRank = this->getMap()->getComm()->getRank();
3968 beta_local = ATS::zero();
3977 if (!isConstantStride()) {
3978 C_tmp = rcp(
new MV(*
this, Teuchos::Copy));
3980 C_tmp = rcp(
this,
false);
3983 RCP<const MV> A_tmp;
3985 A_tmp = rcp(
new MV(A, Teuchos::Copy));
3987 A_tmp = rcpFromRef(A);
3990 RCP<const MV> B_tmp;
3992 B_tmp = rcp(
new MV(B, Teuchos::Copy));
3994 B_tmp = rcpFromRef(B);
3997 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!C_tmp->isConstantStride() || !B_tmp->isConstantStride() ||
3998 !A_tmp->isConstantStride(),
4000 "Failed to make temporary constant-stride copies of MultiVectors.");
4003 const LO A_lclNumRows = A_tmp->getLocalLength();
4004 const LO A_numVecs = A_tmp->getNumVectors();
4005 auto A_lcl = A_tmp->getLocalViewDevice(Access::ReadOnly);
4006 auto A_sub = Kokkos::subview(A_lcl,
4007 std::make_pair(LO(0), A_lclNumRows),
4008 std::make_pair(LO(0), A_numVecs));
4010 const LO B_lclNumRows = B_tmp->getLocalLength();
4011 const LO B_numVecs = B_tmp->getNumVectors();
4012 auto B_lcl = B_tmp->getLocalViewDevice(Access::ReadOnly);
4013 auto B_sub = Kokkos::subview(B_lcl,
4014 std::make_pair(LO(0), B_lclNumRows),
4015 std::make_pair(LO(0), B_numVecs));
4017 const LO C_lclNumRows = C_tmp->getLocalLength();
4018 const LO C_numVecs = C_tmp->getNumVectors();
4020 auto C_lcl = C_tmp->getLocalViewDevice(Access::ReadWrite);
4021 auto C_sub = Kokkos::subview(C_lcl,
4022 std::make_pair(LO(0), C_lclNumRows),
4023 std::make_pair(LO(0), C_numVecs));
4024 const char ctransA = (transA == Teuchos::NO_TRANS ?
'N' : (transA == Teuchos::TRANS ?
'T' :
'C'));
4025 const char ctransB = (transB == Teuchos::NO_TRANS ?
'N' : (transB == Teuchos::TRANS ?
'T' :
'C'));
4028 ProfilingRegion regionGemm(
"Tpetra::MV::multiply-call-gemm");
4030 KokkosBlas::gemm(&ctransA, &ctransB, alpha_IST, A_sub, B_sub,
4034 if (!isConstantStride()) {
4039 A_tmp = Teuchos::null;
4040 B_tmp = Teuchos::null;
4048 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4053 Scalar scalarThis) {
4055 using Kokkos::subview;
4056 const char tfecfFuncName[] =
"elementWiseMultiply: ";
4058 const size_t lclNumRows = this->getLocalLength();
4059 const size_t numVecs = this->getNumVectors();
4062 std::runtime_error,
"MultiVectors do not have the same local length.");
4063 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4065 "this->getNumVectors"
4067 << numVecs <<
" != B.getNumVectors() = " << B.
getNumVectors()
4070 auto this_view = this->getLocalViewDevice(Access::ReadWrite);
4080 KokkosBlas::mult(scalarThis,
4083 subview(A_view, ALL(), 0),
4086 for (
size_t j = 0; j < numVecs; ++j) {
4087 const size_t C_col = isConstantStride() ? j : whichVectors_[j];
4089 KokkosBlas::mult(scalarThis,
4090 subview(this_view, ALL(), C_col),
4092 subview(A_view, ALL(), 0),
4093 subview(B_view, ALL(), B_col));
4098 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4102 using ::Tpetra::Details::ProfilingRegion;
4103 ProfilingRegion region(
"Tpetra::MV::reduce");
4105 const auto map = this->getMap();
4106 if (map.get() ==
nullptr) {
4109 const auto comm = map->getComm();
4110 if (comm.get() ==
nullptr) {
4116 const bool changed_on_device = this->need_sync_host();
4117 if (changed_on_device) {
4121 Kokkos::fence(
"MultiVector::reduce");
4122 auto X_lcl = this->getLocalViewDevice(Access::ReadWrite);
4125 auto X_lcl = this->getLocalViewHost(Access::ReadWrite);
4130 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4136 const LocalOrdinal minLocalIndex = this->getMap()->getMinLocalIndex();
4137 const LocalOrdinal maxLocalIndex = this->getMap()->getMaxLocalIndex();
4138 TEUCHOS_TEST_FOR_EXCEPTION(
4139 lclRow < minLocalIndex || lclRow > maxLocalIndex,
4141 "Tpetra::MultiVector::replaceLocalValue: row index " << lclRow
4142 <<
" is invalid. The range of valid row indices on this process "
4143 << this->getMap()->getComm()->getRank() <<
" is [" << minLocalIndex
4144 <<
", " << maxLocalIndex <<
"].");
4145 TEUCHOS_TEST_FOR_EXCEPTION(
4146 vectorIndexOutOfRange(col),
4148 "Tpetra::MultiVector::replaceLocalValue: vector index " << col
4149 <<
" of the multivector is invalid.");
4152 auto view = this->getLocalViewHost(Access::ReadWrite);
4153 const size_t colInd = isConstantStride() ? col : whichVectors_[col];
4154 view(lclRow, colInd) = ScalarValue;
4157 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4162 const bool atomic) {
4164 const LocalOrdinal minLocalIndex = this->getMap()->getMinLocalIndex();
4165 const LocalOrdinal maxLocalIndex = this->getMap()->getMaxLocalIndex();
4166 TEUCHOS_TEST_FOR_EXCEPTION(
4167 lclRow < minLocalIndex || lclRow > maxLocalIndex,
4169 "Tpetra::MultiVector::sumIntoLocalValue: row index " << lclRow
4170 <<
" is invalid. The range of valid row indices on this process "
4171 << this->getMap()->getComm()->getRank() <<
" is [" << minLocalIndex
4172 <<
", " << maxLocalIndex <<
"].");
4173 TEUCHOS_TEST_FOR_EXCEPTION(
4174 vectorIndexOutOfRange(col),
4176 "Tpetra::MultiVector::sumIntoLocalValue: vector index " << col
4177 <<
" of the multivector is invalid.");
4180 const size_t colInd = isConstantStride() ? col : whichVectors_[col];
4182 auto view = this->getLocalViewHost(Access::ReadWrite);
4184 Kokkos::atomic_add(&(view(lclRow, colInd)), value);
4186 view(lclRow, colInd) += value;
4190 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4197 const LocalOrdinal lclRow = this->map_->getLocalElement(gblRow);
4200 const char tfecfFuncName[] =
"replaceGlobalValue: ";
4201 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(lclRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid(),
4203 "Global row index " << gblRow <<
" is not present on this process "
4204 << this->getMap()->getComm()->getRank() <<
".");
4205 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->vectorIndexOutOfRange(col), std::runtime_error,
4206 "Vector index " << col <<
" of the MultiVector is invalid.");
4209 this->replaceLocalValue(lclRow, col, ScalarValue);
4212 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4217 const bool atomic) {
4220 const LocalOrdinal lclRow = this->map_->getLocalElement(globalRow);
4223 TEUCHOS_TEST_FOR_EXCEPTION(
4224 lclRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid(),
4226 "Tpetra::MultiVector::sumIntoGlobalValue: Global row index " << globalRow
4227 <<
" is not present on this process "
4228 << this->getMap()->getComm()->getRank() <<
".");
4229 TEUCHOS_TEST_FOR_EXCEPTION(
4230 vectorIndexOutOfRange(col),
4232 "Tpetra::MultiVector::sumIntoGlobalValue: Vector index " << col
4233 <<
" of the multivector is invalid.");
4236 this->sumIntoLocalValue(lclRow, col, value, atomic);
4239 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4241 Teuchos::ArrayRCP<T>
4246 typename dual_view_type::array_layout,
4249 const size_t col = isConstantStride() ? j : whichVectors_[j];
4250 col_dual_view_type X_col =
4251 Kokkos::subview(view_, Kokkos::ALL(), col);
4252 return Kokkos::Compat::persistingView(X_col.view_device());
4255 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4261 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4267 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4271 using Teuchos::TypeNameTraits;
4273 std::ostringstream out;
4274 out <<
"\"" << className <<
"\": {";
4275 out <<
"Template parameters: {Scalar: " << TypeNameTraits<Scalar>::name()
4276 <<
", LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name()
4277 <<
", GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name()
4278 <<
", Node" << Node::name()
4280 if (this->getObjectLabel() !=
"") {
4281 out <<
"Label: \"" << this->getObjectLabel() <<
"\", ";
4283 out <<
", numRows: " << this->getGlobalLength();
4284 if (className !=
"Tpetra::Vector") {
4285 out <<
", numCols: " << this->getNumVectors()
4286 <<
", isConstantStride: " << this->isConstantStride();
4288 if (this->isConstantStride()) {
4289 out <<
", columnStride: " << this->getStride();
4296 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4300 return this->descriptionImpl(
"Tpetra::MultiVector");
4304 template <
typename ViewType>
4305 void print_vector(Teuchos::FancyOStream& out,
const char* prefix,
const ViewType& v) {
4307 static_assert(Kokkos::SpaceAccessibility<Kokkos::HostSpace, typename ViewType::memory_space>::accessible,
4308 "Tpetra::MultiVector::localDescribeToString: Details::print_vector should be given a host-accessible view.");
4309 static_assert(ViewType::rank == 2,
4310 "Tpetra::MultiVector::localDescribeToString: Details::print_vector should be given a rank-2 view.");
4313 out <<
"Values(" << prefix <<
"): " << std::endl
4315 const size_t numRows = v.extent(0);
4316 const size_t numCols = v.extent(1);
4318 for (
size_t i = 0; i < numRows; ++i) {
4320 if (i + 1 < numRows) {
4325 for (
size_t i = 0; i < numRows; ++i) {
4326 for (
size_t j = 0; j < numCols; ++j) {
4328 if (j + 1 < numCols) {
4332 if (i + 1 < numRows) {
4341 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4345 typedef LocalOrdinal LO;
4348 if (vl <= Teuchos::VERB_LOW) {
4349 return std::string();
4351 auto map = this->getMap();
4352 if (map.is_null()) {
4353 return std::string();
4355 auto outStringP = Teuchos::rcp(
new std::ostringstream());
4356 auto outp = Teuchos::getFancyOStream(outStringP);
4357 Teuchos::FancyOStream& out = *outp;
4358 auto comm = map->getComm();
4359 const int myRank = comm->getRank();
4360 const int numProcs = comm->getSize();
4362 out <<
"Process " << myRank <<
" of " << numProcs <<
":" << endl;
4363 Teuchos::OSTab tab1(out);
4366 const LO lclNumRows =
static_cast<LO
>(this->getLocalLength());
4367 out <<
"Local number of rows: " << lclNumRows << endl;
4369 if (vl > Teuchos::VERB_MEDIUM) {
4372 if (this->getNumVectors() !=
static_cast<size_t>(1)) {
4373 out <<
"isConstantStride: " << this->isConstantStride() << endl;
4375 if (this->isConstantStride()) {
4376 out <<
"Column stride: " << this->getStride() << endl;
4379 if (vl > Teuchos::VERB_HIGH && lclNumRows > 0) {
4387 auto X_dev = view_.getDeviceCopy();
4388 auto X_host = view_.getHostCopy();
4390 if (X_dev.data() == X_host.data()) {
4392 Details::print_vector(out,
"unified", X_host);
4394 Details::print_vector(out,
"host", X_host);
4395 Details::print_vector(out,
"dev", X_dev);
4400 return outStringP->str();
4403 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4406 const std::string& className,
4407 const Teuchos::EVerbosityLevel verbLevel)
const {
4409 using Teuchos::TypeNameTraits;
4410 using Teuchos::VERB_DEFAULT;
4411 using Teuchos::VERB_LOW;
4412 using Teuchos::VERB_NONE;
4413 const Teuchos::EVerbosityLevel vl =
4414 (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
4416 if (vl == VERB_NONE) {
4423 auto map = this->getMap();
4424 if (map.is_null()) {
4427 auto comm = map->getComm();
4428 if (comm.is_null()) {
4432 const int myRank = comm->getRank();
4441 Teuchos::RCP<Teuchos::OSTab> tab0, tab1;
4445 tab0 = Teuchos::rcp(
new Teuchos::OSTab(out));
4446 out <<
"\"" << className <<
"\":" << endl;
4447 tab1 = Teuchos::rcp(
new Teuchos::OSTab(out));
4449 out <<
"Template parameters:" << endl;
4450 Teuchos::OSTab tab2(out);
4451 out <<
"Scalar: " << TypeNameTraits<Scalar>::name() << endl
4452 <<
"LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name() << endl
4453 <<
"GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name() << endl
4454 <<
"Node: " << Node::name() << endl;
4456 if (this->getObjectLabel() !=
"") {
4457 out <<
"Label: \"" << this->getObjectLabel() <<
"\", ";
4459 out <<
"Global number of rows: " << this->getGlobalLength() << endl;
4460 if (className !=
"Tpetra::Vector") {
4461 out <<
"Number of columns: " << this->getNumVectors() << endl;
4468 if (vl > VERB_LOW) {
4469 const std::string lclStr = this->localDescribeToString(vl);
4470 ::Tpetra::Details::gathervPrint(out, lclStr, *comm);
4474 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4477 const Teuchos::EVerbosityLevel verbLevel)
const {
4478 this->describeImpl(out,
"Tpetra::MultiVector", verbLevel);
4481 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4487 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4490 using ::Tpetra::Details::localDeepCopy;
4491 const char prefix[] =
"Tpetra::MultiVector::assign: ";
4493 TEUCHOS_TEST_FOR_EXCEPTION(this->getGlobalLength() != src.
getGlobalLength() ||
4495 std::invalid_argument,
4496 prefix <<
"Global dimensions of the two Tpetra::MultiVector "
4497 "objects do not match. src has dimensions ["
4499 <<
"," << src.
getNumVectors() <<
"], and *this has dimensions ["
4500 << this->getGlobalLength() <<
"," << this->getNumVectors() <<
"].");
4502 TEUCHOS_TEST_FOR_EXCEPTION(this->getLocalLength() != src.
getLocalLength(), std::invalid_argument,
4503 prefix <<
"The local row counts of the two Tpetra::MultiVector "
4504 "objects do not match. src has "
4506 <<
" and *this has " << this->getLocalLength() <<
" row(s).");
4520 if (src_last_updated_on_host) {
4521 localDeepCopy(this->getLocalViewHost(Access::ReadWrite),
4523 this->isConstantStride(),
4525 this->whichVectors_(),
4528 localDeepCopy(this->getLocalViewDevice(Access::ReadWrite),
4530 this->isConstantStride(),
4532 this->whichVectors_(),
4537 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4539 Teuchos::RCP<MultiVector<T, LocalOrdinal, GlobalOrdinal, Node> >
4545 this->getNumVectors(),
4551 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4554 using ::Tpetra::Details::PackTraits;
4556 const size_t l1 = this->getLocalLength();
4558 if ((l1 != l2) || (this->getNumVectors() != vec.
getNumVectors())) {
4565 template <
class ST,
class LO,
class GO,
class NT>
4568 std::swap(mv.
map_, this->map_);
4569 std::swap(mv.
view_, this->view_);
4573 #ifdef HAVE_TPETRACORE_TEUCHOSNUMERICS
4574 template <
class ST,
class LO,
class GO,
class NT>
4576 const Teuchos::SerialDenseMatrix<int, ST>& src) {
4577 using ::Tpetra::Details::localDeepCopy;
4579 using IST =
typename MV::impl_scalar_type;
4580 using input_view_type =
4581 Kokkos::View<
const IST**, Kokkos::LayoutLeft,
4582 Kokkos::HostSpace, Kokkos::MemoryUnmanaged>;
4583 using pair_type = std::pair<LO, LO>;
4585 const LO numRows =
static_cast<LO
>(src.numRows());
4586 const LO numCols =
static_cast<LO
>(src.numCols());
4588 TEUCHOS_TEST_FOR_EXCEPTION(numRows != static_cast<LO>(dst.
getLocalLength()) ||
4590 std::invalid_argument,
"Tpetra::deep_copy: On Process " << dst.
getMap()->getComm()->getRank() <<
", dst is " << dst.
getLocalLength() <<
" x " << dst.
getNumVectors() <<
", but src is " << numRows <<
" x " << numCols <<
".");
4592 const IST* src_raw =
reinterpret_cast<const IST*
>(src.values());
4593 input_view_type src_orig(src_raw, src.stride(), numCols);
4594 input_view_type src_view =
4595 Kokkos::subview(src_orig, pair_type(0, numRows), Kokkos::ALL());
4597 constexpr
bool src_isConstantStride =
true;
4598 Teuchos::ArrayView<const size_t> srcWhichVectors(
nullptr, 0);
4602 src_isConstantStride,
4603 getMultiVectorWhichVectors(dst),
4607 template <
class ST,
class LO,
class GO,
class NT>
4608 void deep_copy(Teuchos::SerialDenseMatrix<int, ST>& dst,
4610 using ::Tpetra::Details::localDeepCopy;
4612 using IST =
typename MV::impl_scalar_type;
4613 using output_view_type =
4614 Kokkos::View<IST**, Kokkos::LayoutLeft,
4615 Kokkos::HostSpace, Kokkos::MemoryUnmanaged>;
4616 using pair_type = std::pair<LO, LO>;
4618 const LO numRows =
static_cast<LO
>(dst.numRows());
4619 const LO numCols =
static_cast<LO
>(dst.numCols());
4621 TEUCHOS_TEST_FOR_EXCEPTION(numRows != static_cast<LO>(src.
getLocalLength()) ||
4623 std::invalid_argument,
"Tpetra::deep_copy: On Process " << src.
getMap()->getComm()->getRank() <<
", src is " << src.
getLocalLength() <<
" x " << src.
getNumVectors() <<
", but dst is " << numRows <<
" x " << numCols <<
".");
4625 IST* dst_raw =
reinterpret_cast<IST*
>(dst.values());
4626 output_view_type dst_orig(dst_raw, dst.stride(), numCols);
4628 Kokkos::subview(dst_orig, pair_type(0, numRows), Kokkos::ALL());
4630 constexpr
bool dst_isConstantStride =
true;
4631 Teuchos::ArrayView<const size_t> dstWhichVectors(
nullptr, 0);
4634 localDeepCopy(dst_view,
4636 dst_isConstantStride,
4639 getMultiVectorWhichVectors(src));
4641 #endif // HAVE_TPETRACORE_TEUCHOSNUMERICS
4643 template <
class Scalar,
class LO,
class GO,
class NT>
4644 Teuchos::RCP<MultiVector<Scalar, LO, GO, NT> >
4646 size_t numVectors) {
4648 return Teuchos::rcp(
new MV(map, numVectors));
4651 template <
class ST,
class LO,
class GO,
class NT>
4668 #ifdef HAVE_TPETRACORE_TEUCHOSNUMERICS
4669 #define TPETRA_MULTIVECTOR_INSTANT(SCALAR, LO, GO, NODE) \
4670 template class MultiVector<SCALAR, LO, GO, NODE>; \
4671 template MultiVector<SCALAR, LO, GO, NODE> createCopy(const MultiVector<SCALAR, LO, GO, NODE>& src); \
4672 template Teuchos::RCP<MultiVector<SCALAR, LO, GO, NODE> > createMultiVector(const Teuchos::RCP<const Map<LO, GO, NODE> >& map, size_t numVectors); \
4673 template void deep_copy(MultiVector<SCALAR, LO, GO, NODE>& dst, const Teuchos::SerialDenseMatrix<int, SCALAR>& src); \
4674 template void deep_copy(Teuchos::SerialDenseMatrix<int, SCALAR>& dst, const MultiVector<SCALAR, LO, GO, NODE>& src);
4677 #define TPETRA_MULTIVECTOR_INSTANT(SCALAR, LO, GO, NODE) \
4678 template class MultiVector<SCALAR, LO, GO, NODE>; \
4679 template MultiVector<SCALAR, LO, GO, NODE> createCopy(const MultiVector<SCALAR, LO, GO, NODE>& src);
4681 #endif // HAVE_TPETRACORE_TEUCHOSNUMERICS
4683 #define TPETRA_MULTIVECTOR_CONVERT_INSTANT(SO, SI, LO, GO, NODE) \
4685 template Teuchos::RCP<MultiVector<SO, LO, GO, NODE> > \
4686 MultiVector<SI, LO, GO, NODE>::convert<SO>() const;
4688 #endif // TPETRA_MULTIVECTOR_DEF_HPP
typename map_type::local_ordinal_type local_ordinal_type
The type of local indices that this class uses.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
typename device_type::execution_space execution_space
Type of the (new) Kokkos execution space.
Kokkos::DualView< size_t *, buffer_device_type > numExportPacketsPerLID_
Number of packets to send for each send operation.
Teuchos::RCP< const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > offsetView(const Teuchos::RCP< const map_type > &subMap, const size_t offset) const
Return a const view of a subset of rows.
Declaration and definition of Tpetra::Details::Blas::fill, an implementation detail of Tpetra::MultiV...
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
typename Kokkos::ArithTraits< impl_scalar_type >::mag_type mag_type
Type of a norm result.
size_t getNumVectors() const
Number of columns in the multivector.
size_t getLocalLength() const
Local number of rows on the calling process.
Declaration of a function that prints strings from each process.
bool isConstantStride() const
Whether this multivector has constant stride between columns.
MultiVector< ST, LO, GO, NT > createCopy(const MultiVector< ST, LO, GO, NT > &src)
Return a deep copy of the given MultiVector.
One or more distributed dense vectors.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
static size_t multivectorKernelLocationThreshold()
the threshold for transitioning from device to host
bool isDistributed() const
Whether this is a globally distributed object.
void removeEmptyProcessesInPlace(Teuchos::RCP< DistObjectType > &input, const Teuchos::RCP< const Map< typename DistObjectType::local_ordinal_type, typename DistObjectType::global_ordinal_type, typename DistObjectType::node_type > > &newMap)
Remove processes which contain no elements in this object's Map.
static bool debug()
Whether Tpetra is in debug mode.
Teuchos::Array< size_t > whichVectors_
Indices of columns this multivector is viewing.
Declaration of functions for checking whether a given pointer is accessible from a given Kokkos execu...
int local_ordinal_type
Default value of Scalar template parameter.
bool need_sync_device() const
Whether this MultiVector needs synchronization to the device.
typename Kokkos::ArithTraits< Scalar >::val_type impl_scalar_type
The type used internally in place of Scalar.
size_t global_size_t
Global size_t object.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
dual_view_type::t_host::const_type getLocalViewHost(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on host. This requires that ther...
Kokkos::DualView< size_t *, buffer_device_type > numImportPacketsPerLID_
Number of packets to receive for each receive operation.
Insert new values that don't currently exist.
Kokkos::DualView< packet_type *, buffer_device_type > imports_
Buffer into which packed data are imported (received from other processes).
Kokkos::DualView< packet_type *, buffer_device_type > exports_
Buffer from which packed data are exported (sent to other processes).
Kokkos::DualView< T *, DT > getDualViewCopyFromArrayView(const Teuchos::ArrayView< const T > &x_av, const char label[], const bool leaveOnHost)
Get a 1-D Kokkos::DualView which is a deep copy of the input Teuchos::ArrayView (which views host mem...
wrapped_dual_view_type view_
The Kokkos::DualView containing the MultiVector's data.
void assign(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &src)
Copy the contents of src into *this (deep copy).
static void allReduceView(const OutputViewType &output, const InputViewType &input, const Teuchos::Comm< int > &comm)
All-reduce from input Kokkos::View to output Kokkos::View.
Functions for initializing and finalizing Tpetra.
static bool verbose()
Whether Tpetra is in verbose mode.
CombineMode
Rule for combining data in an Import or Export.
Teuchos::RCP< const map_type > map_
The Map over which this object is distributed.
bool reallocDualViewIfNeeded(Kokkos::DualView< ValueType *, DeviceType > &dv, const size_t newSize, const char newLabel[], const size_t tooBigFactor=2, const bool needFenceBeforeRealloc=true)
Reallocate the DualView in/out argument, if needed.
Declaration of Tpetra::Details::isInterComm.
bool need_sync_host() const
Whether this MultiVector needs synchronization to the host.
Replace old value with maximum of magnitudes of old and new values.
Abstract base class for objects that can be the source of an Import or Export operation.
Declaration and definition of Tpetra::Details::reallocDualViewIfNeeded, an implementation detail of T...
Kokkos::DualView< impl_scalar_type **, Kokkos::LayoutLeft, device_type > dual_view_type
Kokkos::DualView specialization used by this class.
Replace existing values with new values.
dual_view_type::t_dev::const_type getLocalViewDevice(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on device. This requires that th...
std::string combineModeToString(const CombineMode combineMode)
Human-readable string representation of the given CombineMode.
Teuchos::RCP< const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > getVector(const size_t j) const
Return a Vector which is a const view of column j.
MultiVector()
Default constructor: makes a MultiVector with no rows or columns.
void start()
Start the deep_copy counter.
A parallel distribution of indices over processes.
A distributed dense vector.
Stand-alone utility functions and macros.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
Declaration and definition of Tpetra::Details::normImpl, which is an implementation detail of Tpetra:...
void normImpl(MagnitudeType norms[], const Kokkos::View< const ValueType **, ArrayLayout, DeviceType > &X, const EWhichNorm whichNorm, const Teuchos::ArrayView< const size_t > &whichVecs, const bool isConstantStride, const bool isDistributed, const Teuchos::Comm< int > *comm)
Implementation of MultiVector norms.
Base class for distributed Tpetra objects that support data redistribution.
global_size_t getGlobalLength() const
Global number of rows in the multivector.
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > createMultiVector(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node >> &map, const size_t numVectors)
Nonmember MultiVector "constructor": Create a MultiVector from a given Map.
EWhichNorm
Input argument for normImpl() (which see).
Accumulate new values into existing values (may not be supported in all classes)
size_t getOrigNumLocalRows() const
"Original" number of rows in the (local) data.
All-reduce a 1-D or 2-D Kokkos::View.
Declaration and definition of Tpetra::Details::lclDot, an implementation detail of Tpetra::MultiVecto...
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.