42 #ifndef TPETRA_EXPERIMENTAL_BLOCKMULTIVECTOR_DEF_HPP
43 #define TPETRA_EXPERIMENTAL_BLOCKMULTIVECTOR_DEF_HPP
47 #include "Teuchos_OrdinalTraits.hpp"
62 template<
class MultiVectorType>
63 struct RawHostPtrFromMultiVector {
64 typedef typename MultiVectorType::impl_scalar_type impl_scalar_type;
66 static impl_scalar_type* getRawPtr (MultiVectorType& X) {
72 auto X_view_host = X.template getLocalView<typename MultiVectorType::dual_view_type::t_host::device_type> ();
73 impl_scalar_type* X_raw = X_view_host.data ();
90 template<
class S,
class LO,
class GO,
class N>
94 return RawHostPtrFromMultiVector<MV>::getRawPtr (X);
100 namespace Experimental {
102 template<
class Scalar,
class LO,
class GO,
class Node>
103 typename BlockMultiVector<Scalar, LO, GO, Node>::mv_type
110 template<
class Scalar,
class LO,
class GO,
class Node>
111 Teuchos::RCP<const BlockMultiVector<Scalar, LO, GO, Node> >
116 const BMV* src_bmv =
dynamic_cast<const BMV*
> (&src);
117 TEUCHOS_TEST_FOR_EXCEPTION(
118 src_bmv == NULL, std::invalid_argument,
"Tpetra::Experimental::"
119 "BlockMultiVector: The source object of an Import or Export to a "
120 "BlockMultiVector, must also be a BlockMultiVector.");
121 return Teuchos::rcp (src_bmv,
false);
124 template<
class Scalar,
class LO,
class GO,
class Node>
131 pointMap_ (makePointMap (meshMap, blockSize)),
132 mv_ (Teuchos::rcpFromRef (pointMap_), numVecs),
133 mvData_ (getRawHostPtrFromMultiVector (mv_)),
134 blockSize_ (blockSize)
140 template<
class Scalar,
class LO,
class GO,
class Node>
148 pointMap_ (pointMap),
149 mv_ (Teuchos::rcpFromRef (pointMap_), numVecs),
150 mvData_ (getRawHostPtrFromMultiVector (mv_)),
151 blockSize_ (blockSize)
157 template<
class Scalar,
class LO,
class GO,
class Node>
161 const LO blockSize) :
165 blockSize_ (blockSize)
181 RCP<const mv_type> X_view_const;
184 Teuchos::Array<size_t> cols (0);
185 X_view_const = X_mv.
subView (cols ());
187 X_view_const = X_mv.
subView (Teuchos::Range1D (0, numCols-1));
189 TEUCHOS_TEST_FOR_EXCEPTION(
190 X_view_const.is_null (), std::logic_error,
"Tpetra::Experimental::"
191 "BlockMultiVector constructor: X_mv.subView(...) returned null. This "
192 "should never happen. Please report this bug to the Tpetra developers.");
197 RCP<mv_type> X_view = Teuchos::rcp_const_cast<
mv_type> (X_view_const);
199 TEUCHOS_TEST_FOR_EXCEPTION(
200 X_view->getCopyOrView () != Teuchos::View, std::logic_error,
"Tpetra::"
201 "Experimental::BlockMultiVector constructor: We just set a MultiVector "
202 "to have view semantics, but it claims that it doesn't have view "
203 "semantics. This should never happen. "
204 "Please report this bug to the Tpetra developers.");
209 Teuchos::RCP<const map_type> pointMap =
mv_.
getMap ();
210 if (! pointMap.is_null ()) {
211 pointMap_ = *pointMap;
213 mvData_ = getRawHostPtrFromMultiVector (
mv_);
216 template<
class Scalar,
class LO,
class GO,
class Node>
221 const size_t offset) :
223 meshMap_ (newMeshMap),
224 pointMap_ (newPointMap),
225 mv_ (X.mv_, newPointMap, offset * X.getBlockSize ()),
226 mvData_ (getRawHostPtrFromMultiVector (mv_)),
227 blockSize_ (X.getBlockSize ())
233 template<
class Scalar,
class LO,
class GO,
class Node>
237 const size_t offset) :
239 meshMap_ (newMeshMap),
240 pointMap_ (makePointMap (newMeshMap, X.getBlockSize ())),
241 mv_ (X.mv_, pointMap_, offset * X.getBlockSize ()),
242 mvData_ (getRawHostPtrFromMultiVector (mv_)),
243 blockSize_ (X.getBlockSize ())
249 template<
class Scalar,
class LO,
class GO,
class Node>
260 template<
class Scalar,
class LO,
class GO,
class Node>
266 typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
268 const GST gblNumMeshMapInds =
270 const size_t lclNumMeshMapIndices =
272 const GST gblNumPointMapInds =
273 gblNumMeshMapInds *
static_cast<GST
> (blockSize);
274 const size_t lclNumPointMapInds =
275 lclNumMeshMapIndices *
static_cast<size_t> (blockSize);
279 return map_type (gblNumPointMapInds, lclNumPointMapInds, indexBase,
287 const size_type lclNumMeshGblInds = lclMeshGblInds.size ();
288 Teuchos::Array<GO> lclPointGblInds (lclNumPointMapInds);
289 for (size_type g = 0; g < lclNumMeshGblInds; ++g) {
290 const GO meshGid = lclMeshGblInds[g];
291 const GO pointGidStart = indexBase +
292 (meshGid - indexBase) * static_cast<GO> (blockSize);
293 const size_type offset = g *
static_cast<size_type
> (blockSize);
294 for (LO k = 0; k < blockSize; ++k) {
295 const GO pointGid = pointGidStart +
static_cast<GO
> (k);
296 lclPointGblInds[offset +
static_cast<size_type
> (k)] = pointGid;
299 return map_type (gblNumPointMapInds, lclPointGblInds (), indexBase,
305 template<
class Scalar,
class LO,
class GO,
class Node>
310 const Scalar vals[])
const
312 auto X_dst = getLocalBlock (localRowIndex, colIndex);
313 typename const_little_vec_type::HostMirror::const_type X_src (reinterpret_cast<const impl_scalar_type*> (vals),
319 template<
class Scalar,
class LO,
class GO,
class Node>
324 const Scalar vals[])
const
326 if (! meshMap_.isNodeLocalElement (localRowIndex)) {
329 replaceLocalValuesImpl (localRowIndex, colIndex, vals);
334 template<
class Scalar,
class LO,
class GO,
class Node>
339 const Scalar vals[])
const
341 const LO localRowIndex = meshMap_.getLocalElement (globalRowIndex);
342 if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid ()) {
345 replaceLocalValuesImpl (localRowIndex, colIndex, vals);
350 template<
class Scalar,
class LO,
class GO,
class Node>
355 const Scalar vals[])
const
357 auto X_dst = getLocalBlock (localRowIndex, colIndex);
358 typename const_little_vec_type::HostMirror::const_type X_src (reinterpret_cast<const impl_scalar_type*> (vals),
360 AXPY (static_cast<impl_scalar_type> (STS::one ()), X_src, X_dst);
363 template<
class Scalar,
class LO,
class GO,
class Node>
368 const Scalar vals[])
const
370 if (! meshMap_.isNodeLocalElement (localRowIndex)) {
373 sumIntoLocalValuesImpl (localRowIndex, colIndex, vals);
378 template<
class Scalar,
class LO,
class GO,
class Node>
383 const Scalar vals[])
const
385 const LO localRowIndex = meshMap_.getLocalElement (globalRowIndex);
386 if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid ()) {
389 sumIntoLocalValuesImpl (localRowIndex, colIndex, vals);
394 template<
class Scalar,
class LO,
class GO,
class Node>
399 if (! meshMap_.isNodeLocalElement (localRowIndex)) {
402 auto X_ij = getLocalBlock (localRowIndex, colIndex);
403 vals =
reinterpret_cast<Scalar*
> (X_ij.data ());
408 template<
class Scalar,
class LO,
class GO,
class Node>
413 const LO localRowIndex = meshMap_.getLocalElement (globalRowIndex);
414 if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid ()) {
417 auto X_ij = getLocalBlock (localRowIndex, colIndex);
418 vals =
reinterpret_cast<Scalar*
> (X_ij.data ());
423 template<
class Scalar,
class LO,
class GO,
class Node>
427 const LO colIndex)
const
441 if (! isValidLocalMeshIndex (localRowIndex)) {
442 return typename little_vec_type::HostMirror ();
444 const size_t blockSize = getBlockSize ();
445 const size_t offset = colIndex * this->getStrideY () +
446 localRowIndex * blockSize;
448 return typename little_vec_type::HostMirror (blockRaw, blockSize);
452 template<
class Scalar,
class LO,
class GO,
class Node>
453 Teuchos::RCP<const typename BlockMultiVector<Scalar, LO, GO, Node>::mv_type>
458 using Teuchos::rcpFromRef;
465 const this_type* srcBlkVec =
dynamic_cast<const this_type*
> (&src);
466 if (srcBlkVec == NULL) {
467 const mv_type* srcMultiVec =
dynamic_cast<const mv_type*
> (&src);
468 if (srcMultiVec == NULL) {
472 return rcp (
new mv_type ());
474 return rcp (srcMultiVec,
false);
477 return rcpFromRef (srcBlkVec->mv_);
481 template<
class Scalar,
class LO,
class GO,
class Node>
485 return ! getMultiVectorFromSrcDistObject (src).is_null ();
488 template<
class SC,
class LO,
class GO,
class NT>
489 std::pair<int, std::unique_ptr<std::string>>
493 const size_t numSameIDs,
494 const Kokkos::DualView<
495 const local_ordinal_type*,
498 const Kokkos::DualView<
499 const local_ordinal_type*,
504 const char tfecfFuncName[] =
"copyAndPermuteImpl: ";
505 std::unique_ptr<std::string> errMsg;
507 const int myRank = src.
getMap ()->getComm ()->getRank ();
509 std::unique_ptr<std::string> prefix;
511 std::ostringstream os;
512 os <<
"Proc " << myRank <<
": " << tfecfFuncName;
513 prefix = std::unique_ptr<std::string> (
new std::string (os.str ()));
514 os <<
"Start" << endl;
515 std::cerr << os.str ();
517 if (permuteToLIDs.need_sync_host () || permuteFromLIDs.need_sync_host ()) {
518 std::ostringstream os;
519 os <<
"Proc " << myRank <<
": " << tfecfFuncName <<
"permuteToLIDs and/or "
520 "permuteFromLIDs need sync to host. This should never happen.";
521 errMsg = std::unique_ptr<std::string> (
new std::string (os.str ()));
522 return { -1, std::move (errMsg) };
525 const size_t numPermuteLIDs =
static_cast<size_t> (permuteToLIDs.extent (0));
526 if (static_cast<size_t> (permuteFromLIDs.extent (0)) != numPermuteLIDs) {
527 std::ostringstream os;
528 os <<
"Proc " << myRank <<
": " << tfecfFuncName
529 <<
"permuteToLIDs.extent(0) = " << numPermuteLIDs
530 <<
" != permuteFromLIDs.extent(0) = " << permuteFromLIDs.extent (0)
532 errMsg = std::unique_ptr<std::string> (
new std::string (os.str ()));
533 return { -2, std::move (errMsg) };
536 const size_t numVecs =
static_cast<size_t> (src.
getNumVectors ());
538 std::ostringstream os;
539 os << *prefix <<
"Sames: numSameIDs=" << numSameIDs
540 <<
", numVecs=" << numVecs << endl;
541 std::cerr << os.str ();
545 for (
size_t j = 0; j < numVecs; ++j) {
546 for (
size_t lclRow = 0; lclRow < numSameIDs; ++lclRow) {
547 deep_copy (this->getLocalBlock (lclRow, j),
553 std::ostringstream os;
554 os << *prefix <<
"Permutes: numPermuteLIDs=" << numPermuteLIDs << endl;
555 std::cerr << os.str ();
557 auto permuteToLIDs_h = permuteToLIDs.view_host ();
558 auto permuteFromLIDs_h = permuteFromLIDs.view_host ();
563 for (
size_t j = 0; j < numVecs; ++j) {
564 for (
size_t k = numSameIDs; k < numPermuteLIDs; ++k) {
565 deep_copy (this->getLocalBlock (permuteToLIDs_h[k], j),
571 std::ostringstream os;
572 os << *prefix <<
"Done" << endl;
573 std::cerr << os.str ();
575 return { 0, std::move (errMsg) };
578 template<
class Scalar,
class LO,
class GO,
class Node>
579 void BlockMultiVector<Scalar, LO, GO, Node>::
580 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
582 #else // TPETRA_ENABLE_DEPRECATED_CODE
584 #endif // TPETRA_ENABLE_DEPRECATED_CODE
585 (
const SrcDistObject& src,
586 const size_t numSameIDs,
588 buffer_device_type>& permuteToLIDs,
590 buffer_device_type>& permuteFromLIDs)
592 const char tfecfFuncName[] =
"copyAndPermute: ";
594 auto srcPtr = getBlockMultiVectorFromSrcDistObject (src);
595 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
596 (srcPtr.is_null (), std::invalid_argument,
597 "The source of an Import or Export to a BlockMultiVector "
598 "must also be a BlockMultiVector.");
599 const auto ret = copyAndPermuteImpl (*srcPtr, numSameIDs,
600 permuteToLIDs, permuteFromLIDs);
604 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
605 (ret.first != 0, std::runtime_error,
"copyAndPermuteImpl "
606 "reports an error: " << * (ret.second));
609 template<
class SC,
class LO,
class GO,
class NT>
610 std::pair<int, std::unique_ptr<std::string>>
611 BlockMultiVector<SC, LO, GO, NT>::
613 (
const Kokkos::DualView<
614 const local_ordinal_type*,
625 size_t& constantNumPackets,
629 using IST = impl_scalar_type;
630 using exports_dv_type = Kokkos::DualView<packet_type*, buffer_device_type>;
631 using host_device_type =
typename exports_dv_type::t_host::device_type;
632 using dst_little_vec_type = Kokkos::View<IST*, host_device_type,
633 Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
634 const char tfecfFuncName[] =
"packAndPrepareImpl: ";
635 std::unique_ptr<std::string> errMsg;
638 std::unique_ptr<std::string> prefix;
640 const int myRank = this->getMap ()->getComm ()->getRank ();
641 std::ostringstream os;
642 os <<
"Proc " << myRank <<
": " << tfecfFuncName;
643 prefix = std::unique_ptr<std::string> (
new std::string (os.str ()));
644 os <<
"Start" << endl;
645 std::cerr << os.str ();
647 if (exportLIDs.need_sync_host ()) {
648 const int myRank = this->getMap ()->getComm ()->getRank ();
649 std::ostringstream os;
650 os <<
"Proc " << myRank <<
": " << tfecfFuncName <<
"exportLIDs "
651 "needs sync to host. This should never happen.";
652 errMsg = std::unique_ptr<std::string> (
new std::string (os.str ()));
653 return { -1, std::move (errMsg) };
656 const size_t numVecs =
static_cast<size_t> (this->getNumVectors ());
657 const size_t blockSize =
static_cast<size_t> (this->getBlockSize ());
662 constantNumPackets = blockSize * numVecs;
663 const size_t numMeshLIDs =
static_cast<size_t> (exportLIDs.extent (0));
664 const size_t requiredExportsSize = numMeshLIDs * blockSize * numVecs;
666 if (static_cast<size_t> (exports.extent (0)) != requiredExportsSize) {
667 exports = exports_dv_type ();
668 exports = exports_dv_type (
"exports", requiredExportsSize);
670 exports.clear_sync_state ();
671 exports.modify_host ();
672 auto exports_h = exports.view_host ();
673 auto exportLIDs_h = exportLIDs.view_host ();
676 size_t curExportPos = 0;
677 for (
size_t meshLidIndex = 0; meshLidIndex < numMeshLIDs; ++meshLidIndex) {
678 for (
size_t j = 0; j < numVecs; ++j, curExportPos += blockSize) {
679 const LO meshLid = exportLIDs_h[meshLidIndex];
681 IST*
const curExportPtr = &exports_h[curExportPos];
682 dst_little_vec_type X_dst (curExportPtr, blockSize);
683 auto X_src = this->getLocalBlock (meshLid, j);
689 catch (std::exception& e) {
690 const int myRank = this->getMap ()->getComm ()->getRank ();
691 std::ostringstream os;
692 os <<
"Proc " << myRank <<
": " << tfecfFuncName
693 <<
" threw an exception: " << e.what ();
694 errMsg = std::unique_ptr<std::string> (
new std::string (os.str ()));
695 return { -2, std::move (errMsg) };
699 std::ostringstream os;
700 os << *prefix <<
"Done" << endl;
701 std::cerr << os.str ();
703 return { 0, std::move (errMsg) };
706 template<
class Scalar,
class LO,
class GO,
class Node>
707 void BlockMultiVector<Scalar, LO, GO, Node>::
708 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
710 #else // TPETRA_ENABLE_DEPRECATED_CODE
712 #endif // TPETRA_ENABLE_DEPRECATED_CODE
713 (
const SrcDistObject& src,
715 buffer_device_type>& exportLIDs,
716 Kokkos::DualView<packet_type*,
717 buffer_device_type>& exports,
718 Kokkos::DualView<
size_t*,
719 buffer_device_type> numPacketsPerLID,
720 size_t& constantNumPackets,
723 const char tfecfFuncName[] =
"packAndPrepare: ";
725 auto srcAsBmvPtr = getBlockMultiVectorFromSrcDistObject (src);
726 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
727 (srcAsBmvPtr.is_null (), std::invalid_argument,
728 "The source of an Import or Export to a BlockMultiVector "
729 "must also be a BlockMultiVector.");
731 srcAsBmvPtr->packAndPrepareImpl (exportLIDs, exports,
733 constantNumPackets, distor);
737 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
738 (ret.first != 0, std::runtime_error,
"packAndPrepareImpl "
739 "reports an error: " << * (ret.second));
742 template<
class SC,
class LO,
class GO,
class NT>
743 std::pair<int, std::unique_ptr<std::string>>
744 BlockMultiVector<SC, LO, GO, NT>::
746 (
const Kokkos::DualView<
747 const local_ordinal_type*,
758 const size_t constantNumPackets,
763 using IST = impl_scalar_type;
764 using KAT = Kokkos::ArithTraits<IST>;
765 using imports_dv_type = Kokkos::DualView<packet_type*, buffer_device_type>;
766 using host_device_type =
typename imports_dv_type::t_host::device_type;
767 using src_little_vec_type = Kokkos::View<
const IST*, host_device_type,
768 Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
769 const char tfecfFuncName[] =
"unpackAndCombineImpl: ";
770 std::unique_ptr<std::string> errMsg;
773 std::unique_ptr<std::string> prefix;
775 const int myRank = this->getMap ()->getComm ()->getRank ();
776 std::ostringstream os;
777 os <<
"Proc " << myRank <<
": " << tfecfFuncName;
778 prefix = std::unique_ptr<std::string> (
new std::string (os.str ()));
779 os <<
"Start" << endl;
780 std::cerr << os.str ();
783 if (combineMode !=
ADD && combineMode !=
REPLACE &&
785 combineMode !=
ZERO) {
786 std::ostringstream os;
787 os << tfecfFuncName <<
"Invalid CombineMode: " << combineMode <<
". "
788 "Valid CombineMode values: ADD, REPLACE, INSERT, ABSMAX, and ZERO.";
789 errMsg = std::unique_ptr<std::string> (
new std::string (os.str ()));
790 return { -1, std::move (errMsg) };
792 if (combineMode ==
ZERO) {
793 return { 0, std::move (errMsg) };
796 if (importLIDs.need_sync_host ()) {
797 const int myRank = this->getMap ()->getComm ()->getRank ();
798 std::ostringstream os;
799 os <<
"Proc " << myRank <<
": " << tfecfFuncName <<
"importLIDs "
800 "needs sync to host. This should never happen.";
801 errMsg = std::unique_ptr<std::string> (
new std::string (os.str ()));
802 return { -2, std::move (errMsg) };
807 const size_t numMeshLIDs =
static_cast<size_t> (importLIDs.extent (0));
808 const size_t blockSize =
static_cast<size_t> (this->getBlockSize ());
809 const size_t numVecs =
static_cast<size_t> (this->getNumVectors ());
811 const size_t requiredImportsSize = numMeshLIDs * blockSize * numVecs;
812 if (static_cast<size_t> (imports.extent (0)) < requiredImportsSize) {
813 const int myRank = this->getMap ()->getComm ()->getRank ();
814 std::ostringstream os;
815 os <<
"Proc " << myRank <<
": " << tfecfFuncName <<
"imports.extent(0) = "
816 << imports.extent (0) <<
" < requiredImportsSize = "
817 << requiredImportsSize <<
".";
818 errMsg = std::unique_ptr<std::string> (
new std::string (os.str ()));
819 return { -3, std::move (errMsg) };
822 auto importLIDs_h = importLIDs.view_host ();
823 if (imports.need_sync_host ()) {
824 imports.sync_host ();
826 auto imports_h = imports.view_host ();
828 size_t curImportPos = 0;
829 for (
size_t meshLidIndex = 0; meshLidIndex < numMeshLIDs; ++meshLidIndex) {
830 for (
size_t j = 0; j < numVecs; ++j, curImportPos += blockSize) {
831 const LO meshLid = importLIDs_h[meshLidIndex];
832 const IST*
const curImportPtr = &imports_h[curImportPos];
834 src_little_vec_type X_src (curImportPtr, blockSize);
835 auto X_dst = this->getLocalBlock (meshLid, j);
840 else if (combineMode ==
ADD) {
842 AXPY (static_cast<IST> (KAT::one ()), X_src, X_dst);
844 else if (combineMode ==
ABSMAX) {
845 ::Tpetra::Experimental::Impl::absMax (X_dst, X_src);
851 std::ostringstream os;
852 os << *prefix <<
"Done" << endl;
853 std::cerr << os.str ();
855 return { 0, std::move (errMsg) };
858 template<
class Scalar,
class LO,
class GO,
class Node>
859 void BlockMultiVector<Scalar, LO, GO, Node>::
860 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
862 #else // TPETRA_ENABLE_DEPRECATED_CODE
864 #endif // TPETRA_ENABLE_DEPRECATED_CODE
866 buffer_device_type>& importLIDs,
867 Kokkos::DualView<packet_type*,
868 buffer_device_type> imports,
869 Kokkos::DualView<
size_t*,
870 buffer_device_type> numPacketsPerLID,
871 const size_t constantNumPackets,
875 const char tfecfFuncName[] =
"unpackAndCombine: ";
877 unpackAndCombineImpl (importLIDs, imports, numPacketsPerLID,
878 constantNumPackets, distor, combineMode);
882 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
883 (ret.first != 0, std::runtime_error,
"unpackAndCombineImpl "
884 "reports an error: " << * (ret.second));
887 template<
class Scalar,
class LO,
class GO,
class Node>
891 return meshLocalIndex != Teuchos::OrdinalTraits<LO>::invalid () &&
892 meshMap_.isNodeLocalElement (meshLocalIndex);
895 template<
class Scalar,
class LO,
class GO,
class Node>
902 template<
class Scalar,
class LO,
class GO,
class Node>
909 template<
class Scalar,
class LO,
class GO,
class Node>
915 mv_.update (alpha, X.
mv_, beta);
920 template <
typename Scalar,
typename ViewY,
typename ViewD,
typename ViewX>
921 struct BlockWiseMultiply {
922 typedef typename ViewD::size_type Size;
925 typedef typename ViewD::device_type Device;
926 typedef typename ViewD::non_const_value_type ImplScalar;
927 typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
929 template <
typename View>
930 using UnmanagedView = Kokkos::View<
typename View::data_type,
typename View::array_layout,
931 typename View::device_type, Unmanaged>;
932 typedef UnmanagedView<ViewY> UnMViewY;
933 typedef UnmanagedView<ViewD> UnMViewD;
934 typedef UnmanagedView<ViewX> UnMViewX;
936 const Size block_size_;
943 BlockWiseMultiply (
const Size block_size,
const Scalar& alpha,
944 const ViewY& Y,
const ViewD& D,
const ViewX& X)
945 : block_size_(block_size), alpha_(alpha), Y_(Y), D_(D), X_(X)
948 KOKKOS_INLINE_FUNCTION
949 void operator() (
const Size k)
const {
950 const auto zero = Kokkos::Details::ArithTraits<Scalar>::zero();
951 auto D_curBlk = Kokkos::subview(D_, k, Kokkos::ALL (), Kokkos::ALL ());
952 const auto num_vecs = X_.extent(1);
953 for (Size i = 0; i < num_vecs; ++i) {
954 Kokkos::pair<Size, Size> kslice(k*block_size_, (k+1)*block_size_);
955 auto X_curBlk = Kokkos::subview(X_, kslice, i);
956 auto Y_curBlk = Kokkos::subview(Y_, kslice, i);
965 template <
typename Scalar,
typename ViewY,
typename ViewD,
typename ViewX>
966 inline BlockWiseMultiply<Scalar, ViewY, ViewD, ViewX>
967 createBlockWiseMultiply (
const int block_size,
const Scalar& alpha,
968 const ViewY& Y,
const ViewD& D,
const ViewX& X) {
969 return BlockWiseMultiply<Scalar, ViewY, ViewD, ViewX>(block_size, alpha, Y, D, X);
972 template <
typename ViewY,
976 typename LO =
typename ViewY::size_type>
977 class BlockJacobiUpdate {
979 typedef typename ViewD::device_type Device;
980 typedef typename ViewD::non_const_value_type ImplScalar;
981 typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
983 template <
typename ViewType>
984 using UnmanagedView = Kokkos::View<
typename ViewType::data_type,
985 typename ViewType::array_layout,
986 typename ViewType::device_type,
988 typedef UnmanagedView<ViewY> UnMViewY;
989 typedef UnmanagedView<ViewD> UnMViewD;
990 typedef UnmanagedView<ViewZ> UnMViewZ;
1000 BlockJacobiUpdate (
const ViewY& Y,
1001 const Scalar& alpha,
1004 const Scalar& beta) :
1005 blockSize_ (D.extent (1)),
1013 static_assert (static_cast<int> (ViewY::rank) == 1,
1014 "Y must have rank 1.");
1015 static_assert (static_cast<int> (ViewD::rank) == 3,
"D must have rank 3.");
1016 static_assert (static_cast<int> (ViewZ::rank) == 1,
1017 "Z must have rank 1.");
1023 KOKKOS_INLINE_FUNCTION
void
1024 operator() (
const LO& k)
const
1027 using Kokkos::subview;
1028 typedef Kokkos::pair<LO, LO> range_type;
1029 typedef Kokkos::Details::ArithTraits<Scalar> KAT;
1033 auto D_curBlk = subview (D_, k, ALL (), ALL ());
1034 const range_type kslice (k*blockSize_, (k+1)*blockSize_);
1038 auto Z_curBlk = subview (Z_, kslice);
1039 auto Y_curBlk = subview (Y_, kslice);
1041 if (beta_ == KAT::zero ()) {
1044 else if (beta_ != KAT::one ()) {
1051 template<
class ViewY,
1055 class LO =
typename ViewD::size_type>
1057 blockJacobiUpdate (
const ViewY& Y,
1058 const Scalar& alpha,
1063 static_assert (Kokkos::Impl::is_view<ViewY>::value,
"Y must be a Kokkos::View.");
1064 static_assert (Kokkos::Impl::is_view<ViewD>::value,
"D must be a Kokkos::View.");
1065 static_assert (Kokkos::Impl::is_view<ViewZ>::value,
"Z must be a Kokkos::View.");
1066 static_assert (static_cast<int> (ViewY::rank) == static_cast<int> (ViewZ::rank),
1067 "Y and Z must have the same rank.");
1068 static_assert (static_cast<int> (ViewD::rank) == 3,
"D must have rank 3.");
1070 const auto lclNumMeshRows = D.extent (0);
1072 #ifdef HAVE_TPETRA_DEBUG
1076 const auto blkSize = D.extent (1);
1077 const auto lclNumPtRows = lclNumMeshRows * blkSize;
1078 TEUCHOS_TEST_FOR_EXCEPTION
1079 (Y.extent (0) != lclNumPtRows, std::invalid_argument,
1080 "blockJacobiUpdate: Y.extent(0) = " << Y.extent (0) <<
" != "
1081 "D.extent(0)*D.extent(1) = " << lclNumMeshRows <<
" * " << blkSize
1082 <<
" = " << lclNumPtRows <<
".");
1083 TEUCHOS_TEST_FOR_EXCEPTION
1084 (Y.extent (0) != Z.extent (0), std::invalid_argument,
1085 "blockJacobiUpdate: Y.extent(0) = " << Y.extent (0) <<
" != "
1086 "Z.extent(0) = " << Z.extent (0) <<
".");
1087 TEUCHOS_TEST_FOR_EXCEPTION
1088 (Y.extent (1) != Z.extent (1), std::invalid_argument,
1089 "blockJacobiUpdate: Y.extent(1) = " << Y.extent (1) <<
" != "
1090 "Z.extent(1) = " << Z.extent (1) <<
".");
1091 #endif // HAVE_TPETRA_DEBUG
1093 BlockJacobiUpdate<ViewY, Scalar, ViewD, ViewZ, LO> functor (Y, alpha, D, Z, beta);
1094 typedef Kokkos::RangePolicy<typename ViewY::execution_space, LO> range_type;
1096 range_type range (0, static_cast<LO> (lclNumMeshRows));
1097 Kokkos::parallel_for (range, functor);
1102 template<
class Scalar,
class LO,
class GO,
class Node>
1111 typedef typename device_type::memory_space memory_space;
1112 const LO lclNumMeshRows = meshMap_.getNodeNumElements ();
1114 if (alpha == STS::zero ()) {
1115 this->putScalar (STS::zero ());
1118 const LO blockSize = this->getBlockSize ();
1120 auto X_lcl = X.
mv_.template getLocalView<memory_space> ();
1121 auto Y_lcl = this->mv_.template getLocalView<memory_space> ();
1122 auto bwm = Impl::createBlockWiseMultiply (blockSize, alphaImpl, Y_lcl, D, X_lcl);
1129 Kokkos::RangePolicy<execution_space, LO> range (0, lclNumMeshRows);
1130 Kokkos::parallel_for (range, bwm);
1134 template<
class Scalar,
class LO,
class GO,
class Node>
1144 using Kokkos::subview;
1145 typedef typename device_type::memory_space memory_space;
1148 const IST alphaImpl =
static_cast<IST
> (alpha);
1149 const IST betaImpl =
static_cast<IST
> (beta);
1150 const LO numVecs = mv_.getNumVectors ();
1152 auto X_lcl = X.
mv_.template getLocalView<memory_space> ();
1153 auto Y_lcl = this->mv_.template getLocalView<memory_space> ();
1154 auto Z_lcl = Z.
mv_.template getLocalView<memory_space> ();
1156 if (alpha == STS::zero ()) {
1160 Z.
update (STS::one (), X, -STS::one ());
1161 for (LO j = 0; j < numVecs; ++j) {
1162 auto X_lcl_j = subview (X_lcl, ALL (), j);
1163 auto Y_lcl_j = subview (Y_lcl, ALL (), j);
1164 auto Z_lcl_j = subview (Z_lcl, ALL (), j);
1165 Impl::blockJacobiUpdate (Y_lcl_j, alphaImpl, D, Z_lcl_j, betaImpl);
1178 #define TPETRA_EXPERIMENTAL_BLOCKMULTIVECTOR_INSTANT(S,LO,GO,NODE) \
1179 namespace Experimental { \
1180 template class BlockMultiVector< S, LO, GO, NODE >; \
1183 #endif // TPETRA_EXPERIMENTAL_BLOCKMULTIVECTOR_DEF_HPP
Teuchos::RCP< const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > subView(const Teuchos::Range1D &colRng) const
Return a const MultiVector with const views of selected columns.
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
KOKKOS_INLINE_FUNCTION void GEMV(const CoeffType &alpha, const BlkType &A, const VecType1 &x, const VecType2 &y)
y := y + alpha * A * x (dense matrix-vector multiply)
KOKKOS_INLINE_FUNCTION void SCAL(const CoefficientType &alpha, const ViewType &x)
x := alpha*x, where x is either rank 1 (a vector) or rank 2 (a matrix).
mv_type mv_
The Tpetra::MultiVector used to represent the data.
bool isContiguous() const
True if this Map is distributed contiguously, else false.
void putScalar(const Scalar &val)
Fill all entries with the given value val.
size_t getNumVectors() const
Number of columns in the multivector.
bool replaceGlobalValues(const GO globalRowIndex, const LO colIndex, const Scalar vals[]) const
Replace all values at the given mesh point, using a global index.
One or more distributed dense vectors.
bool sumIntoGlobalValues(const GO globalRowIndex, const LO colIndex, const Scalar vals[]) const
Sum into all values at the given mesh point, using a global index.
MultiVector for multiple degrees of freedom per mesh point.
GlobalOrdinal getIndexBase() const
The index base for this Map.
static map_type makePointMap(const map_type &meshMap, const LO blockSize)
Create and return the point Map corresponding to the given mesh Map and block size.
KOKKOS_INLINE_FUNCTION void FILL(const ViewType &x, const InputType &val)
Set every entry of x to val.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Accessors for the Teuchos::Comm and Kokkos Node objects.
static bool debug()
Whether Tpetra is in debug mode.
little_vec_type::HostMirror getLocalBlock(const LO localRowIndex, const LO colIndex) const
Get a host view of the degrees of freedom at the given mesh point.
int local_ordinal_type
Default value of Scalar template parameter.
void setCopyOrView(const Teuchos::DataAccess copyOrView)
Set whether this has copy (copyOrView = Teuchos::Copy) or view (copyOrView = Teuchos::View) semantics...
void scale(const Scalar &val)
Multiply all entries in place by the given value val.
void update(const Scalar &alpha, const BlockMultiVector< Scalar, LO, GO, Node > &X, const Scalar &beta)
Update: this = beta*this + alpha*X.
mv_type getMultiVectorView() const
Get a Tpetra::MultiVector that views this BlockMultiVector's data.
size_t global_size_t
Global size_t object.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Insert new values that don't currently exist.
typename device_type::execution_space execution_space
The Kokkos execution space.
Linear algebra kernels for small dense matrices and vectors.
LO getNumVectors() const
Get the number of columns (vectors) in the BlockMultiVector.
CombineMode
Rule for combining data in an Import or Export.
Sum new values into existing values.
void blockJacobiUpdate(const Scalar &alpha, const Kokkos::View< const impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &D, const BlockMultiVector< Scalar, LO, GO, Node > &X, BlockMultiVector< Scalar, LO, GO, Node > &Z, const Scalar &beta)
Block Jacobi update .
void blockWiseMultiply(const Scalar &alpha, const Kokkos::View< const impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &D, const BlockMultiVector< Scalar, LO, GO, Node > &X)
*this := alpha * D * X, where D is a block diagonal matrix.
bool getLocalRowView(const LO localRowIndex, const LO colIndex, Scalar *&vals) const
Get a writeable view of the entries at the given mesh point, using a local index. ...
Replace old value with maximum of magnitudes of old and new values.
Abstract base class for objects that can be the source of an Import or Export operation.
bool isValidLocalMeshIndex(const LO meshLocalIndex) const
True if and only if meshLocalIndex is a valid local index in the mesh Map.
Replace existing values with new values.
Replace old values with zero.
Teuchos::ArrayView< const GlobalOrdinal > getNodeElementList() const
Return a NONOWNING view of the global indices owned by this process.
typename Kokkos::Details::ArithTraits< Scalar >::val_type impl_scalar_type
The type used internally in place of Scalar.
virtual bool checkSizes(const Tpetra::SrcDistObject &source)
Compare the source and target (this) objects for compatibility.
size_t getNodeNumElements() const
The number of elements belonging to the calling process.
BlockMultiVector()
Default constructor.
typename mv_type::impl_scalar_type impl_scalar_type
The implementation type of entries in the object.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
bool replaceLocalValues(const LO localRowIndex, const LO colIndex, const Scalar vals[]) const
Replace all values at the given mesh point, using local row and column indices.
Teuchos::DataAccess getCopyOrView() const
Get whether this has copy (copyOrView = Teuchos::Copy) or view (copyOrView = Teuchos::View) semantics...
bool sumIntoLocalValues(const LO localRowIndex, const LO colIndex, const Scalar vals[]) const
Sum into all values at the given mesh point, using a local index.
global_size_t getGlobalNumElements() const
The number of elements in this Map.
typename mv_type::device_type device_type
The Kokkos Device type.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
bool getGlobalRowView(const GO globalRowIndex, const LO colIndex, Scalar *&vals) const
Get a writeable view of the entries at the given mesh point, using a global index.