40 #ifndef TPETRA_DISTOBJECT_DEF_HPP
41 #define TPETRA_DISTOBJECT_DEF_HPP
51 #include "Tpetra_Distributor.hpp"
54 #include "Tpetra_Details_checkGlobalError.hpp"
57 #include "Teuchos_CommHelpers.hpp"
58 #include "Teuchos_TypeNameTraits.hpp"
// Reduction functor over a 1-D Kokkos::View of size_t values.
// DeviceType selects the View's device; IndexType is the type of the
// index passed to operator() and defaults to size_t.
66 template<
class DeviceType,
class IndexType =
size_t>
// Constructor: keep a copy of the View handle whose entries will be summed.
68 SumFunctor (
const Kokkos::View<const size_t*, DeviceType>& viewToSum) :
69 viewToSum_ (viewToSum) {}
// Reduction body: add entry i of the stored View into the running sum lclSum.
70 KOKKOS_INLINE_FUNCTION
void operator() (
const IndexType i,
size_t& lclSum)
const {
71 lclSum += viewToSum_(i);
// The (read-only) View being summed.
73 Kokkos::View<const size_t*, DeviceType> viewToSum_;
// Sum the entries of numImportPacketsPerLID with a Kokkos parallel_reduce
// and return the total.
//
// numImportPacketsPerLID: 1-D View of size_t counts to add up.
// Returns the local variable totalImportPackets, which starts at zero.
76 template<
class DeviceType,
class IndexType =
size_t>
78 countTotalImportPackets (
const Kokkos::View<const size_t*, DeviceType>& numImportPacketsPerLID)
80 using Kokkos::parallel_reduce;
81 typedef DeviceType DT;
82 typedef typename DT::execution_space DES;
// The reduction runs on DeviceType's execution space, indexed by IndexType.
83 typedef Kokkos::RangePolicy<DES, IndexType> range_type;
// Number of entries to visit: the View's first (only) extent.
85 const IndexType numOut = numImportPacketsPerLID.extent (0);
86 size_t totalImportPackets = 0;
// Reduce over [0, numOut) using SumFunctor.
// NOTE(review): the argument that receives the reduction result is not
// visible here; presumably it is totalImportPackets -- confirm.
87 parallel_reduce (
"Count import packets",
88 range_type (0, numOut),
89 SumFunctor<DeviceType, IndexType> (numImportPacketsPerLID),
91 return totalImportPackets;
// Timer setup that is compiled only when HAVE_TPETRA_TRANSFER_TIMERS is
// defined.  For each of the five named timers below, the code first asks
// TimeMonitor::lookupCounter for an existing counter with that name, falls
// back to TimeMonitor::getNewCounter when the lookup returned null, and
// then stores the result in the corresponding *_Timer_ member.
96 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
101 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
104 using Teuchos::TimeMonitor;
// Timer "Tpetra::DistObject::doTransfer" -> doXferTimer_.
106 RCP<Time> doXferTimer =
107 TimeMonitor::lookupCounter (
"Tpetra::DistObject::doTransfer");
108 if (doXferTimer.is_null ()) {
110 TimeMonitor::getNewCounter (
"Tpetra::DistObject::doTransfer");
112 doXferTimer_ = doXferTimer;
// Timer "Tpetra::DistObject::copyAndPermute" -> copyAndPermuteTimer_.
114 RCP<Time> copyAndPermuteTimer =
115 TimeMonitor::lookupCounter (
"Tpetra::DistObject::copyAndPermute");
116 if (copyAndPermuteTimer.is_null ()) {
117 copyAndPermuteTimer =
118 TimeMonitor::getNewCounter (
"Tpetra::DistObject::copyAndPermute");
120 copyAndPermuteTimer_ = copyAndPermuteTimer;
// Timer "Tpetra::DistObject::packAndPrepare" -> packAndPrepareTimer_.
122 RCP<Time> packAndPrepareTimer =
123 TimeMonitor::lookupCounter (
"Tpetra::DistObject::packAndPrepare");
124 if (packAndPrepareTimer.is_null ()) {
125 packAndPrepareTimer =
126 TimeMonitor::getNewCounter (
"Tpetra::DistObject::packAndPrepare");
128 packAndPrepareTimer_ = packAndPrepareTimer;
// Timer "Tpetra::DistObject::doPostsAndWaits" -> doPostsAndWaitsTimer_.
130 RCP<Time> doPostsAndWaitsTimer =
131 TimeMonitor::lookupCounter (
"Tpetra::DistObject::doPostsAndWaits");
132 if (doPostsAndWaitsTimer.is_null ()) {
133 doPostsAndWaitsTimer =
134 TimeMonitor::getNewCounter (
"Tpetra::DistObject::doPostsAndWaits");
136 doPostsAndWaitsTimer_ = doPostsAndWaitsTimer;
// Timer "Tpetra::DistObject::unpackAndCombine" -> unpackAndCombineTimer_.
138 RCP<Time> unpackAndCombineTimer =
139 TimeMonitor::lookupCounter (
"Tpetra::DistObject::unpackAndCombine");
140 if (unpackAndCombineTimer.is_null ()) {
141 unpackAndCombineTimer =
142 TimeMonitor::getNewCounter (
"Tpetra::DistObject::unpackAndCombine");
144 unpackAndCombineTimer_ = unpackAndCombineTimer;
145 #endif // HAVE_TPETRA_TRANSFER_TIMERS
// Build a one-line, human-readable description of this object in a string
// stream: the class name followed by the names of the Packet, LocalOrdinal,
// GlobalOrdinal and Node types (obtained from Teuchos::TypeNameTraits), and
// the object label when that label is not the empty string.
148 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
153 using Teuchos::TypeNameTraits;
155 std::ostringstream os;
156 os <<
"\"Tpetra::DistObject\": {"
157 <<
"Packet: " << TypeNameTraits<packet_type>::name ()
158 <<
", LocalOrdinal: " << TypeNameTraits<local_ordinal_type>::name ()
159 <<
", GlobalOrdinal: " << TypeNameTraits<global_ordinal_type>::name ()
160 <<
", Node: " << TypeNameTraits<Node>::name ();
// Only mention the label when one has been set.
// NOTE(review): "Label:" is appended directly after the Node name with no
// ", " separator, unlike the fields above -- looks unintended; confirm.
161 if (this->getObjectLabel () !=
"") {
162 os <<
"Label: \"" << this->getObjectLabel () <<
"\"";
// Print a multi-line description of this object to the output stream `out`.
//
// verbLevel: requested verbosity; Teuchos::VERB_DEFAULT is treated as
//            Teuchos::VERB_LOW.
//
// Nothing is printed for VERB_NONE.  Otherwise the template parameter
// names, the label (if non-empty) and the Map are printed; above VERB_LOW
// the export and import buffer sizes are printed as well.
168 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
172 const Teuchos::EVerbosityLevel verbLevel)
const
174 using Teuchos::rcpFromRef;
175 using Teuchos::TypeNameTraits;
// Resolve the default verbosity to VERB_LOW.
177 const Teuchos::EVerbosityLevel vl = (verbLevel == Teuchos::VERB_DEFAULT) ?
178 Teuchos::VERB_LOW : verbLevel;
// With a null communicator, act as rank 0 of a single process.
179 Teuchos::RCP<const Teuchos::Comm<int> > comm = this->getMap ()->getComm ();
180 const int myRank = comm.is_null () ? 0 : comm->getRank ();
181 const int numProcs = comm.is_null () ? 1 : comm->getSize ();
183 if (vl != Teuchos::VERB_NONE) {
// Each OSTab below is constructed on `out` for one nesting level of output.
184 Teuchos::OSTab tab0 (out);
186 out <<
"\"Tpetra::DistObject\":" << endl;
188 Teuchos::OSTab tab1 (out);
190 out <<
"Template parameters:" << endl;
192 Teuchos::OSTab tab2 (out);
193 out <<
"Packet: " << TypeNameTraits<packet_type>::name () << endl
194 <<
"LocalOrdinal: " << TypeNameTraits<local_ordinal_type>::name () << endl
195 <<
"GlobalOrdinal: " << TypeNameTraits<global_ordinal_type>::name () << endl
196 <<
"Node: " << TypeNameTraits<node_type>::name () << endl;
// Print the label only when one has been set.
198 if (this->getObjectLabel () !=
"") {
199 out <<
"Label: \"" << this->getObjectLabel () <<
"\"" << endl;
// Delegate the Map's description to the Map itself, at the same verbosity.
206 out <<
"Map:" << endl;
208 Teuchos::OSTab tab2 (out);
209 map_->describe (out, vl);
// Per-process buffer sizes are shown only above VERB_LOW.
213 if (vl > Teuchos::VERB_LOW) {
214 for (
int p = 0; p < numProcs; ++p) {
216 out <<
"Process " << myRank <<
":" << endl;
217 Teuchos::OSTab tab2 (out);
218 out <<
"Export buffer size (in packets): "
219 << exports_.extent (0)
221 <<
"Import buffer size (in packets): "
222 << imports_.extent (0)
225 if (! comm.is_null ()) {
// Not implemented in this class: the exception test below has a condition
// that is always true, so this code path always reports std::logic_error
// with the message "... removeEmptyProcessesInPlace: Not implemented".
235 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
240 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::logic_error,
241 "Tpetra::DistObject::removeEmptyProcessesInPlace: Not implemented");
// doImport, forward mode: forwards `source` and `importer` to doTransfer
// with DoForward and the combine mode CM.
//
// restrictedMode: flag checked later by doTransfer (see its Map checks).
//
// "Start" and "Done" messages, each prefixed by createPrefix's string, are
// assembled in a string stream and written to std::cerr around the call.
273 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
279 const bool restrictedMode)
// Label used in the verbose prefix and in doTransfer's error messages.
283 const char modeString[] =
"doImport (forward mode)";
288 const bool verbose = Behavior::verbose(
"DistObject");
289 std::unique_ptr<std::string> prefix;
291 prefix = this->createPrefix(
"DistObject", modeString);
292 std::ostringstream os;
293 os << *prefix <<
"Start" << endl;
294 std::cerr << os.str ();
// All of the actual work happens in doTransfer.
296 this->doTransfer (source, importer, modeString, DoForward, CM,
299 std::ostringstream os;
300 os << *prefix <<
"Done" << endl;
301 std::cerr << os.str ();
// doExport, forward mode: forwards `source` and `exporter` to doTransfer
// with DoForward and the combine mode CM.
//
// restrictedMode: flag checked later by doTransfer (see its Map checks).
//
// "Start" and "Done" messages, each prefixed by createPrefix's string, are
// assembled in a string stream and written to std::cerr around the call.
305 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
311 const bool restrictedMode)
// Label used in the verbose prefix and in doTransfer's error messages.
315 const char modeString[] =
"doExport (forward mode)";
320 const bool verbose = Behavior::verbose(
"DistObject");
321 std::unique_ptr<std::string> prefix;
323 prefix = this->createPrefix(
"DistObject", modeString);
324 std::ostringstream os;
325 os << *prefix <<
"Start" << endl;
326 std::cerr << os.str ();
// All of the actual work happens in doTransfer.
328 this->doTransfer (source, exporter, modeString, DoForward, CM,
331 std::ostringstream os;
332 os << *prefix <<
"Done" << endl;
333 std::cerr << os.str ();
// doImport, reverse mode: this overload is handed an `exporter` and
// forwards it, together with `source`, to doTransfer with DoReverse and
// the combine mode CM.
//
// restrictedMode: flag checked later by doTransfer (see its Map checks).
//
// "Start" and "Done" messages, each prefixed by createPrefix's string, are
// assembled in a string stream and written to std::cerr around the call.
337 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
343 const bool restrictedMode)
// Label used in the verbose prefix and in doTransfer's error messages.
347 const char modeString[] =
"doImport (reverse mode)";
352 const bool verbose = Behavior::verbose(
"DistObject");
353 std::unique_ptr<std::string> prefix;
355 prefix = this->createPrefix(
"DistObject", modeString);
356 std::ostringstream os;
357 os << *prefix <<
"Start" << endl;
358 std::cerr << os.str ();
// Reverse mode: the exporter is applied with DoReverse.
360 this->doTransfer (source, exporter, modeString, DoReverse, CM,
363 std::ostringstream os;
364 os << *prefix <<
"Done" << endl;
365 std::cerr << os.str ();
// doExport, reverse mode: this overload is handed an `importer` and
// forwards it, together with `source`, to doTransfer with DoReverse and
// the combine mode CM.
//
// restrictedMode: flag checked later by doTransfer (see its Map checks).
//
// "Start" and "Done" messages, each prefixed by createPrefix's string, are
// assembled in a string stream and written to std::cerr around the call.
369 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
375 const bool restrictedMode)
// Label used in the verbose prefix and in doTransfer's error messages.
379 const char modeString[] =
"doExport (reverse mode)";
384 const bool verbose = Behavior::verbose(
"DistObject");
385 std::unique_ptr<std::string> prefix;
387 prefix = this->createPrefix(
"DistObject", modeString);
388 std::ostringstream os;
389 os << *prefix <<
"Start" << endl;
390 std::cerr << os.str ();
// Reverse mode: the importer is applied with DoReverse.
392 this->doTransfer (source, importer, modeString, DoReverse, CM,
395 std::ostringstream os;
396 os << *prefix <<
"Done" << endl;
397 std::cerr << os.str ();
// Report whether this object is distributed by forwarding the question to
// its Map (map_->isDistributed ()).
401 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
405 return map_->isDistributed ();
408 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// doTransfer: validate the Maps involved in a transfer, pick the LID lists
// that match the transfer direction, and hand the work to doTransferNew.
//
// transfer:   the Import/Export-style object supplying source/target Maps,
//             the number of "same" IDs and the permute/remote/export LIDs.
// modeString: caller-supplied label that is embedded in error messages.
//
// Reports std::invalid_argument (via TEUCHOS_TEST_FOR_EXCEPTION) when one of
// the Map compatibility checks below fails, or when debug and
// restrictedMode are both set and the transfer carries permutes.
415 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
419 const ::Tpetra::Details::Transfer<local_ordinal_type, global_ordinal_type, node_type>& transfer,
420 const char modeString[],
429 const char funcName[] =
"Tpetra::DistObject::doTransfer";
431 ProfilingRegion region_doTransfer(funcName);
432 const bool verbose = Behavior::verbose(
"DistObject");
433 std::unique_ptr<std::string> prefix;
// Verbose output: dynamic type names of the source and of this (target) object.
435 std::ostringstream os;
436 prefix = this->createPrefix(
"DistObject",
"doTransfer");
437 os << *prefix <<
"Source type: " << Teuchos::typeName(src)
438 <<
", Target type: " << Teuchos::typeName(*
this) << endl;
439 std::cerr << os.str();
452 const bool debug = Behavior::debug(
"DistObject");
// Check this (target) object's Map against the transfer object.  There are
// four cases: {normal, restricted} x {forward, reverse}.
// Normal forward: this Map must be the same as the transfer's target Map.
454 if (! restrictedMode && revOp == DoForward) {
455 const bool myMapSameAsTransferTgtMap =
456 this->getMap ()->isSameAs (* (transfer.getTargetMap ()));
457 TEUCHOS_TEST_FOR_EXCEPTION
458 (! myMapSameAsTransferTgtMap, std::invalid_argument,
459 "Tpetra::DistObject::" << modeString <<
": For forward-mode "
460 "communication, the target DistObject's Map must be the same "
461 "(in the sense of Tpetra::Map::isSameAs) as the input "
462 "Export/Import object's target Map.");
// Normal reverse: this Map must be the same as the transfer's source Map.
464 else if (! restrictedMode && revOp == DoReverse) {
465 const bool myMapSameAsTransferSrcMap =
466 this->getMap ()->isSameAs (* (transfer.getSourceMap ()));
467 TEUCHOS_TEST_FOR_EXCEPTION
468 (! myMapSameAsTransferSrcMap, std::invalid_argument,
469 "Tpetra::DistObject::" << modeString <<
": For reverse-mode "
470 "communication, the target DistObject's Map must be the same "
471 "(in the sense of Tpetra::Map::isSameAs) as the input "
472 "Export/Import object's source Map.");
// Restricted forward: isLocallyFitted against the transfer's target Map.
474 else if (restrictedMode && revOp == DoForward) {
475 const bool myMapLocallyFittedTransferTgtMap =
476 this->getMap ()->isLocallyFitted (* (transfer.getTargetMap ()));
477 TEUCHOS_TEST_FOR_EXCEPTION
478 (! myMapLocallyFittedTransferTgtMap , std::invalid_argument,
479 "Tpetra::DistObject::" << modeString <<
": For forward-mode "
480 "communication using restricted mode, Export/Import object's "
481 "target Map must be locally fitted (in the sense of "
482 "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
// Remaining case (restricted reverse): isLocallyFitted against the
// transfer's source Map.
485 const bool myMapLocallyFittedTransferSrcMap =
486 this->getMap ()->isLocallyFitted (* (transfer.getSourceMap ()));
487 TEUCHOS_TEST_FOR_EXCEPTION
488 (! myMapLocallyFittedTransferSrcMap, std::invalid_argument,
489 "Tpetra::DistObject::" << modeString <<
": For reverse-mode "
490 "communication using restricted mode, Export/Import object's "
491 "source Map must be locally fitted (in the sense of "
492 "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
// The source object's Map is checked only when srcDistObj is non-null.
// NOTE(review): how srcDistObj is obtained is not visible here.
499 if (srcDistObj !=
nullptr) {
// Forward: the source's Map must be the same as the transfer's source Map.
500 if (revOp == DoForward) {
501 const bool srcMapSameAsImportSrcMap =
502 srcDistObj->
getMap ()->isSameAs (* (transfer.getSourceMap ()));
503 TEUCHOS_TEST_FOR_EXCEPTION
504 (! srcMapSameAsImportSrcMap, std::invalid_argument,
505 "Tpetra::DistObject::" << modeString <<
": For forward-mode "
506 "communication, the source DistObject's Map must be the same "
507 "as the input Export/Import object's source Map.");
// Reverse: the source's Map must be the same as the transfer's target Map.
510 const bool srcMapSameAsImportTgtMap =
511 srcDistObj->
getMap ()->isSameAs (* (transfer.getTargetMap ()));
512 TEUCHOS_TEST_FOR_EXCEPTION
513 (! srcMapSameAsImportTgtMap, std::invalid_argument,
514 "Tpetra::DistObject::" << modeString <<
": For reverse-mode "
515 "communication, the source DistObject's Map must be the same "
516 "as the input Export/Import object's target Map.");
521 const size_t numSameIDs = transfer.getNumSameIDs ();
// In debug builds of the check, restricted mode rejects any permutes.
524 TEUCHOS_TEST_FOR_EXCEPTION
525 (debug && restrictedMode &&
526 (transfer.getPermuteToLIDs_dv().extent(0) != 0 ||
527 transfer.getPermuteFromLIDs_dv().extent(0) != 0),
528 std::invalid_argument,
529 "Tpetra::DistObject::" << modeString <<
": Transfer object "
530 "cannot have permutes in restricted mode.");
// Communicate from host buffers unless Behavior says MPI is CUDA-aware.
533 const bool commOnHost = ! Behavior::assumeMpiIsCudaAware ();
535 std::ostringstream os;
536 os << *prefix <<
"doTransfer: Use new interface; "
537 "commOnHost=" << (commOnHost ?
"true" :
"false") << endl;
538 std::cerr << os.str ();
// Select the LID lists for the requested direction.  In reverse mode each
// pair is swapped: permute-to <-> permute-from, and remote <-> export.
541 using const_lo_dv_type =
542 Kokkos::DualView<const local_ordinal_type*, buffer_device_type>;
543 const_lo_dv_type permToLIDs = (revOp == DoForward) ?
544 transfer.getPermuteToLIDs_dv () :
545 transfer.getPermuteFromLIDs_dv ();
546 const_lo_dv_type permFromLIDs = (revOp == DoForward) ?
547 transfer.getPermuteFromLIDs_dv () :
548 transfer.getPermuteToLIDs_dv ();
549 const_lo_dv_type remoteLIDs = (revOp == DoForward) ?
550 transfer.getRemoteLIDs_dv () :
551 transfer.getExportLIDs_dv ();
552 const_lo_dv_type exportLIDs = (revOp == DoForward) ?
553 transfer.getExportLIDs_dv () :
554 transfer.getRemoteLIDs_dv ();
// Hand everything to the implementation routine.
555 doTransferNew (src, CM, numSameIDs, permToLIDs, permFromLIDs,
556 remoteLIDs, exportLIDs, distor, revOp, commOnHost,
560 std::ostringstream os;
561 os << *prefix <<
"Tpetra::DistObject::doTransfer: Done!" << endl;
562 std::cerr << os.str ();
// reallocImportsIfNeeded: (re)size the imports_ buffer toward newSize and
// record whether a reallocation took place in `reallocated`.
//
// prefix: string put in front of each verbose message; it is dereferenced
//         when the messages below are written.
566 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
571 const std::string* prefix)
// Verbose message before: current extent of imports_ and the requested size.
574 std::ostringstream os;
575 os << *prefix <<
"Realloc (if needed) imports_ from "
576 << imports_.extent (0) <<
" to " << newSize << std::endl;
577 std::cerr << os.str ();
// NOTE(review): the call that produces `reallocated` is not visible here.
580 const bool reallocated =
// Verbose message after the (possible) reallocation.
583 std::ostringstream os;
584 os << *prefix <<
"Finished realloc'ing imports_" << std::endl;
585 std::cerr << os.str ();
// reallocArraysForNumPacketsPerLid: (re)allocate the two per-LID packet
// count arrays, labeled "numExportPacketsPerLID" and
// "numImportPacketsPerLID", for the given numbers of export and import LIDs.
//
// Returns true when at least one of the two reallocation steps reported
// that it reallocated.
590 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
594 const size_t numImportLIDs)
// NOTE(review): presumably passed as the "too big" factor of the
// reallocation helper; the calls that use it are not visible -- confirm.
604 constexpr
size_t tooBigFactor = 10;
606 const bool verbose = Behavior::verbose(
"DistObject");
607 std::unique_ptr<std::string> prefix;
// Verbose output: the requested sizes and the DualView status beforehand.
609 prefix = this->createPrefix(
"DistObject",
610 "reallocArraysForNumPacketsPerLid");
611 std::ostringstream os;
613 <<
"numExportLIDs: " << numExportLIDs
614 <<
", numImportLIDs: " << numImportLIDs
616 os << *prefix <<
"DualView status before:" << endl
619 "numExportPacketsPerLID_")
623 "numImportPacketsPerLID_")
625 std::cerr << os.str ();
// First reallocation step (export counts).
629 const bool firstReallocated =
632 "numExportPacketsPerLID",
// The second step is asked to fence only if the first step did not
// reallocate.
639 const bool needFenceBeforeNextAlloc = ! firstReallocated;
// Second reallocation step (import counts).
640 const bool secondReallocated =
643 "numImportPacketsPerLID",
645 needFenceBeforeNextAlloc);
// Verbose output: DualView status afterwards.
648 std::ostringstream os;
649 os << *prefix <<
"DualView status after:" << endl
651 "numExportPacketsPerLID_")
654 "numImportPacketsPerLID_")
656 std::cerr << os.str ();
659 return firstReallocated || secondReallocated;
// doTransferNew: carry out one data transfer into this object.
//
// The steps, numbered as in the verbose messages below, are:
//   1. checkSizes       -- verify that src is a legal source for this object
//   2. copyAndPermute   -- local copies/permutes (skipped in restricted mode)
//   3. (re)allocate the per-LID packet count arrays, when the number of
//      packets per LID is not constant
//   4./5. packAndPrepare, then sync exports_ to host or device as needed
//   7. communication via do[Reverse]PostsAndWaits, unless it is not needed
//   8. unpackAndCombine
//   9. done
//
// numSameIDs:     count used, with permuteToLIDs, to decide whether step 2
//                 has any work to do.
// commOnHost:     selects host views (true) or device views (false) for
//                 the communication buffers.
// restrictedMode: when true, step 2 is skipped.
//
// Reports std::invalid_argument when checkSizes returns false.
662 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
667 const size_t numSameIDs,
678 const bool commOnHost,
679 const bool restrictedMode)
685 using Kokkos::Compat::getArrayView;
686 using Kokkos::Compat::getConstArrayView;
687 using Kokkos::Compat::getKokkosViewDeepCopy;
688 using Kokkos::Compat::create_const_view;
691 using DES =
typename DT::execution_space;
692 const char funcName[] =
"Tpetra::DistObject::doTransferNew";
694 ProfilingRegion region_dTN(funcName);
695 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
// Time the whole routine with the doTransfer timer.
698 Teuchos::TimeMonitor doXferMon (*doXferTimer_);
699 #endif // HAVE_TPETRA_TRANSFER_TIMERS
701 const bool debug = Behavior::debug(
"DistObject");
702 const bool verbose = Behavior::verbose(
"DistObject");
705 std::unique_ptr<std::string> prefix;
707 prefix = this->createPrefix(
"DistObject",
"doTransferNew");
// Verbose output: echo the input arguments.
711 std::ostringstream os;
712 os << *prefix <<
"Input arguments:" << endl
714 << *prefix <<
" numSameIDs: " << numSameIDs << endl
723 << *prefix <<
" revOp: Do" << (revOp == DoReverse ?
"Reverse" :
"Forward") << endl
724 << *prefix <<
" commOnHost: " << (commOnHost ?
"true" :
"false") << endl;
725 std::cerr << os.str ();
// Step 1: checkSizes.  A false result is reported as std::invalid_argument.
729 ProfilingRegion region_cs (
"Tpetra::DistObject::doTransferNew::checkSizes");
731 std::ostringstream os;
732 os << *prefix <<
"1. checkSizes" << endl;
733 std::cerr << os.str ();
735 const bool checkSizesResult = this->checkSizes (src);
736 TEUCHOS_TEST_FOR_EXCEPTION
737 (! checkSizesResult, std::invalid_argument,
738 "Tpetra::DistObject::doTransfer: checkSizes() indicates that the "
739 "destination object is not a legal target for redistribution from the "
740 "source object. This probably means that they do not have the same "
741 "dimensions. For example, MultiVectors must have the same number of "
742 "rows and columns.");
// Step 2: copyAndPermute.  Runs only outside restricted mode and only when
// there is at least one same or permuted ID.
749 if (!restrictedMode && numSameIDs + permuteToLIDs.extent (0) != 0) {
752 std::ostringstream os;
753 os << *prefix <<
"2. copyAndPermute" << endl;
754 std::cerr << os.str ();
756 ProfilingRegion region_cp
757 (
"Tpetra::DistObject::doTransferNew::copyAndPermute");
758 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
761 Teuchos::TimeMonitor copyAndPermuteMon (*copyAndPermuteTimer_);
762 #endif // HAVE_TPETRA_TRANSFER_TIMERS
// NOTE(review): this appears to repeat the size test and the verbose
// message from just above -- possibly redundant; confirm nesting.
764 if (numSameIDs + permuteToLIDs.extent (0) != 0) {
767 std::ostringstream os;
768 os << *prefix <<
"2. copyAndPermute" << endl;
769 std::cerr << os.str ();
771 this->copyAndPermute (src, numSameIDs, permuteToLIDs,
774 std::ostringstream os;
775 os << *prefix <<
"After copyAndPermute:" << endl
782 std::cerr << os.str ();
// Ask the object for its packet count per LID; a value of zero selects the
// variable-count paths below.  Not const: it is passed to packAndPrepare,
// whose tooltip signature takes it as size_t&.
795 size_t constantNumPackets = this->constantNumberOfPackets ();
797 std::ostringstream os;
798 os << *prefix <<
"constantNumPackets=" << constantNumPackets << endl;
799 std::cerr << os.str ();
// Step 3: with a variable number of packets per LID, size the per-LID count
// arrays by the number of export and remote LIDs.
807 if (constantNumPackets == 0) {
809 std::ostringstream os;
810 os << *prefix <<
"3. (Re)allocate num{Ex,Im}portPacketsPerLID"
812 std::cerr << os.str ();
816 this->reallocArraysForNumPacketsPerLid (exportLIDs.extent (0),
817 remoteLIDs.extent (0));
// Step 4: packAndPrepare fills exports_ and numExportPacketsPerLID_.
821 std::ostringstream os;
822 os << *prefix <<
"4. packAndPrepare: before, "
825 std::cerr << os.str ();
828 ProfilingRegion region_pp
829 (
"Tpetra::DistObject::doTransferNew::packAndPrepare");
830 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
833 Teuchos::TimeMonitor packAndPrepareMon (*packAndPrepareTimer_);
834 #endif // HAVE_TPETRA_TRANSFER_TIMERS
852 this->packAndPrepare (src, exportLIDs, this->exports_,
853 this->numExportPacketsPerLID_,
854 constantNumPackets, distor);
// Bring exports_ up to date on the host or on the device, as needed.
// NOTE(review): presumably chosen by commOnHost; the selecting branch is
// not visible here -- confirm.
856 if (this->exports_.need_sync_host ()) {
857 this->exports_.sync_host ();
861 if (this->exports_.need_sync_device ()) {
862 this->exports_.sync_device ();
867 std::ostringstream os;
868 os << *prefix <<
"5.1. After packAndPrepare, "
871 std::cerr << os.str ();
// With a constant packet count, the receive buffer length is known up
// front: (number of remote LIDs) * (packets per LID).
877 if (constantNumPackets != 0) {
882 const size_t rbufLen = remoteLIDs.extent (0) * constantNumPackets;
883 reallocImportsIfNeeded (rbufLen, verbose, prefix.get ());
// Decide whether any communication is required at all.
887 bool needCommunication =
true;
// Reverse mode on a non-distributed target: no communication.
892 if (revOp == DoReverse && ! this->isDistributed ()) {
893 needCommunication =
false;
// NOTE(review): NULL is used here, while doTransfer compares against
// nullptr -- consider making these consistent.
902 else if (revOp == DoForward && srcDistObj != NULL &&
904 needCommunication =
false;
907 if (! needCommunication) {
909 std::ostringstream os;
910 os << *prefix <<
"Comm not needed; skipping" << endl;
911 std::cerr << os.str ();
// Step 7: communication.
915 ProfilingRegion region_dpw
916 (
"Tpetra::DistObject::doTransferNew::doPostsAndWaits");
917 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
920 Teuchos::TimeMonitor doPostsAndWaitsMon (*doPostsAndWaitsTimer_);
921 #endif // HAVE_TPETRA_TRANSFER_TIMERS
924 std::ostringstream os;
925 os << *prefix <<
"7.0. "
926 << (revOp == DoReverse ?
"Reverse" :
"Forward")
928 std::cerr << os.str ();
// Variable number of packets per LID: a first communication round
// exchanges the per-LID counts, then the data is sent in a second round.
931 if (constantNumPackets == 0) {
933 std::ostringstream os;
934 os << *prefix <<
"7.1. Variable # packets / LID: first comm "
935 <<
"(commOnHost = " << (commOnHost ?
"true" :
"false") <<
")"
937 std::cerr << os.str ();
939 size_t totalImportPackets = 0;
// First round using host views: sync both count arrays to host and mark
// the import counts as modified on host.
941 if (this->numExportPacketsPerLID_.need_sync_host ()) {
942 this->numExportPacketsPerLID_.sync_host ();
944 if (this->numImportPacketsPerLID_.need_sync_host ()) {
945 this->numImportPacketsPerLID_.sync_host ();
947 this->numImportPacketsPerLID_.modify_host ();
949 create_const_view (this->numExportPacketsPerLID_.view_host ());
950 auto numImp_h = this->numImportPacketsPerLID_.view_host ();
954 std::ostringstream os;
955 os << *prefix <<
"Call do"
956 << (revOp == DoReverse ?
"Reverse" :
"") <<
"PostsAndWaits"
958 std::cerr << os.str ();
960 if (revOp == DoReverse) {
// Total receive size = sum of the received per-LID import counts.
968 std::ostringstream os;
969 os << *prefix <<
"Count totalImportPackets" << std::endl;
970 std::cerr << os.str ();
972 using the_dev_type =
typename decltype (numImp_h)::
device_type;
973 totalImportPackets = countTotalImportPackets<the_dev_type> (numImp_h);
// First round using device views: same sequence as above, on the device.
976 if (this->numExportPacketsPerLID_.need_sync_device ()) {
977 this->numExportPacketsPerLID_.sync_device ();
979 if (this->numImportPacketsPerLID_.need_sync_device ()) {
980 this->numImportPacketsPerLID_.sync_device ();
982 this->numImportPacketsPerLID_.modify_device ();
983 auto numExp_d = create_const_view
984 (this->numExportPacketsPerLID_.view_device ());
985 auto numImp_d = this->numImportPacketsPerLID_.view_device ();
989 std::ostringstream os;
990 os << *prefix <<
"Call do"
991 << (revOp == DoReverse ?
"Reverse" :
"") <<
"PostsAndWaits"
993 std::cerr << os.str ();
995 if (revOp == DoReverse) {
1003 std::ostringstream os;
1004 os << *prefix <<
"Count totalImportPackets" << std::endl;
1005 std::cerr << os.str ();
1007 using the_dev_type =
typename decltype (numImp_d)::
device_type;
1008 totalImportPackets = countTotalImportPackets<the_dev_type> (numImp_d);
// Size imports_ for the total just counted.
1012 std::ostringstream os;
1013 os << *prefix <<
"totalImportPackets=" << totalImportPackets << endl;
1014 std::cerr << os.str ();
1016 this->reallocImportsIfNeeded (totalImportPackets, verbose,
// Second round: send the packed data itself.
1019 std::ostringstream os;
1020 os << *prefix <<
"7.3. Second comm" << std::endl;
1021 std::cerr << os.str ();
// Both count arrays are synced to host before the *_av views below are
// built from them.
1027 if (this->numExportPacketsPerLID_.need_sync_host ()) {
1028 this->numExportPacketsPerLID_.sync_host ();
1030 if (this->numImportPacketsPerLID_.need_sync_host ()) {
1031 this->numImportPacketsPerLID_.sync_host ();
1041 auto numExportPacketsPerLID_av =
1043 auto numImportPacketsPerLID_av =
// imports_ is about to be overwritten, so its sync state is cleared first.
1051 this->imports_.clear_sync_state ();
1054 std::ostringstream os;
1055 os << *prefix <<
"Comm on "
1056 << (commOnHost ?
"host" :
"device")
1057 <<
"; call do" << (revOp == DoReverse ?
"Reverse" :
"")
1058 <<
"PostsAndWaits" << endl;
1059 std::cerr << os.str ();
// Host-buffer variant of the second round.
1063 this->imports_.modify_host ();
1064 if (revOp == DoReverse) {
1066 (create_const_view (this->exports_.view_host ()),
1067 numExportPacketsPerLID_av,
1068 this->imports_.view_host (),
1069 numImportPacketsPerLID_av);
1073 (create_const_view (this->exports_.view_host ()),
1074 numExportPacketsPerLID_av,
1075 this->imports_.view_host (),
1076 numImportPacketsPerLID_av);
// Device-buffer variant of the second round.
1081 this->imports_.modify_device ();
1082 if (revOp == DoReverse) {
1084 (create_const_view (this->exports_.view_device ()),
1085 numExportPacketsPerLID_av,
1086 this->imports_.view_device (),
1087 numImportPacketsPerLID_av);
1091 (create_const_view (this->exports_.view_device ()),
1092 numExportPacketsPerLID_av,
1093 this->imports_.view_device (),
1094 numImportPacketsPerLID_av);
// Constant number of packets per LID: a single communication round.
1100 std::ostringstream os;
1101 os << *prefix <<
"7.1. Const # packets per LID: " << endl
1108 std::cerr << os.str ();
// imports_ is about to be overwritten, so its sync state is cleared first.
1115 this->imports_.clear_sync_state ();
1118 std::ostringstream os;
1119 os << *prefix <<
"7.2. Comm on "
1120 << (commOnHost ?
"host" :
"device")
1121 <<
"; call do" << (revOp == DoReverse ?
"Reverse" :
"")
1122 <<
"PostsAndWaits" << endl;
1123 std::cerr << os.str ();
// Host-buffer variant.
1126 this->imports_.modify_host ();
1127 if (revOp == DoReverse) {
1129 (create_const_view (this->exports_.view_host ()),
1131 this->imports_.view_host ());
1135 (create_const_view (this->exports_.view_host ()),
1137 this->imports_.view_host ());
// Device-buffer variant.
1142 this->imports_.modify_device ();
1143 if (revOp == DoReverse) {
1145 (create_const_view (this->exports_.view_device ()),
1147 this->imports_.view_device ());
1151 (create_const_view (this->exports_.view_device ()),
1153 this->imports_.view_device ());
// Step 8: unpackAndCombine.
1159 std::ostringstream os;
1160 os << *prefix <<
"8. unpackAndCombine" << endl;
1161 std::cerr << os.str ();
1163 ProfilingRegion region_uc
1164 (
"Tpetra::DistObject::doTransferNew::unpackAndCombine");
1165 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
1168 Teuchos::TimeMonitor unpackAndCombineMon (*unpackAndCombineTimer_);
1169 #endif // HAVE_TPETRA_TRANSFER_TIMERS
// Checked variant: exceptions thrown by unpackAndCombine are caught, their
// text is collected in lclErrStrm, and Details::checkGlobalError is then
// called with the local success flag and this object's communicator.
// NOTE(review): presumably taken only when `debug` is true; the selecting
// branch is not visible here -- confirm.
1172 std::ostringstream lclErrStrm;
1173 bool lclSuccess =
false;
1175 this->unpackAndCombine (remoteLIDs, this->imports_,
1176 this->numImportPacketsPerLID_,
1177 constantNumPackets, distor, CM);
1180 catch (std::exception& e) {
1181 lclErrStrm <<
"unpackAndCombine threw an exception: "
1182 << endl << e.what();
1185 lclErrStrm <<
"unpackAndCombine threw an exception "
1186 "not a subclass of std::exception.";
1188 const char gblErrMsgHeader[] =
"Tpetra::DistObject::"
1189 "doTransferNew threw an exception in unpackAndCombine on "
1190 "one or more processes in the DistObject's communicator.";
1191 auto comm = getMap()->getComm();
1192 Details::checkGlobalError(std::cerr, lclSuccess,
1193 lclErrStrm.str().c_str(),
1194 gblErrMsgHeader, *comm);
// Unchecked variant: call unpackAndCombine directly.
1197 this->unpackAndCombine (remoteLIDs, this->imports_,
1198 this->numImportPacketsPerLID_,
1199 constantNumPackets, distor, CM);
// Step 9: done.
1205 std::ostringstream os;
1206 os << *prefix <<
"9. Done!" << endl;
1207 std::cerr << os.str ();
1212 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1218 const Kokkos::DualView<
1221 const Kokkos::DualView<
1226 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1231 const Kokkos::DualView<
1244 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1248 (
const Kokkos::DualView<
// print: wrap the stream `os` in a Teuchos::FancyOStream (through a
// non-owning RCP made with rcpFromRef) and print this object by calling
// describe() on it with Teuchos::VERB_DEFAULT.
1263 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1268 using Teuchos::FancyOStream;
1269 using Teuchos::getFancyOStream;
1271 using Teuchos::rcpFromRef;
1274 RCP<FancyOStream> out = getFancyOStream (rcpFromRef (os));
1275 this->describe (*out, Teuchos::VERB_DEFAULT);
// createPrefix: build the prefix string used in front of verbose output
// lines, returned as a std::unique_ptr<std::string>.
//
// methodName: method label passed through, together with className, to the
//             helper that builds the string.
//
// The communicator is taken from this object's Map; a null Map yields a
// null communicator, whose raw pointer is what gets passed on.
// NOTE(review): the name of the helper being called is not visible here.
1278 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1279 std::unique_ptr<std::string>
1282 const char methodName[])
const
1284 auto map = this->getMap();
1285 auto comm = map.is_null() ? Teuchos::null : map->getComm();
1287 comm.getRawPtr(), className, methodName);
// Free-function form taking an explicit new Map.
//
// input:  in/out handle to the object; set to Teuchos::null when newMap is
//         null.
// newMap: Map forwarded to input->removeEmptyProcessesInPlace().
1290 template<
class DistObjectType>
1293 Teuchos::RCP<DistObjectType>& input,
1294 const Teuchos::RCP<
const Map<
1295 typename DistObjectType::local_ordinal_type,
1296 typename DistObjectType::global_ordinal_type,
1297 typename DistObjectType::node_type>>& newMap)
// Let the object update itself first, then drop the caller's handle if
// there is no new Map.
1299 input->removeEmptyProcessesInPlace (newMap);
1300 if (newMap.is_null ()) {
1301 input = Teuchos::null;
// Convenience form: obtain the new Map by calling removeEmptyProcesses()
// on the input's current Map, then delegate to the two-argument
// removeEmptyProcessesInPlace above.
1305 template<
class DistObjectType>
1309 auto newMap = input->getMap ()->removeEmptyProcesses ();
1310 removeEmptyProcessesInPlace<DistObjectType> (input, newMap);
// Explicit instantiation of DistObject for the given SCALAR (used as the
// Packet type), LO, GO and NODE template arguments.
1314 #define TPETRA_DISTOBJECT_INSTANT(SCALAR, LO, GO, NODE) \
1315 template class DistObject< SCALAR , LO , GO , NODE >;
// Explicit instantiation of DistObject with Packet fixed to char.
1319 #define TPETRA_DISTOBJECT_INSTANT_CHAR(LO, GO, NODE) \
1320 template class DistObject< char , LO , GO , NODE >;
1324 #endif // TPETRA_DISTOBJECT_DEF_HPP
void doPostsAndWaits(const Teuchos::ArrayView< const Packet > &exports, size_t numPackets, const Teuchos::ArrayView< Packet > &imports)
Execute the (forward) communication plan.
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distribution.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
void doImport(const SrcDistObject &source, const Import< LocalOrdinal, GlobalOrdinal, Node > &importer, const CombineMode CM, const bool restrictedMode=false)
Import data into this object using an Import object ("forward mode").
virtual void doTransferNew(const SrcDistObject &src, const CombineMode CM, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &remoteLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &exportLIDs, Distributor &distor, const ReverseOption revOp, const bool commOnHost, const bool restrictedMode)
Implementation detail of doTransfer.
void print(std::ostream &os) const
Print this object to the given output stream.
virtual bool reallocArraysForNumPacketsPerLid(const size_t numExportLIDs, const size_t numImportLIDs)
Reallocate numExportPacketsPerLID_ and/or numImportPacketsPerLID_, if necessary.
bool isDistributed() const
Whether this is a globally distributed object.
void removeEmptyProcessesInPlace(Teuchos::RCP< DistObjectType > &input, const Teuchos::RCP< const Map< typename DistObjectType::local_ordinal_type, typename DistObjectType::global_ordinal_type, typename DistObjectType::node_type > > &newMap)
Remove processes which contain no elements in this object's Map.
virtual void doTransfer(const SrcDistObject &src, const ::Tpetra::Details::Transfer< local_ordinal_type, global_ordinal_type, node_type > &transfer, const char modeString[], const ReverseOption revOp, const CombineMode CM, const bool restrictedMode)
Redistribute data across (MPI) processes.
typename Node::device_type device_type
The Kokkos Device type.
void doReversePostsAndWaits(const Teuchos::ArrayView< const Packet > &exports, size_t numPackets, const Teuchos::ArrayView< Packet > &imports)
Execute the reverse communication plan.
typename::Kokkos::Details::ArithTraits< Packet >::val_type packet_type
The type of each datum being sent or received in an Import or Export.
Kokkos::DualView< T *, DT > getDualViewCopyFromArrayView(const Teuchos::ArrayView< const T > &x_av, const char label[], const bool leaveOnHost)
Get a 1-D Kokkos::DualView which is a deep copy of the input Teuchos::ArrayView (which views host memory).
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distribution.
Sets up and executes a communication plan for a Tpetra DistObject.
CombineMode
Rule for combining data in an Import or Export.
bool reallocDualViewIfNeeded(Kokkos::DualView< ValueType *, DeviceType > &dv, const size_t newSize, const char newLabel[], const size_t tooBigFactor=2, const bool needFenceBeforeRealloc=true)
Reallocate the DualView in/out argument, if needed.
virtual void packAndPrepare(const SrcDistObject &source, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &exportLIDs, Kokkos::DualView< packet_type *, buffer_device_type > &exports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, size_t &constantNumPackets, Distributor &distor)
Pack data and metadata for communication (sends).
Abstract base class for objects that can be the source of an Import or Export operation.
Declaration and definition of Tpetra::Details::reallocDualViewIfNeeded, an implementation detail of Tpetra.
bool reallocImportsIfNeeded(const size_t newSize, const bool verbose, const std::string *prefix)
Reallocate imports_ if needed.
LocalOrdinal local_ordinal_type
The type of local indices.
Replace old values with zero.
std::string combineModeToString(const CombineMode combineMode)
Human-readable string representation of the given CombineMode.
ReverseOption
Whether the data transfer should be performed in forward or reverse mode.
DistObject(const Teuchos::RCP< const map_type > &map)
Constructor.
std::string dualViewStatusToString(const DualViewType &dv, const char name[])
Return the status of the given Kokkos::DualView, as a human-readable string.
virtual std::string description() const
One-line description of this object.
virtual size_t constantNumberOfPackets() const
Whether the implementation's instance promises always to have a constant number of packets per LID (local index), and if so, how many packets per LID (that constant number).
void doExport(const SrcDistObject &source, const Export< LocalOrdinal, GlobalOrdinal, Node > &exporter, const CombineMode CM, const bool restrictedMode=false)
Export data into this object using an Export object ("forward mode").
virtual void copyAndPermute(const SrcDistObject &source, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs)
Perform copies and permutations that are local to the calling (MPI) process.
Teuchos::ArrayView< typename DualViewType::t_dev::value_type > getArrayViewFromDualView(const DualViewType &x)
Get a Teuchos::ArrayView which views the host Kokkos::View of the input 1-D Kokkos::DualView.
Stand-alone utility functions and macros.
virtual void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< packet_type *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, Distributor &distor, const CombineMode combineMode)
Perform any unpacking and combining after communication.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print a description of this object to the given output stream.
Kokkos::Device< typename device_type::execution_space, buffer_memory_space > buffer_device_type
Kokkos::Device specialization for communication buffers.
Base class for distributed Tpetra objects that support data redistribution.
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap)
Remove processes which contain no entries in this object's Map.
Description of Tpetra's behavior.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.