40 #ifndef TPETRA_DISTRIBUTOR_HPP
41 #define TPETRA_DISTRIBUTOR_HPP
44 #include "Teuchos_as.hpp"
45 #include "Teuchos_Describable.hpp"
46 #include "Teuchos_ParameterListAcceptorDefaultBase.hpp"
47 #include "Teuchos_VerboseObject.hpp"
50 #include "KokkosCompat_View.hpp"
51 #include "Kokkos_Core.hpp"
52 #include "Kokkos_TeuchosCommAdapters.hpp"
55 #include <type_traits>
83 DISTRIBUTOR_NOT_INITIALIZED,
84 DISTRIBUTOR_INITIALIZED_BY_CREATE_FROM_SENDS,
85 DISTRIBUTOR_INITIALIZED_BY_CREATE_FROM_RECVS,
86 DISTRIBUTOR_INITIALIZED_BY_CREATE_FROM_SENDS_N_RECVS,
87 DISTRIBUTOR_INITIALIZED_BY_REVERSE,
88 DISTRIBUTOR_INITIALIZED_BY_COPY,
176 public Teuchos::Describable,
177 public Teuchos::ParameterListAcceptorDefaultBase {
/// Construct a Distributor from a communicator only.
///
/// \param comm Reference-counted pointer to the (const) Teuchos::Comm<int>
///   this object communicates over.
///
/// Declared `explicit`, so an RCP<const Comm<int>> is never implicitly
/// converted into a Distributor.
190 explicit Distributor (
const Teuchos::RCP<
const Teuchos::Comm<int> >& comm);
/// Construct a Distributor from a communicator and an output stream.
///
/// \param comm Reference-counted pointer to the (const) Teuchos::Comm<int>
///   this object communicates over.
/// \param out Reference-counted pointer to a Teuchos::FancyOStream.
///   NOTE(review): how the stream is used is not visible from this
///   declaration -- confirm against the definition.
203 Distributor (
const Teuchos::RCP<
const Teuchos::Comm<int> >& comm,
204 const Teuchos::RCP<Teuchos::FancyOStream>& out);
/// Construct a Distributor from a communicator and a parameter list.
///
/// \param comm Reference-counted pointer to the (const) Teuchos::Comm<int>
///   this object communicates over.
/// \param plist Reference-counted pointer to a non-const
///   Teuchos::ParameterList.  Presumably the options that configure this
///   object (the class derives from ParameterListAcceptorDefaultBase);
///   the pointee is non-const, so the callee may modify it -- TODO confirm.
219 Distributor (
const Teuchos::RCP<
const Teuchos::Comm<int> >& comm,
220 const Teuchos::RCP<Teuchos::ParameterList>& plist);
/// Construct a Distributor from a communicator, an output stream, and a
/// parameter list (the union of the arguments of the other overloads).
///
/// \param comm Reference-counted pointer to the (const) Teuchos::Comm<int>
///   this object communicates over.
/// \param out Reference-counted pointer to a Teuchos::FancyOStream.
///   NOTE(review): usage not visible from this declaration -- confirm.
/// \param plist Reference-counted pointer to a non-const
///   Teuchos::ParameterList; presumably configuration options -- confirm.
238 Distributor (
const Teuchos::RCP<
const Teuchos::Comm<int> >& comm,
239 const Teuchos::RCP<Teuchos::FancyOStream>& out,
240 const Teuchos::RCP<Teuchos::ParameterList>& plist);
/// Set up this object from a list of destination process ranks.
///
/// \param exportProcIDs Read-only view of process IDs (int ranks).
///   Presumably one entry per item this process exports -- TODO confirm.
///
/// \return A size_t count.  NOTE(review): presumably the number of items
///   this process will receive; verify against the definition.
297 size_t createFromSends (
const Teuchos::ArrayView<const int>& exportProcIDs);
332 template <
class Ordinal>
335 const Teuchos::ArrayView<const int>& remoteProcIDs,
336 Teuchos::Array<Ordinal>& exportIDs,
337 Teuchos::Array<int>& exportProcIDs);
348 const Teuchos::ArrayView<const int>& remoteProcIDs);
/// Read-only accessor returning a view of int process ranks.
///
/// Const member function; the returned ArrayView does not own its data.
/// NOTE(review): presumably a view of the procsTo_ member (ranks this
/// process sends to) -- confirm against the definition.
383 Teuchos::ArrayView<const int>
getProcsTo()
const;
408 return howInitialized_;
/// Return a reference-counted pointer to a Distributor.
///
/// \param create Defaults to true.  NOTE(review): presumably controls
///   whether the reverse Distributor is built on demand when it does not
///   exist yet (see the mutable reverseDistributor_ member and
///   createReverseDistributor()) -- confirm against the definition.
///
/// Const member function.
425 Teuchos::RCP<Distributor>
getReverse(
bool create=
true)
const;
451 template <
class Packet>
455 const Teuchos::ArrayView<Packet> &imports);
478 template <
class Packet>
481 const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
482 const Teuchos::ArrayView<Packet> &imports,
483 const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);
509 template <
class Packet>
511 doPosts (
const Teuchos::ArrayRCP<const Packet> &exports,
513 const Teuchos::ArrayRCP<Packet> &imports);
533 template <
class Packet>
535 doPosts (
const Teuchos::ArrayRCP<const Packet> &exports,
536 const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
537 const Teuchos::ArrayRCP<Packet> &imports,
538 const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);
552 template <
class Packet>
556 const Teuchos::ArrayView<Packet> &imports);
562 template <
class Packet>
565 const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
566 const Teuchos::ArrayView<Packet> &imports,
567 const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);
573 template <
class Packet>
577 const Teuchos::ArrayRCP<Packet> &imports);
583 template <
class Packet>
586 const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
587 const Teuchos::ArrayRCP<Packet> &imports,
588 const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);
618 template <
class ExpView,
class ImpView>
619 typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
621 const ExpView &exports,
623 const ImpView &imports);
646 template <
class ExpView,
class ImpView>
647 typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
649 const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
650 const ImpView &imports,
651 const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);
677 template <
class ExpView,
class ImpView>
678 typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
679 doPosts (
const ExpView &exports,
681 const ImpView &imports);
701 template <
class ExpView,
class ImpView>
702 typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
703 doPosts (
const ExpView &exports,
704 const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
705 const ImpView &imports,
706 const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);
712 template <
class ExpView,
class ImpView>
713 typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
716 const ImpView &imports);
722 template <
class ExpView,
class ImpView>
723 typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
725 const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
726 const ImpView &imports,
727 const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);
733 template <
class ExpView,
class ImpView>
734 typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
737 const ImpView &imports);
743 template <
class ExpView,
class ImpView>
744 typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
746 const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
747 const ImpView &imports,
748 const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);
754 bytes_sent = lastRoundBytesSend_;
755 bytes_recvd = lastRoundBytesRecv_;
787 describe (Teuchos::FancyOStream& out,
788 const Teuchos::EVerbosityLevel verbLevel =
789 Teuchos::Describable::verbLevel_default)
const;
794 Teuchos::RCP<const Teuchos::Comm<int> > comm_;
806 bool barrierBetween_;
809 static bool getVerbose();
/// Build a heap-allocated prefix string for diagnostic output.
///
/// \param methodName Name of the calling method, as a C string.
///
/// \return Owning pointer to the prefix.  Callers in this file dereference
///   it and stream it ahead of each message they write to std::cerr.
815 std::unique_ptr<std::string>
816 createPrefix(
const char methodName[])
const;
819 bool verbose_ = getVerbose();
// ---- Send side of the communication plan ----

// Destination rank of each send block; doPosts passes procsTo_[p] as the
// target of every send and compares it with this process's rank to detect
// the self-message.
842 Teuchos::Array<int> procsTo_;
// Starting offset of each send block; doPosts multiplies startsTo_[p] by
// numPackets to locate the block in the exports buffer (fast path) or uses
// it as the first index into indicesTo_ (slow path).
852 Teuchos::Array<size_t> startsTo_;
// Length of each send block, in the same units as startsTo_.
859 Teuchos::Array<size_t> lengthsTo_;
// Sizes the temporary send buffer in the 3-arg slow path
// (maxSendLength_ * numPackets entries).
864 size_t maxSendLength_;
// When empty, doPosts takes the "fast" path and sends directly out of the
// exports buffer.  When nonempty, its entries are used as indices into
// exports and data are first copied into a temporary send buffer.
881 Teuchos::Array<size_t> indicesTo_;

// ---- Receive side of the communication plan ----

// Multiplied by numPackets to obtain the minimum size doPosts requires of
// the imports buffer.
900 size_t totalReceiveLength_;
// Length of each receive block; scaled by numPackets to size each receive
// buffer in the 3-arg doPosts.
907 Teuchos::Array<size_t> lengthsFrom_;
// Source rank of each receive block, passed to ireceive.
914 Teuchos::Array<int> procsFrom_;
// NOTE(review): not read in the code visible here; presumably the receive
// counterpart of startsTo_ -- confirm.
921 Teuchos::Array<size_t> startsFrom_;
// NOTE(review): not read in the code visible here; presumably the receive
// counterpart of indicesTo_ -- confirm.
928 Teuchos::Array<size_t> indicesFrom_;

// Handles of nonblocking operations: doPosts appends the results of
// ireceive/isend here, and the *AndWaits entry points throw if it is
// nonempty on entry.
936 Teuchos::Array<Teuchos::RCP<Teuchos::CommRequest<int> > > requests_;

// Reverse plan.  Mutable so that const code can fill it lazily;
// doReversePosts calls createReverseDistributor() when this is null.
942 mutable Teuchos::RCP<Distributor> reverseDistributor_;

// Byte counts of the most recent round: the *AndWaits methods set these to
// exports.size() * sizeof(Packet) and imports.size() * sizeof(Packet).
945 size_t lastRoundBytesSend_;
948 size_t lastRoundBytesRecv_;
// Optional timers, compiled in only when HAVE_TPETRA_DISTRIBUTOR_TIMINGS is
// defined.  Naming: "3"/"4" is the argument count of the doPosts overload;
// "TA" timers are used by the Teuchos::ArrayRCP overloads in this file.
// NOTE(review): "KV" presumably denotes the Kokkos::View overloads --
// confirm.  The _recvs_, _barrier_, _sends_, _sends_slow_ and _sends_fast_
// suffixes name the phase of doPosts being timed.
950 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
951 Teuchos::RCP<Teuchos::Time> timer_doPosts3TA_;
952 Teuchos::RCP<Teuchos::Time> timer_doPosts4TA_;
953 Teuchos::RCP<Teuchos::Time> timer_doPosts3KV_;
954 Teuchos::RCP<Teuchos::Time> timer_doPosts4KV_;
955 Teuchos::RCP<Teuchos::Time> timer_doWaits_;
956 Teuchos::RCP<Teuchos::Time> timer_doPosts3TA_recvs_;
957 Teuchos::RCP<Teuchos::Time> timer_doPosts4TA_recvs_;
958 Teuchos::RCP<Teuchos::Time> timer_doPosts3TA_barrier_;
959 Teuchos::RCP<Teuchos::Time> timer_doPosts4TA_barrier_;
960 Teuchos::RCP<Teuchos::Time> timer_doPosts3TA_sends_;
961 Teuchos::RCP<Teuchos::Time> timer_doPosts4TA_sends_;
962 Teuchos::RCP<Teuchos::Time> timer_doPosts3TA_sends_slow_;
963 Teuchos::RCP<Teuchos::Time> timer_doPosts4TA_sends_slow_;
964 Teuchos::RCP<Teuchos::Time> timer_doPosts3TA_sends_fast_;
965 Teuchos::RCP<Teuchos::Time> timer_doPosts4TA_sends_fast_;
966 Teuchos::RCP<Teuchos::Time> timer_doPosts3KV_recvs_;
967 Teuchos::RCP<Teuchos::Time> timer_doPosts4KV_recvs_;
968 Teuchos::RCP<Teuchos::Time> timer_doPosts3KV_barrier_;
969 Teuchos::RCP<Teuchos::Time> timer_doPosts4KV_barrier_;
970 Teuchos::RCP<Teuchos::Time> timer_doPosts3KV_sends_;
971 Teuchos::RCP<Teuchos::Time> timer_doPosts4KV_sends_;
972 Teuchos::RCP<Teuchos::Time> timer_doPosts3KV_sends_slow_;
973 Teuchos::RCP<Teuchos::Time> timer_doPosts4KV_sends_slow_;
974 Teuchos::RCP<Teuchos::Time> timer_doPosts3KV_sends_fast_;
975 Teuchos::RCP<Teuchos::Time> timer_doPosts4KV_sends_fast_;
979 #endif // HAVE_TPETRA_DISTRIBUTOR_TIMINGS
992 bool useDistinctTags_;
/// Map a code-path identifier to the message tag used for communication.
///
/// \param pathTag Identifier of the calling code path.  In this file the
///   3-arg ArrayRCP doPosts passes 0 and the 4-arg ArrayRCP doPosts
///   passes 1.
///
/// \return The int tag that the caller then passes to its send and
///   receive calls.
998 int getTag (
const int pathTag)
const;
1010 void computeReceives ();
/// Compute export (send) information from import (receive) information.
///
/// \tparam Ordinal Type of the IDs being exchanged.
///
/// \param remoteGIDs [in] Read-only view of IDs.
/// \param remoteProcIDs [in] Read-only view of process ranks.
///   NOTE(review): presumably remoteProcIDs[i] is the rank that owns
///   remoteGIDs[i] -- confirm against the definition.
/// \param exportGIDs [out] Array passed by non-const reference.
/// \param exportProcIDs [out] Array passed by non-const reference.
1024 template <
class Ordinal>
1025 void computeSends (
const Teuchos::ArrayView<const Ordinal> &remoteGIDs,
1026 const Teuchos::ArrayView<const int> &remoteProcIDs,
1027 Teuchos::Array<Ordinal> &exportGIDs,
1028 Teuchos::Array<int> &exportProcIDs);
/// Build the reverse Distributor.
///
/// Const member function.  doReversePosts calls it when
/// reverseDistributor_ is null and then immediately dereferences
/// reverseDistributor_, so it is expected to leave that (mutable) member
/// non-null -- NOTE(review): confirm against the definition.
1031 void createReverseDistributor()
const;
1039 localDescribeToString (
const Teuchos::EVerbosityLevel vl)
const;
1043 template <
class Packet>
1047 const Teuchos::ArrayView<Packet>& imports)
1049 using Teuchos::arcp;
1050 using Teuchos::ArrayRCP;
1051 typedef typename ArrayRCP<const Packet>::size_type size_type;
1053 TEUCHOS_TEST_FOR_EXCEPTION(
1054 requests_.size () != 0, std::runtime_error,
"Tpetra::Distributor::"
1055 "doPostsAndWaits(3 args): There are " << requests_.size () <<
1056 " outstanding nonblocking messages pending. It is incorrect to call "
1057 "this method with posts outstanding.");
1069 ArrayRCP<const Packet> exportsArcp (exports.getRawPtr (),
1070 static_cast<size_type
> (0),
1071 exports.size(),
false);
1086 arcp<Packet> (imports.getRawPtr (), 0, imports.size (),
false));
1089 lastRoundBytesSend_ = exports.size () *
sizeof (Packet);
1090 lastRoundBytesRecv_ = imports.size () *
sizeof (Packet);
1093 template <
class Packet>
1096 const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
1097 const Teuchos::ArrayView<Packet> &imports,
1098 const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID)
1100 using Teuchos::arcp;
1101 using Teuchos::ArrayRCP;
1103 TEUCHOS_TEST_FOR_EXCEPTION(
1104 requests_.size () != 0, std::runtime_error,
"Tpetra::Distributor::"
1105 "doPostsAndWaits: There are " << requests_.size () <<
" outstanding "
1106 "nonblocking messages pending. It is incorrect to call doPostsAndWaits "
1107 "with posts outstanding.");
1120 typedef typename ArrayRCP<const Packet>::size_type size_type;
1121 ArrayRCP<const Packet> exportsArcp (exports.getRawPtr (),
1122 static_cast<size_type
> (0),
1123 exports.size (),
false);
1129 numExportPacketsPerLID,
1130 arcp<Packet> (imports.getRawPtr (), 0, imports.size (),
false),
1131 numImportPacketsPerLID);
1134 lastRoundBytesSend_ = exports.size () *
sizeof (Packet);
1135 lastRoundBytesRecv_ = imports.size () *
sizeof (Packet);
1139 template <
class Packet>
1141 doPosts (
const Teuchos::ArrayRCP<const Packet>& exports,
1143 const Teuchos::ArrayRCP<Packet>& imports)
1145 using Teuchos::Array;
1146 using Teuchos::ArrayRCP;
1147 using Teuchos::ArrayView;
1149 using Teuchos::FancyOStream;
1150 using Teuchos::includesVerbLevel;
1151 using Teuchos::ireceive;
1152 using Teuchos::isend;
1153 using Teuchos::readySend;
1154 using Teuchos::send;
1155 using Teuchos::ssend;
1156 using Teuchos::TypeNameTraits;
1157 using Teuchos::typeName;
1159 using size_type = Array<size_t>::size_type;
1161 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
1162 Teuchos::TimeMonitor timeMon (*timer_doPosts3TA_);
1163 #endif // HAVE_TPETRA_DISTRIBUTOR_TIMINGS
1166 const int myRank = comm_->getRank ();
1170 const bool doBarrier = barrierBetween_;
1172 std::unique_ptr<std::string> prefix;
1174 prefix = createPrefix(
"doPosts(3-arg, ArrayRCP)");
1175 std::ostringstream os;
1176 os << *prefix <<
"Start" << endl;
1177 std::cerr << os.str();
1180 TEUCHOS_TEST_FOR_EXCEPTION(
1181 sendType == Details::DISTRIBUTOR_RSEND && ! doBarrier, std::logic_error,
1182 "Tpetra::Distributor::doPosts(3 args, Teuchos::ArrayRCP): Ready-send "
1183 "version requires a barrier between posting receives and posting ready "
1184 "sends. This should have been checked before. "
1185 "Please report this bug to the Tpetra developers.");
1187 size_t selfReceiveOffset = 0;
1192 if (howInitialized_ != Details::DISTRIBUTOR_INITIALIZED_BY_REVERSE) {
1199 const size_t totalNumImportPackets = totalReceiveLength_ * numPackets;
1200 TEUCHOS_TEST_FOR_EXCEPTION
1201 (static_cast<size_t> (imports.size ()) < totalNumImportPackets,
1202 std::invalid_argument,
1203 "Tpetra::Distributor::doPosts(3 args, Teuchos::ArrayRCP): "
1204 "The 'imports' array must have enough entries to hold the expected number "
1205 "of import packets. imports.size() = " << imports.size () <<
" < "
1206 "totalNumImportPackets = " << totalNumImportPackets <<
".");
1214 const int pathTag = 0;
1215 const int tag = this->getTag (pathTag);
1218 TEUCHOS_TEST_FOR_EXCEPTION
1219 (requests_.size () != 0,
1221 "Tpetra::Distributor::doPosts(3 args, Teuchos::ArrayRCP): Process "
1222 << myRank <<
": requests_.size() = " << requests_.size () <<
" != 0.");
1238 const size_type actualNumReceives = as<size_type> (numReceives_) +
1239 as<size_type> (selfMessage_ ? 1 : 0);
1240 requests_.resize (0);
1243 std::ostringstream os;
1244 os << *prefix << (indicesTo_.empty () ?
"Fast" :
"Slow")
1245 <<
": Post receives" << endl;
1246 std::cerr << os.str();
1255 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
1256 Teuchos::TimeMonitor timeMonRecvs (*timer_doPosts3TA_recvs_);
1257 #endif // HAVE_TPETRA_DISTRIBUTOR_TIMINGS
1259 size_t curBufOffset = 0;
1260 for (size_type i = 0; i < actualNumReceives; ++i) {
1261 const size_t curBufLen = lengthsFrom_[i] * numPackets;
1262 if (procsFrom_[i] != myRank) {
1264 std::ostringstream os;
1265 os << *prefix << (indicesTo_.empty () ?
"Fast" :
"Slow")
1266 <<
": Post irecv: {source: " << procsFrom_[i]
1267 <<
", tag: " << tag <<
"}" << endl;
1268 std::cerr << os.str();
1277 TEUCHOS_TEST_FOR_EXCEPTION(
1278 curBufOffset + curBufLen > static_cast<size_t> (imports.size ()),
1280 "Tpetra::Distributor::doPosts(3 args, Teuchos::ArrayRCP): "
1281 "Exceeded size of 'imports' array in packing loop on Process " <<
1282 myRank <<
". imports.size() = " << imports.size () <<
" < "
1283 "curBufOffset(" << curBufOffset <<
") + curBufLen(" << curBufLen
1285 ArrayRCP<Packet> recvBuf =
1286 imports.persistingView (curBufOffset, curBufLen);
1287 requests_.push_back (ireceive<int, Packet> (recvBuf, procsFrom_[i],
1291 selfReceiveOffset = curBufOffset;
1293 curBufOffset += curBufLen;
1298 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
1299 Teuchos::TimeMonitor timeMonBarrier (*timer_doPosts3TA_barrier_);
1300 #endif // HAVE_TPETRA_DISTRIBUTOR_TIMINGS
1303 std::ostringstream os;
1304 os << *prefix << (indicesTo_.empty () ?
"Fast" :
"Slow")
1305 <<
": Barrier" << endl;
1306 std::cerr << os.str();
1316 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
1317 Teuchos::TimeMonitor timeMonSends (*timer_doPosts3TA_sends_);
1318 #endif // HAVE_TPETRA_DISTRIBUTOR_TIMINGS
1325 size_t numBlocks = numSends_ + selfMessage_;
1326 size_t procIndex = 0;
1327 while ((procIndex < numBlocks) && (procsTo_[procIndex] < myRank)) {
1330 if (procIndex == numBlocks) {
1335 size_t selfIndex = 0;
1338 std::ostringstream os;
1339 os << *prefix << (indicesTo_.empty () ?
"Fast" :
"Slow")
1340 <<
": Post sends" << endl;
1341 std::cerr << os.str();
1344 if (indicesTo_.empty ()) {
1346 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
1347 Teuchos::TimeMonitor timeMonSends2 (*timer_doPosts3TA_sends_fast_);
1348 #endif // HAVE_TPETRA_DISTRIBUTOR_TIMINGS
1352 for (
size_t i = 0; i < numBlocks; ++i) {
1353 size_t p = i + procIndex;
1354 if (p > (numBlocks - 1)) {
1358 if (procsTo_[p] != myRank) {
1360 std::ostringstream os;
1361 os << *prefix <<
": Post send: {target: "
1362 << procsTo_[p] <<
", tag: " << tag <<
"}" << endl;
1363 std::cerr << os.str();
1366 ArrayView<const Packet> tmpSend =
1367 exports.view (startsTo_[p]*numPackets, lengthsTo_[p]*numPackets);
1369 if (sendType == Details::DISTRIBUTOR_SEND) {
1370 send<int, Packet> (tmpSend.getRawPtr (),
1371 as<int> (tmpSend.size ()),
1372 procsTo_[p], tag, *comm_);
1374 else if (sendType == Details::DISTRIBUTOR_ISEND) {
1375 ArrayRCP<const Packet> tmpSendBuf =
1376 exports.persistingView (startsTo_[p] * numPackets,
1377 lengthsTo_[p] * numPackets);
1378 requests_.push_back (isend<int, Packet> (tmpSendBuf, procsTo_[p],
1381 else if (sendType == Details::DISTRIBUTOR_RSEND) {
1382 readySend<int, Packet> (tmpSend.getRawPtr (),
1383 as<int> (tmpSend.size ()),
1384 procsTo_[p], tag, *comm_);
1386 else if (sendType == Details::DISTRIBUTOR_SSEND) {
1387 ssend<int, Packet> (tmpSend.getRawPtr (),
1388 as<int> (tmpSend.size ()),
1389 procsTo_[p], tag, *comm_);
1391 TEUCHOS_TEST_FOR_EXCEPTION(
1392 true, std::logic_error,
1393 "Tpetra::Distributor::doPosts(3 args, Teuchos::ArrayRCP): "
1394 "Invalid send type. We should never get here. "
1395 "Please report this bug to the Tpetra developers.");
1405 std::ostringstream os;
1406 os << *prefix <<
"Fast: Self-send" << endl;
1407 std::cerr << os.str();
1416 std::copy (exports.begin()+startsTo_[selfNum]*numPackets,
1417 exports.begin()+startsTo_[selfNum]*numPackets+lengthsTo_[selfNum]*numPackets,
1418 imports.begin()+selfReceiveOffset);
1423 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
1424 Teuchos::TimeMonitor timeMonSends2 (*timer_doPosts3TA_sends_slow_);
1425 #endif // HAVE_TPETRA_DISTRIBUTOR_TIMINGS
1429 ArrayRCP<Packet> sendArray (maxSendLength_ * numPackets);
1431 TEUCHOS_TEST_FOR_EXCEPTION(
1432 sendType == Details::DISTRIBUTOR_ISEND, std::logic_error,
1433 "Tpetra::Distributor::doPosts(3 args, Teuchos::ArrayRCP): "
1434 "The \"send buffer\" code path doesn't currently work with "
1435 "nonblocking sends.");
1437 for (
size_t i = 0; i < numBlocks; ++i) {
1438 size_t p = i + procIndex;
1439 if (p > (numBlocks - 1)) {
1443 if (procsTo_[p] != myRank) {
1445 std::ostringstream os;
1446 os << *prefix <<
"Slow: Post send: "
1447 "{target: " << procsTo_[p] <<
", tag: " << tag <<
"}" << endl;
1448 std::cerr << os.str();
1451 typename ArrayView<const Packet>::iterator srcBegin, srcEnd;
1452 size_t sendArrayOffset = 0;
1453 size_t j = startsTo_[p];
1454 for (
size_t k = 0; k < lengthsTo_[p]; ++k, ++j) {
1455 srcBegin = exports.begin() + indicesTo_[j]*numPackets;
1456 srcEnd = srcBegin + numPackets;
1457 std::copy (srcBegin, srcEnd, sendArray.begin()+sendArrayOffset);
1458 sendArrayOffset += numPackets;
1460 ArrayView<const Packet> tmpSend =
1461 sendArray.view (0, lengthsTo_[p]*numPackets);
1463 if (sendType == Details::DISTRIBUTOR_SEND) {
1464 send<int, Packet> (tmpSend.getRawPtr (),
1465 as<int> (tmpSend.size ()),
1466 procsTo_[p], tag, *comm_);
1468 else if (sendType == Details::DISTRIBUTOR_ISEND) {
1469 ArrayRCP<const Packet> tmpSendBuf =
1470 sendArray.persistingView (0, lengthsTo_[p] * numPackets);
1471 requests_.push_back (isend<int, Packet> (tmpSendBuf, procsTo_[p],
1474 else if (sendType == Details::DISTRIBUTOR_RSEND) {
1475 readySend<int, Packet> (tmpSend.getRawPtr (),
1476 as<int> (tmpSend.size ()),
1477 procsTo_[p], tag, *comm_);
1479 else if (sendType == Details::DISTRIBUTOR_SSEND) {
1480 ssend<int, Packet> (tmpSend.getRawPtr (),
1481 as<int> (tmpSend.size ()),
1482 procsTo_[p], tag, *comm_);
1485 TEUCHOS_TEST_FOR_EXCEPTION(
1486 true, std::logic_error,
1487 "Tpetra::Distributor::doPosts(3 args, Teuchos::ArrayRCP): "
1488 "Invalid send type. We should never get here. "
1489 "Please report this bug to the Tpetra developers.");
1494 selfIndex = startsTo_[p];
1500 std::ostringstream os;
1501 os << *prefix <<
"Slow: Self-send" << endl;
1502 std::cerr << os.str();
1504 for (
size_t k = 0; k < lengthsTo_[selfNum]; ++k) {
1505 std::copy (exports.begin()+indicesTo_[selfIndex]*numPackets,
1506 exports.begin()+indicesTo_[selfIndex]*numPackets + numPackets,
1507 imports.begin() + selfReceiveOffset);
1509 selfReceiveOffset += numPackets;
1515 std::ostringstream os;
1516 os << *prefix <<
"Done!" << endl;
1517 std::cerr << os.str();
1521 template <
class Packet>
1523 doPosts (
const Teuchos::ArrayRCP<const Packet>& exports,
1524 const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
1525 const Teuchos::ArrayRCP<Packet>& imports,
1526 const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID)
1528 using Teuchos::Array;
1529 using Teuchos::ArrayRCP;
1530 using Teuchos::ArrayView;
1532 using Teuchos::ireceive;
1533 using Teuchos::isend;
1534 using Teuchos::readySend;
1535 using Teuchos::send;
1536 using Teuchos::ssend;
1537 using Teuchos::TypeNameTraits;
1539 typedef Array<size_t>::size_type size_type;
1541 std::unique_ptr<std::string> prefix;
1543 prefix = createPrefix(
"doPosts(4-arg, Teuchos)");
1544 std::ostringstream os;
1545 os << *prefix <<
"Start" << endl;
1546 std::cerr << os.str();
1549 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
1550 Teuchos::TimeMonitor timeMon (*timer_doPosts4TA_);
1551 #endif // HAVE_TPETRA_DISTRIBUTOR_TIMINGS
1556 const bool doBarrier = barrierBetween_;
// Ready-send mode is only legal when a barrier separates posting the
// receives from posting the ready sends; reaching this point without the
// barrier flag set is an internal logic error.
1580 TEUCHOS_TEST_FOR_EXCEPTION(
1581 sendType == Details::DISTRIBUTOR_RSEND && ! doBarrier,
1583 "Tpetra::Distributor::doPosts(4 args, Teuchos::ArrayRCP): Ready-send "
1584 "version requires a barrier between posting receives and posting ready "
1585 "sends. This should have been checked before. "
1586 "Please report this bug to the Tpetra developers.");
1588 const int myProcID = comm_->getRank ();
1589 size_t selfReceiveOffset = 0;
1591 #ifdef HAVE_TEUCHOS_DEBUG
1593 size_t totalNumImportPackets = 0;
1594 for (
size_t ii = 0; ii < static_cast<size_t> (numImportPacketsPerLID.size ()); ++ii) {
1595 totalNumImportPackets += numImportPacketsPerLID[ii];
1597 TEUCHOS_TEST_FOR_EXCEPTION(
1598 static_cast<size_t> (imports.size ()) < totalNumImportPackets,
1600 "Tpetra::Distributor::doPosts(4 args, Teuchos::ArrayRCP): The 'imports' "
1601 "array must have enough entries to hold the expected number of import "
1602 "packets. imports.size() = " << imports.size() <<
" < "
1603 "totalNumImportPackets = " << totalNumImportPackets <<
".");
1604 #endif // HAVE_TEUCHOS_DEBUG
1611 const int pathTag = 1;
1612 const int tag = this->getTag (pathTag);
1614 #ifdef HAVE_TEUCHOS_DEBUG
1615 TEUCHOS_TEST_FOR_EXCEPTION
1616 (requests_.size () != 0,
1618 "Tpetra::Distributor::doPosts(4 args, Teuchos::ArrayRCP): Process "
1619 << myProcID <<
": requests_.size() = " << requests_.size ()
1621 #endif // HAVE_TEUCHOS_DEBUG
1623 std::ostringstream os;
1624 os << *prefix << (indicesTo_.empty () ?
"fast" :
"slow")
1626 std::cerr << os.str();
1642 const size_type actualNumReceives = as<size_type> (numReceives_) +
1643 as<size_type> (selfMessage_ ? 1 : 0);
1644 requests_.resize (0);
1652 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
1653 Teuchos::TimeMonitor timeMonRecvs (*timer_doPosts4TA_recvs_);
1654 #endif // HAVE_TPETRA_DISTRIBUTOR_TIMINGS
1656 size_t curBufferOffset = 0;
1657 size_t curLIDoffset = 0;
1658 for (size_type i = 0; i < actualNumReceives; ++i) {
1659 size_t totalPacketsFrom_i = 0;
1660 for (
size_t j = 0; j < lengthsFrom_[i]; ++j) {
1661 totalPacketsFrom_i += numImportPacketsPerLID[curLIDoffset+j];
1663 curLIDoffset += lengthsFrom_[i];
1664 if (procsFrom_[i] != myProcID && totalPacketsFrom_i) {
1673 ArrayRCP<Packet> recvBuf =
1674 imports.persistingView (curBufferOffset, totalPacketsFrom_i);
1675 requests_.push_back (ireceive<int, Packet> (recvBuf, procsFrom_[i],
1679 selfReceiveOffset = curBufferOffset;
1681 curBufferOffset += totalPacketsFrom_i;
1686 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
1687 Teuchos::TimeMonitor timeMonBarrier (*timer_doPosts4TA_barrier_);
1688 #endif // HAVE_TPETRA_DISTRIBUTOR_TIMINGS
1697 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
1698 Teuchos::TimeMonitor timeMonSends (*timer_doPosts4TA_sends_);
1699 #endif // HAVE_TPETRA_DISTRIBUTOR_TIMINGS
1703 Array<size_t> sendPacketOffsets(numSends_,0), packetsPerSend(numSends_,0);
1704 size_t maxNumPackets = 0;
1705 size_t curPKToffset = 0;
1706 for (
size_t pp=0; pp<numSends_; ++pp) {
1707 sendPacketOffsets[pp] = curPKToffset;
1708 size_t numPackets = 0;
1709 for (
size_t j=startsTo_[pp]; j<startsTo_[pp]+lengthsTo_[pp]; ++j) {
1710 numPackets += numExportPacketsPerLID[j];
1712 if (numPackets > maxNumPackets) maxNumPackets = numPackets;
1713 packetsPerSend[pp] = numPackets;
1714 curPKToffset += numPackets;
1719 size_t numBlocks = numSends_+ selfMessage_;
1720 size_t procIndex = 0;
1721 while ((procIndex < numBlocks) && (procsTo_[procIndex] < myProcID)) {
1724 if (procIndex == numBlocks) {
1729 size_t selfIndex = 0;
1731 if (indicesTo_.empty()) {
1733 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
1734 Teuchos::TimeMonitor timeMonSends2 (*timer_doPosts4TA_sends_fast_);
1735 #endif // HAVE_TPETRA_DISTRIBUTOR_TIMINGS
1738 std::ostringstream os;
1739 os << *prefix <<
"fast path: posting sends" << endl;
1740 std::cerr << os.str();
1745 for (
size_t i = 0; i < numBlocks; ++i) {
1746 size_t p = i + procIndex;
1747 if (p > (numBlocks - 1)) {
1751 if (procsTo_[p] != myProcID && packetsPerSend[p] > 0) {
1752 ArrayView<const Packet> tmpSend =
1753 exports.view (sendPacketOffsets[p], packetsPerSend[p]);
1755 if (sendType == Details::DISTRIBUTOR_SEND) {
1756 send<int, Packet> (tmpSend.getRawPtr (),
1757 as<int> (tmpSend.size ()),
1758 procsTo_[p], tag, *comm_);
1760 else if (sendType == Details::DISTRIBUTOR_RSEND) {
1761 readySend<int, Packet> (tmpSend.getRawPtr (),
1762 as<int> (tmpSend.size ()),
1763 procsTo_[p], tag, *comm_);
1765 else if (sendType == Details::DISTRIBUTOR_ISEND) {
1766 ArrayRCP<const Packet> tmpSendBuf =
1767 exports.persistingView (sendPacketOffsets[p], packetsPerSend[p]);
1768 requests_.push_back (isend<int, Packet> (tmpSendBuf, procsTo_[p],
1771 else if (sendType == Details::DISTRIBUTOR_SSEND) {
1772 ssend<int, Packet> (tmpSend.getRawPtr (),
1773 as<int> (tmpSend.size ()),
1774 procsTo_[p], tag, *comm_);
1777 TEUCHOS_TEST_FOR_EXCEPTION(
1778 true, std::logic_error,
1779 "Tpetra::Distributor::doPosts(4 args, Teuchos::ArrayRCP): "
1780 "Invalid send type. We should never get here. Please report "
1781 "this bug to the Tpetra developers.");
1790 std::copy (exports.begin()+sendPacketOffsets[selfNum],
1791 exports.begin()+sendPacketOffsets[selfNum]+packetsPerSend[selfNum],
1792 imports.begin()+selfReceiveOffset);
1795 std::ostringstream os;
1796 os << *prefix <<
"fast path: done" << endl;
1797 std::cerr << os.str();
1802 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
1803 Teuchos::TimeMonitor timeMonSends2 (*timer_doPosts4TA_sends_slow_);
1804 #endif // HAVE_TPETRA_DISTRIBUTOR_TIMINGS
1807 std::ostringstream os;
1808 os << *prefix <<
"slow path: posting sends" << endl;
1809 std::cerr << os.str();
1813 ArrayRCP<Packet> sendArray (maxNumPackets);
1815 TEUCHOS_TEST_FOR_EXCEPTION(
1816 sendType == Details::DISTRIBUTOR_ISEND,
1818 "Tpetra::Distributor::doPosts(4 args, Teuchos::ArrayRCP): "
1819 "The \"send buffer\" code path may not necessarily work with nonblocking sends.");
1821 Array<size_t> indicesOffsets (numExportPacketsPerLID.size(), 0);
1823 for (
int j=0; j<numExportPacketsPerLID.size(); ++j) {
1824 indicesOffsets[j] = ioffset;
1825 ioffset += numExportPacketsPerLID[j];
1828 for (
size_t i = 0; i < numBlocks; ++i) {
1829 size_t p = i + procIndex;
1830 if (p > (numBlocks - 1)) {
1834 if (procsTo_[p] != myProcID) {
1835 typename ArrayView<const Packet>::iterator srcBegin, srcEnd;
1836 size_t sendArrayOffset = 0;
1837 size_t j = startsTo_[p];
1838 size_t numPacketsTo_p = 0;
1839 for (
size_t k = 0; k < lengthsTo_[p]; ++k, ++j) {
1840 srcBegin = exports.begin() + indicesOffsets[j];
1841 srcEnd = srcBegin + numExportPacketsPerLID[j];
1842 numPacketsTo_p += numExportPacketsPerLID[j];
1843 std::copy (srcBegin, srcEnd, sendArray.begin()+sendArrayOffset);
1844 sendArrayOffset += numExportPacketsPerLID[j];
1846 if (numPacketsTo_p > 0) {
1847 ArrayView<const Packet> tmpSend =
1848 sendArray.view (0, numPacketsTo_p);
1850 if (sendType == Details::DISTRIBUTOR_RSEND) {
1851 readySend<int, Packet> (tmpSend.getRawPtr (),
1852 as<int> (tmpSend.size ()),
1853 procsTo_[p], tag, *comm_);
1855 else if (sendType == Details::DISTRIBUTOR_ISEND) {
1856 ArrayRCP<const Packet> tmpSendBuf =
1857 sendArray.persistingView (0, numPacketsTo_p);
1858 requests_.push_back (isend<int, Packet> (tmpSendBuf, procsTo_[p],
1861 else if (sendType == Details::DISTRIBUTOR_SSEND) {
1862 ssend<int, Packet> (tmpSend.getRawPtr (),
1863 as<int> (tmpSend.size ()),
1864 procsTo_[p], tag, *comm_);
1867 send<int, Packet> (tmpSend.getRawPtr (),
1868 as<int> (tmpSend.size ()),
1869 procsTo_[p], tag, *comm_);
1875 selfIndex = startsTo_[p];
1880 for (
size_t k = 0; k < lengthsTo_[selfNum]; ++k) {
1881 std::copy (exports.begin()+indicesOffsets[selfIndex],
1882 exports.begin()+indicesOffsets[selfIndex]+numExportPacketsPerLID[selfIndex],
1883 imports.begin() + selfReceiveOffset);
1884 selfReceiveOffset += numExportPacketsPerLID[selfIndex];
1889 std::ostringstream os;
1890 os << *prefix <<
"slow path: done" << endl;
1891 std::cerr << os.str();
1896 template <
class Packet>
1900 const Teuchos::ArrayView<Packet>& imports)
1902 using Teuchos::arcp;
1903 using Teuchos::ArrayRCP;
1916 typedef typename ArrayRCP<const Packet>::size_type size_type;
1917 ArrayRCP<const Packet> exportsArcp (exports.getRawPtr(), as<size_type> (0),
1918 exports.size(),
false);
1925 arcp<Packet> (imports.getRawPtr (), 0, imports.size (),
false));
1928 lastRoundBytesSend_ = exports.size() *
sizeof(Packet);
1929 lastRoundBytesRecv_ = imports.size() *
sizeof(Packet);
1932 template <
class Packet>
1935 const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
1936 const Teuchos::ArrayView<Packet> &imports,
1937 const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID)
1940 using Teuchos::arcp;
1941 using Teuchos::ArrayRCP;
1943 TEUCHOS_TEST_FOR_EXCEPTION(
1944 requests_.size () != 0, std::runtime_error,
"Tpetra::Distributor::"
1945 "doReversePostsAndWaits(4 args): There are " << requests_.size ()
1946 <<
" outstanding nonblocking messages pending. It is incorrect to call "
1947 "this method with posts outstanding.");
1960 typedef typename ArrayRCP<const Packet>::size_type size_type;
1961 ArrayRCP<const Packet> exportsArcp (exports.getRawPtr (), as<size_type> (0),
1962 exports.size (),
false);
1964 numExportPacketsPerLID,
1965 arcp<Packet> (imports.getRawPtr (), 0, imports.size (),
false),
1966 numImportPacketsPerLID);
1969 lastRoundBytesSend_ = exports.size() *
sizeof(Packet);
1970 lastRoundBytesRecv_ = imports.size() *
sizeof(Packet);
// Reverse-mode post, three-argument overload: forwards to doPosts on the
// reverse Distributor.
1973 template <
class Packet>
1977 const Teuchos::ArrayRCP<Packet>& imports)
// Reverse communication is refused (std::runtime_error) unless indicesTo_
// is empty.
1980 TEUCHOS_TEST_FOR_EXCEPTION(
1981 ! indicesTo_.empty (), std::runtime_error,
1982 "Tpetra::Distributor::doReversePosts(3 args): Can only do reverse "
1983 "communication when original data are blocked by process.");
// Build the reverse Distributor lazily, on first use.
1984 if (reverseDistributor_.is_null ()) {
1985 createReverseDistributor ();
// Delegate the actual posting to the reverse Distributor.
1987 reverseDistributor_->doPosts (exports, numPackets, imports);
// Reverse-mode post, four-argument overload (per-LID packet counts):
// forwards to the four-argument doPosts on the reverse Distributor.
1990 template <
class Packet>
1993 const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
1994 const Teuchos::ArrayRCP<Packet>& imports,
1995 const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID)
// Reverse communication is refused (std::runtime_error) unless indicesTo_
// is empty.  The message names this overload as "4 args" so that it can be
// told apart from the three-argument overload.
1998 TEUCHOS_TEST_FOR_EXCEPTION(
1999 ! indicesTo_.empty (), std::runtime_error,
2000 "Tpetra::Distributor::doReversePosts(4 args): Can only do reverse "
2001 "communication when original data are blocked by process.");
// Build the reverse Distributor lazily, on first use.
2002 if (reverseDistributor_.is_null ()) {
2003 createReverseDistributor ();
// Delegate the actual posting to the reverse Distributor.
2005 reverseDistributor_->doPosts (exports, numExportPacketsPerLID,
2006 imports, numImportPacketsPerLID);
2009 template <
class ExpView,
class ImpView>
2010 typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
2012 doPostsAndWaits (
const ExpView& exports,
2014 const ImpView& imports)
2020 std::unique_ptr<std::string> prefix;
2022 prefix = createPrefix(
"doPostsAndWaits(3-arg, Kokkos)");
2023 std::ostringstream os;
2024 os << *prefix <<
"sendType: "
2025 << DistributorSendTypeEnumToString(sendType_)
2026 <<
", barrierBetween: "
2027 << (barrierBetween_ ?
"true" :
"false") << endl;
2028 std::cerr << os.str();
2031 TEUCHOS_TEST_FOR_EXCEPTION(
2032 requests_.size () != 0, std::runtime_error,
"Tpetra::Distributor::"
2033 "doPostsAndWaits(3 args): There are " << requests_.size () <<
2034 " outstanding nonblocking messages pending. It is incorrect to call "
2035 "this method with posts outstanding.");
2038 std::ostringstream os;
2039 os << *prefix <<
"Call doPosts" << endl;
2040 std::cerr << os.str();
2042 doPosts (exports, numPackets, imports);
2044 std::ostringstream os;
2045 os << *prefix <<
"Call doWaits" << endl;
2046 std::cerr << os.str();
2050 std::ostringstream os;
2051 os << *prefix <<
"Done" << endl;
2052 std::cerr << os.str();
// Forward-mode post-and-wait, Kokkos::View overload with per-LID packet
// counts.  Only participates in overload resolution when both ExpView and
// ImpView are Kokkos views.
//
// Throws std::runtime_error if requests_ is not empty on entry, then calls
// the 4-argument doPosts with the same arguments.
// NOTE(review): any wait that follows doPosts is not visible in this
// listing -- confirm against the full file.
2056 template <
class ExpView,
class ImpView>
2057 typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
2059 doPostsAndWaits(
const ExpView& exports,
2060 const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
2061 const ImpView& imports,
2062 const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID)
// rawPrefix is reused for both the diagnostic prefix and the exception text.
2065 const char rawPrefix[] =
"doPostsAndWaits(4-arg, Kokkos)";
2067 std::unique_ptr<std::string> prefix;
2069 prefix = createPrefix(rawPrefix);
2070 std::ostringstream os;
2071 os << *prefix <<
"Start" << endl;
2072 std::cerr << os.str();
// It is an error to start a new post while earlier requests are pending.
2074 TEUCHOS_TEST_FOR_EXCEPTION
2075 (requests_.size() != 0, std::runtime_error,
2076 "Tpetra::Distributor::" << rawPrefix <<
": There is/are "
2077 << requests_.size() <<
" outstanding nonblocking message(s) "
2078 "pending. It is incorrect to call this method with posts "
2080 doPosts(exports, numExportPacketsPerLID, imports, numImportPacketsPerLID);
2083 std::ostringstream os;
2084 os << *prefix <<
"Done" << endl;
2085 std::cerr << os.str();
// Forward-mode post, Kokkos::View overload with one packet count
// (numPackets) for every entry.  Only participates in overload resolution
// when both ExpView and ImpView are Kokkos views.
//
// Outline of what the visible code does:
//   1. Posts a nonblocking receive (ireceive) into a subview of `imports`
//      for every source process other than this one, appending each
//      request to requests_.
//   2. Sends one block of `exports` to every destination process other
//      than this one, using the call that matches `sendType`
//      (send / isend / readySend / ssend).
//   3. Copies a self-message, if any, from `exports` to `imports` with
//      deep_copy_offset.
// When indicesTo_ is empty ("fast path") each block is a contiguous range
// of `exports`; otherwise ("slow path") the entries of a block are first
// packed into a temporary buffer.
2090 template <
class ExpView,
class ImpView>
2091 typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
2093 doPosts (
const ExpView &exports,
2095 const ImpView &imports)
2097 using Teuchos::Array;
2099 using Teuchos::FancyOStream;
2100 using Teuchos::includesVerbLevel;
2101 using Teuchos::ireceive;
2102 using Teuchos::isend;
2103 using Teuchos::readySend;
2104 using Teuchos::send;
2105 using Teuchos::ssend;
2106 using Teuchos::TypeNameTraits;
2107 using Teuchos::typeName;
2109 using Kokkos::Compat::create_const_view;
2110 using Kokkos::Compat::create_view;
2111 using Kokkos::Compat::subview_offset;
2112 using Kokkos::Compat::deep_copy_offset;
2113 typedef Array<size_t>::size_type size_type;
2114 typedef ExpView exports_view_type;
2115 typedef ImpView imports_view_type;
// With CUDA enabled, views whose memory space is Kokkos::CudaUVMSpace are
// rejected (see the message below).
2117 #ifdef KOKKOS_ENABLE_CUDA
2119 (! std::is_same<typename ExpView::memory_space, Kokkos::CudaUVMSpace>::value &&
2120 ! std::is_same<typename ImpView::memory_space, Kokkos::CudaUVMSpace>::value,
2121 "Please do not use Tpetra::Distributor with UVM allocations. "
2122 "See Trilinos GitHub issue #1088.");
2123 #endif // KOKKOS_ENABLE_CUDA
2125 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
2126 Teuchos::TimeMonitor timeMon (*timer_doPosts3KV_);
2127 #endif // HAVE_TPETRA_DISTRIBUTOR_TIMINGS
2129 const int myRank = comm_->getRank ();
2133 const bool doBarrier = barrierBetween_;
2135 std::unique_ptr<std::string> prefix;
2137 prefix = createPrefix(
"doPosts(3-arg, Kokkos)");
2138 std::ostringstream os;
2139 os << *prefix <<
"Start" << endl;
2140 std::cerr << os.str();
// Ready sends are only valid with a barrier between receives and sends.
2143 TEUCHOS_TEST_FOR_EXCEPTION(
2144 sendType == Details::DISTRIBUTOR_RSEND && ! doBarrier,
2146 "Tpetra::Distributor::doPosts(3 args, Kokkos): Ready-send version "
2147 "requires a barrier between posting receives and posting ready sends. "
2148 "This should have been checked before. "
2149 "Please report this bug to the Tpetra developers.");
// Offset into `imports` at which a self-message is to be written.
2151 size_t selfReceiveOffset = 0;
// Number of packets `imports` must be able to hold.
2158 const size_t totalNumImportPackets = totalReceiveLength_ * numPackets;
2161 std::ostringstream os;
2162 os << *prefix <<
"totalNumImportPackets = " <<
2163 totalNumImportPackets <<
" = " << totalReceiveLength_ <<
" * " <<
2164 numPackets <<
"; imports.extent(0) = " << imports.extent (0)
2166 std::cerr << os.str();
// Debug-only checks that `imports` is large enough.  The first one
// combines a per-process flag across *comm_ with reduceAll (REDUCE_MAX).
2169 #ifdef HAVE_TPETRA_DEBUG
2172 const size_t importBufSize =
static_cast<size_t> (imports.extent (0));
2173 const int lclBad = (importBufSize < totalNumImportPackets) ? 1 : 0;
2175 using Teuchos::reduceAll;
2176 using Teuchos::REDUCE_MAX;
2177 using Teuchos::outArg;
2178 reduceAll (*comm_, REDUCE_MAX, lclBad, outArg (gblBad));
2179 TEUCHOS_TEST_FOR_EXCEPTION
2182 "Tpetra::Distributor::doPosts(3 args, Kokkos): "
2183 "On one or more MPI processes, the 'imports' array "
2184 "does not have enough entries to hold the expected number of "
2185 "import packets. ");
2188 TEUCHOS_TEST_FOR_EXCEPTION
2189 (static_cast<size_t> (imports.extent (0)) < totalNumImportPackets,
2191 "Tpetra::Distributor::doPosts(3 args, Kokkos): The 'imports' "
2192 "array must have enough entries to hold the expected number of import "
2193 "packets. imports.extent(0) = " << imports.extent (0) <<
" < "
2194 "totalNumImportPackets = " << totalNumImportPackets <<
" = "
2195 "totalReceiveLength_ (" << totalReceiveLength_ <<
") * numPackets ("
2196 << numPackets <<
").");
2197 #endif // HAVE_TPETRA_DEBUG
// Message tag for this code path, obtained from getTag with path tag 0.
2205 const int pathTag = 0;
2206 const int tag = this->getTag (pathTag);
2208 #ifdef HAVE_TPETRA_DEBUG
2209 TEUCHOS_TEST_FOR_EXCEPTION
2210 (requests_.size () != 0,
2212 "Tpetra::Distributor::doPosts(3 args, Kokkos): Process "
2213 << myRank <<
": requests_.size() = " << requests_.size () <<
" != 0.");
2214 #endif // HAVE_TPETRA_DEBUG
// Receive blocks to walk over: numReceives_, plus one if there is a
// self-message.  requests_ starts out empty.
2229 const size_type actualNumReceives = as<size_type> (numReceives_) +
2230 as<size_type> (selfMessage_ ? 1 : 0);
2231 requests_.resize (0);
2234 std::ostringstream os;
2235 os << *prefix << (indicesTo_.empty() ?
"fast" :
"slow")
2236 <<
" path: post receives" << endl;
2237 std::cerr << os.str();
2246 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
2247 Teuchos::TimeMonitor timeMonRecvs (*timer_doPosts3KV_recvs_);
2248 #endif // HAVE_TPETRA_DISTRIBUTOR_TIMINGS
// Post the receives.  curBufferOffset is the position in `imports` of the
// current block; block i holds lengthsFrom_[i] * numPackets packets.
2250 size_t curBufferOffset = 0;
2251 for (size_type i = 0; i < actualNumReceives; ++i) {
2252 const size_t curBufLen = lengthsFrom_[i] * numPackets;
2253 if (procsFrom_[i] != myRank) {
2255 std::ostringstream os;
2257 << (indicesTo_.empty() ?
"fast" :
"slow") <<
" path: "
2258 <<
"post irecv: {source: " << procsFrom_[i]
2259 <<
", tag: " << tag <<
"}" << endl;
2260 std::cerr << os.str();
2269 TEUCHOS_TEST_FOR_EXCEPTION(
2270 curBufferOffset + curBufLen > static_cast<size_t> (imports.size ()),
2271 std::logic_error,
"Tpetra::Distributor::doPosts(3 args, Kokkos): "
2272 "Exceeded size of 'imports' array in packing loop on Process " <<
2273 myRank <<
". imports.size() = " << imports.size () <<
" < "
2274 "curBufferOffset(" << curBufferOffset <<
") + curBufLen(" <<
2276 imports_view_type recvBuf =
2277 subview_offset (imports, curBufferOffset, curBufLen);
2278 requests_.push_back (ireceive<int> (recvBuf, procsFrom_[i],
2282 selfReceiveOffset = curBufferOffset;
2284 curBufferOffset += curBufLen;
2289 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
2290 Teuchos::TimeMonitor timeMonBarrier (*timer_doPosts3KV_barrier_);
2291 #endif // HAVE_TPETRA_DISTRIBUTOR_TIMINGS
2294 std::ostringstream os;
2295 os << *prefix << (indicesTo_.empty() ?
"fast" :
"slow")
2296 <<
" path: barrier" << endl;
2297 std::cerr << os.str();
2307 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
2308 Teuchos::TimeMonitor timeMonSends (*timer_doPosts3KV_sends_);
2309 #endif // HAVE_TPETRA_DISTRIBUTOR_TIMINGS
// Choose the send block to start from: scan procsTo_ while the
// destination rank is less than myRank.
// NOTE(review): the loop body and the body of the following `if` are not
// visible here; presumably they advance and wrap procIndex -- confirm.
2316 size_t numBlocks = numSends_ + selfMessage_;
2317 size_t procIndex = 0;
2318 while ((procIndex < numBlocks) && (procsTo_[procIndex] < myRank)) {
2321 if (procIndex == numBlocks) {
2326 size_t selfIndex = 0;
2329 std::ostringstream os;
2330 os << *prefix << (indicesTo_.empty() ?
"fast" :
"slow")
2331 <<
" path: post sends" << endl;
2332 std::cerr << os.str();
// Fast path: with indicesTo_ empty, block p is the contiguous range of
// `exports` starting at startsTo_[p]*numPackets with
// lengthsTo_[p]*numPackets packets.
2335 if (indicesTo_.empty()) {
2337 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
2338 Teuchos::TimeMonitor timeMonSends2 (*timer_doPosts3KV_sends_fast_);
2339 #endif // HAVE_TPETRA_DISTRIBUTOR_TIMINGS
2342 std::ostringstream os;
2343 os << *prefix <<
"fast path: posting sends" << endl;
2344 std::cerr << os.str();
2349 for (
size_t i = 0; i < numBlocks; ++i) {
2350 size_t p = i + procIndex;
2351 if (p > (numBlocks - 1)) {
2355 if (procsTo_[p] != myRank) {
2357 std::ostringstream os;
2358 os << *prefix <<
"fast path: post send: {target: "
2359 << procsTo_[p] <<
", tag: " << tag <<
"}" << endl;
2360 std::cerr << os.str();
2362 exports_view_type tmpSend = subview_offset(
2363 exports, startsTo_[p]*numPackets, lengthsTo_[p]*numPackets);
// Dispatch on the configured send type; only ISEND records a request.
2365 if (sendType == Details::DISTRIBUTOR_SEND) {
2367 as<int> (tmpSend.size ()),
2368 procsTo_[p], tag, *comm_);
2370 else if (sendType == Details::DISTRIBUTOR_ISEND) {
2371 exports_view_type tmpSendBuf =
2372 subview_offset (exports, startsTo_[p] * numPackets,
2373 lengthsTo_[p] * numPackets);
2374 requests_.push_back (isend<int> (tmpSendBuf, procsTo_[p],
2377 else if (sendType == Details::DISTRIBUTOR_RSEND) {
2378 readySend<int> (tmpSend,
2379 as<int> (tmpSend.size ()),
2380 procsTo_[p], tag, *comm_);
2382 else if (sendType == Details::DISTRIBUTOR_SSEND) {
2383 ssend<int> (tmpSend,
2384 as<int> (tmpSend.size ()),
2385 procsTo_[p], tag, *comm_);
2387 TEUCHOS_TEST_FOR_EXCEPTION(
2390 "Tpetra::Distributor::doPosts(3 args, Kokkos): "
2391 "Invalid send type. We should never get here. "
2392 "Please report this bug to the Tpetra developers.");
2402 std::ostringstream os;
2403 os << *prefix <<
"fast path: self-send" << endl;
2404 std::cerr << os.str();
// Self-message: copy straight from `exports` into `imports` at the offset
// recorded while posting receives.
2413 deep_copy_offset(imports, exports, selfReceiveOffset,
2414 startsTo_[selfNum]*numPackets,
2415 lengthsTo_[selfNum]*numPackets);
2418 std::ostringstream os;
2419 os << *prefix <<
"fast path: done" << endl;
2420 std::cerr << os.str();
2425 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
2426 Teuchos::TimeMonitor timeMonSends2 (*timer_doPosts3KV_sends_slow_);
2427 #endif // HAVE_TPETRA_DISTRIBUTOR_TIMINGS
2430 std::ostringstream os;
2431 os << *prefix <<
"slow path: posting sends" << endl;
2432 std::cerr << os.str();
// Slow path: the entries of a block are gathered through indicesTo_ into
// sendArray, a buffer with room for maxSendLength_ * numPackets packets.
2434 typedef typename ExpView::non_const_value_type Packet;
2435 typedef typename ExpView::array_layout Layout;
2436 typedef typename ExpView::device_type Device;
2437 typedef typename ExpView::memory_traits Mem;
2438 Kokkos::View<Packet*,Layout,Device,Mem> sendArray (
"sendArray",
2439 maxSendLength_ * numPackets);
// Nonblocking sends are rejected on this path, so the ISEND branch inside
// the loop below is not expected to run.
2443 TEUCHOS_TEST_FOR_EXCEPTION(
2444 sendType == Details::DISTRIBUTOR_ISEND,
2446 "Tpetra::Distributor::doPosts(3 args, Kokkos): The \"send buffer\" code path "
2447 "doesn't currently work with nonblocking sends.");
2449 for (
size_t i = 0; i < numBlocks; ++i) {
2450 size_t p = i + procIndex;
2451 if (p > (numBlocks - 1)) {
2455 if (procsTo_[p] != myRank) {
2457 std::ostringstream os;
2458 os << *prefix <<
"slow path: post send: {target: "
2459 << procsTo_[p] <<
", tag: " << tag <<
"}" << endl;
2460 std::cerr << os.str();
// Pack block p: one chunk of numPackets packets per entry.
2463 size_t sendArrayOffset = 0;
2464 size_t j = startsTo_[p];
2465 for (
size_t k = 0; k < lengthsTo_[p]; ++k, ++j) {
2466 deep_copy_offset(sendArray, exports, sendArrayOffset,
2467 indicesTo_[j]*numPackets, numPackets);
2468 sendArrayOffset += numPackets;
2471 subview_offset(sendArray,
size_t(0), lengthsTo_[p]*numPackets);
2473 if (sendType == Details::DISTRIBUTOR_SEND) {
2475 as<int> (tmpSend.size ()),
2476 procsTo_[p], tag, *comm_);
2478 else if (sendType == Details::DISTRIBUTOR_ISEND) {
2479 exports_view_type tmpSendBuf =
2480 subview_offset (sendArray,
size_t(0), lengthsTo_[p] * numPackets);
2481 requests_.push_back (isend<int> (tmpSendBuf, procsTo_[p],
2484 else if (sendType == Details::DISTRIBUTOR_RSEND) {
2485 readySend<int> (tmpSend,
2486 as<int> (tmpSend.size ()),
2487 procsTo_[p], tag, *comm_);
2489 else if (sendType == Details::DISTRIBUTOR_SSEND) {
2490 ssend<int> (tmpSend,
2491 as<int> (tmpSend.size ()),
2492 procsTo_[p], tag, *comm_);
2495 TEUCHOS_TEST_FOR_EXCEPTION(
2498 "Tpetra::Distributor::doPosts(3 args, Kokkos): "
2499 "Invalid send type. We should never get here. "
2500 "Please report this bug to the Tpetra developers.");
// Remember where the self-message entries start in indicesTo_.
2505 selfIndex = startsTo_[p];
2511 std::ostringstream os;
2512 os << *prefix <<
"slow path: self-send" << endl;
2513 std::cerr << os.str();
// Self-message on the slow path: copy entry by entry into `imports`.
2515 for (
size_t k = 0; k < lengthsTo_[selfNum]; ++k) {
2516 deep_copy_offset(imports, exports, selfReceiveOffset,
2517 indicesTo_[selfIndex]*numPackets, numPackets);
2519 selfReceiveOffset += numPackets;
2523 std::ostringstream os;
2524 os << *prefix <<
"slow path: done" << endl;
2525 std::cerr << os.str();
2530 std::ostringstream os;
2531 os << *prefix <<
"Done" << endl;
2532 std::cerr << os.str();
// Forward-mode post, Kokkos::View overload with per-LID packet counts.
// Only participates in overload resolution when both ExpView and ImpView
// are Kokkos views.
//
// Outline of what the visible code does:
//   1. For every source block, sums numImportPacketsPerLID over the
//      block's LIDs and, if the source is another process and the sum is
//      nonzero, posts a nonblocking receive into a subview of `imports`.
//   2. Computes, for each of the numSends_ send blocks, its packet count
//      and its starting offset in `exports`.
//   3. Sends each block to its destination using the call that matches
//      `sendType`, and copies any self-message with deep_copy_offset.
// When indicesTo_ is empty ("fast path") blocks are sent straight from
// `exports`; otherwise ("slow path") they are packed into a temporary
// buffer first.
2536 template <
class ExpView,
class ImpView>
2537 typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
2539 doPosts (
const ExpView &exports,
2540 const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
2541 const ImpView &imports,
2542 const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID)
2544 using Teuchos::Array;
2546 using Teuchos::ireceive;
2547 using Teuchos::isend;
2548 using Teuchos::readySend;
2549 using Teuchos::send;
2550 using Teuchos::ssend;
2551 using Teuchos::TypeNameTraits;
2553 using Kokkos::Compat::create_const_view;
2554 using Kokkos::Compat::create_view;
2555 using Kokkos::Compat::subview_offset;
2556 using Kokkos::Compat::deep_copy_offset;
2557 typedef Array<size_t>::size_type size_type;
2558 typedef ExpView exports_view_type;
2559 typedef ImpView imports_view_type;
// With CUDA enabled, refuse at compile time any view whose memory space is
// Kokkos::CudaUVMSpace.
2561 #ifdef KOKKOS_ENABLE_CUDA
2562 static_assert (! std::is_same<typename ExpView::memory_space, Kokkos::CudaUVMSpace>::value &&
2563 ! std::is_same<typename ImpView::memory_space, Kokkos::CudaUVMSpace>::value,
2564 "Please do not use Tpetra::Distributor with UVM "
2565 "allocations. See GitHub issue #1088.");
2566 #endif // KOKKOS_ENABLE_CUDA
2568 std::unique_ptr<std::string> prefix;
2570 prefix = createPrefix(
"doPosts(4-arg, Kokkos)");
2571 std::ostringstream os;
2572 os << *prefix <<
"Start" << endl;
2573 std::cerr << os.str();
2576 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
2577 Teuchos::TimeMonitor timeMon (*timer_doPosts4KV_);
2578 #endif // HAVE_TPETRA_DISTRIBUTOR_TIMINGS
2583 const bool doBarrier = barrierBetween_;
// Ready sends are only valid with a barrier between receives and sends.
2607 TEUCHOS_TEST_FOR_EXCEPTION(
2608 sendType == Details::DISTRIBUTOR_RSEND && ! doBarrier,
2609 std::logic_error,
"Tpetra::Distributor::doPosts(4 args, Kokkos): Ready-send "
2610 "version requires a barrier between posting receives and posting ready "
2611 "sends. This should have been checked before. "
2612 "Please report this bug to the Tpetra developers.");
2614 const int myProcID = comm_->getRank ();
// Offset into `imports` at which a self-message is to be written.
2615 size_t selfReceiveOffset = 0;
// Debug-only check: `imports` must hold the sum of numImportPacketsPerLID.
2617 #ifdef HAVE_TEUCHOS_DEBUG
2619 size_t totalNumImportPackets = 0;
2620 for (size_type ii = 0; ii < numImportPacketsPerLID.size (); ++ii) {
2621 totalNumImportPackets += numImportPacketsPerLID[ii];
2623 TEUCHOS_TEST_FOR_EXCEPTION(
2624 imports.extent (0) < totalNumImportPackets, std::runtime_error,
2625 "Tpetra::Distributor::doPosts(4 args, Kokkos): The 'imports' array must have "
2626 "enough entries to hold the expected number of import packets. "
2627 "imports.extent(0) = " << imports.extent (0) <<
" < "
2628 "totalNumImportPackets = " << totalNumImportPackets <<
".");
2629 #endif // HAVE_TEUCHOS_DEBUG
// Message tag for this code path, obtained from getTag with path tag 1.
2636 const int pathTag = 1;
2637 const int tag = this->getTag (pathTag);
2639 #ifdef HAVE_TEUCHOS_DEBUG
2640 TEUCHOS_TEST_FOR_EXCEPTION
2641 (requests_.size () != 0, std::logic_error,
"Tpetra::Distributor::"
2642 "doPosts(4 args, Kokkos): Process " << myProcID <<
": requests_.size () = "
2643 << requests_.size () <<
" != 0.");
2644 #endif // HAVE_TEUCHOS_DEBUG
2646 std::ostringstream os;
2647 os << *prefix << (indicesTo_.empty() ?
"fast" :
"slow")
2648 <<
" path, tag=" << tag << endl;
2649 std::cerr << os.str();
// Receive blocks to walk over: numReceives_, plus one if there is a
// self-message.  requests_ starts out empty.
2664 const size_type actualNumReceives = as<size_type> (numReceives_) +
2665 as<size_type> (selfMessage_ ? 1 : 0);
2666 requests_.resize (0);
2674 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
2675 Teuchos::TimeMonitor timeMonRecvs (*timer_doPosts4KV_recvs_);
2676 #endif // HAVE_TPETRA_DISTRIBUTOR_TIMINGS
// Post the receives.  curBufferOffset is the packet offset into `imports`;
// curLIDoffset is the LID offset into numImportPacketsPerLID.
2678 size_t curBufferOffset = 0;
2679 size_t curLIDoffset = 0;
2680 for (size_type i = 0; i < actualNumReceives; ++i) {
2681 size_t totalPacketsFrom_i = 0;
2682 for (
size_t j = 0; j < lengthsFrom_[i]; ++j) {
2683 totalPacketsFrom_i += numImportPacketsPerLID[curLIDoffset+j];
2685 curLIDoffset += lengthsFrom_[i];
// Empty blocks and the self-message do not get a receive posted.
2686 if (procsFrom_[i] != myProcID && totalPacketsFrom_i) {
2695 imports_view_type recvBuf =
2696 subview_offset (imports, curBufferOffset, totalPacketsFrom_i);
2697 requests_.push_back (ireceive<int> (recvBuf, procsFrom_[i],
2701 selfReceiveOffset = curBufferOffset;
2703 curBufferOffset += totalPacketsFrom_i;
2708 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
2709 Teuchos::TimeMonitor timeMonBarrier (*timer_doPosts4KV_barrier_);
2710 #endif // HAVE_TPETRA_DISTRIBUTOR_TIMINGS
2719 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
2720 Teuchos::TimeMonitor timeMonSends (*timer_doPosts4KV_sends_);
2721 #endif // HAVE_TPETRA_DISTRIBUTOR_TIMINGS
// Per-send packet counts and starting offsets in `exports`, plus the
// largest per-send count (used to size the slow-path buffer).
// NOTE(review): both arrays have numSends_ entries, but they are indexed
// below by p and selfNum, where p ranges up to numSends_ + selfMessage_ - 1.
// Confirm this cannot index out of range when selfMessage_ is set.
2725 Array<size_t> sendPacketOffsets(numSends_,0), packetsPerSend(numSends_,0);
2726 size_t maxNumPackets = 0;
2727 size_t curPKToffset = 0;
2728 for (
size_t pp=0; pp<numSends_; ++pp) {
2729 sendPacketOffsets[pp] = curPKToffset;
2730 size_t numPackets = 0;
2731 for (
size_t j=startsTo_[pp]; j<startsTo_[pp]+lengthsTo_[pp]; ++j) {
2732 numPackets += numExportPacketsPerLID[j];
2734 if (numPackets > maxNumPackets) maxNumPackets = numPackets;
2735 packetsPerSend[pp] = numPackets;
2736 curPKToffset += numPackets;
// Choose the send block to start from: scan procsTo_ while the
// destination rank is less than myProcID.
// NOTE(review): the loop body and the body of the following `if` are not
// visible here; presumably they advance and wrap procIndex -- confirm.
2741 size_t numBlocks = numSends_+ selfMessage_;
2742 size_t procIndex = 0;
2743 while ((procIndex < numBlocks) && (procsTo_[procIndex] < myProcID)) {
2746 if (procIndex == numBlocks) {
2751 size_t selfIndex = 0;
// Fast path: send each block directly from `exports`.
2752 if (indicesTo_.empty()) {
2754 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
2755 Teuchos::TimeMonitor timeMonSends2 (*timer_doPosts4KV_sends_fast_);
2756 #endif // HAVE_TPETRA_DISTRIBUTOR_TIMINGS
2759 std::ostringstream os;
2760 os << *prefix <<
"fast path: posting sends" << endl;
2761 std::cerr << os.str();
2766 for (
size_t i = 0; i < numBlocks; ++i) {
2767 size_t p = i + procIndex;
2768 if (p > (numBlocks - 1)) {
// Blocks with zero packets are not sent.
2772 if (procsTo_[p] != myProcID && packetsPerSend[p] > 0) {
2773 exports_view_type tmpSend =
2774 subview_offset(exports, sendPacketOffsets[p], packetsPerSend[p]);
2776 if (sendType == Details::DISTRIBUTOR_SEND) {
2778 as<int> (tmpSend.size ()),
2779 procsTo_[p], tag, *comm_);
2781 else if (sendType == Details::DISTRIBUTOR_RSEND) {
2782 readySend<int> (tmpSend,
2783 as<int> (tmpSend.size ()),
2784 procsTo_[p], tag, *comm_);
2786 else if (sendType == Details::DISTRIBUTOR_ISEND) {
2787 exports_view_type tmpSendBuf =
2788 subview_offset (exports, sendPacketOffsets[p], packetsPerSend[p]);
2789 requests_.push_back (isend<int> (tmpSendBuf, procsTo_[p],
2792 else if (sendType == Details::DISTRIBUTOR_SSEND) {
2793 ssend<int> (tmpSend,
2794 as<int> (tmpSend.size ()),
2795 procsTo_[p], tag, *comm_);
2798 TEUCHOS_TEST_FOR_EXCEPTION(
2799 true, std::logic_error,
2800 "Tpetra::Distributor::doPosts(4 args, Kokkos): "
2801 "Invalid send type. We should never get here. "
2802 "Please report this bug to the Tpetra developers.");
// Self-message: copy straight from `exports` into `imports`.
2811 deep_copy_offset(imports, exports, selfReceiveOffset,
2812 sendPacketOffsets[selfNum], packetsPerSend[selfNum]);
2815 std::ostringstream os;
2816 os << *prefix <<
"fast path: done" << endl;
2817 std::cerr << os.str();
2822 #ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS
2823 Teuchos::TimeMonitor timeMonSends2 (*timer_doPosts4KV_sends_slow_);
2824 #endif // HAVE_TPETRA_DISTRIBUTOR_TIMINGS
2827 std::ostringstream os;
2828 os << *prefix <<
"slow path: posting sends" << endl;
2829 std::cerr << os.str();
// Slow path: pack each block into sendArray, which has room for
// maxNumPackets packets.
2832 typedef typename ExpView::non_const_value_type Packet;
2833 typedef typename ExpView::array_layout Layout;
2834 typedef typename ExpView::device_type Device;
2835 typedef typename ExpView::memory_traits Mem;
2836 Kokkos::View<Packet*,Layout,Device,Mem> sendArray (
"sendArray", maxNumPackets);
// Nonblocking sends are rejected on this path, so the ISEND branch inside
// the loop below is not expected to run.
2838 TEUCHOS_TEST_FOR_EXCEPTION(
2839 sendType == Details::DISTRIBUTOR_ISEND,
2841 "Tpetra::Distributor::doPosts(4-arg, Kokkos): "
2842 "The \"send buffer\" code path may not necessarily work with nonblocking sends.");
// indicesOffsets[j] is the packet offset in `exports` at which LID j's
// packets begin (running sum of numExportPacketsPerLID).
2844 Array<size_t> indicesOffsets (numExportPacketsPerLID.size(), 0);
2846 for (
int j=0; j<numExportPacketsPerLID.size(); ++j) {
2847 indicesOffsets[j] = ioffset;
2848 ioffset += numExportPacketsPerLID[j];
2851 for (
size_t i = 0; i < numBlocks; ++i) {
2852 size_t p = i + procIndex;
2853 if (p > (numBlocks - 1)) {
2857 if (procsTo_[p] != myProcID) {
// Pack block p and count its packets as we go.
2858 size_t sendArrayOffset = 0;
2859 size_t j = startsTo_[p];
2860 size_t numPacketsTo_p = 0;
2861 for (
size_t k = 0; k < lengthsTo_[p]; ++k, ++j) {
2862 numPacketsTo_p += numExportPacketsPerLID[j];
2863 deep_copy_offset(sendArray, exports, sendArrayOffset,
2864 indicesOffsets[j], numExportPacketsPerLID[j]);
2865 sendArrayOffset += numExportPacketsPerLID[j];
// Blocks with zero packets are not sent.
2867 if (numPacketsTo_p > 0) {
2869 subview_offset(sendArray,
size_t(0), numPacketsTo_p);
2871 if (sendType == Details::DISTRIBUTOR_RSEND) {
2872 readySend<int> (tmpSend,
2873 as<int> (tmpSend.size ()),
2874 procsTo_[p], tag, *comm_);
2876 else if (sendType == Details::DISTRIBUTOR_ISEND) {
2877 exports_view_type tmpSendBuf =
2878 subview_offset (sendArray,
size_t(0), numPacketsTo_p);
2879 requests_.push_back (isend<int> (tmpSendBuf, procsTo_[p],
2882 else if (sendType == Details::DISTRIBUTOR_SSEND) {
2883 ssend<int> (tmpSend,
2884 as<int> (tmpSend.size ()),
2885 procsTo_[p], tag, *comm_);
2889 as<int> (tmpSend.size ()),
2890 procsTo_[p], tag, *comm_);
// Remember where the self-message entries start.
2896 selfIndex = startsTo_[p];
// Self-message on the slow path: copy LID by LID into `imports`.
2901 for (
size_t k = 0; k < lengthsTo_[selfNum]; ++k) {
2902 deep_copy_offset(imports, exports, selfReceiveOffset,
2903 indicesOffsets[selfIndex],
2904 numExportPacketsPerLID[selfIndex]);
2905 selfReceiveOffset += numExportPacketsPerLID[selfIndex];
2910 std::ostringstream os;
2911 os << *prefix <<
"slow path: done" << endl;
2912 std::cerr << os.str();
// Reverse-mode post-and-wait, Kokkos::View overload with one packet count
// (numPackets) for every entry.  Only participates in overload resolution
// when both ExpView and ImpView are Kokkos views.
//
// Calls doReversePosts (exports, numPackets, imports).
// NOTE(review): the wait that the name implies is not visible in this
// listing -- confirm against the full file.
2917 template <
class ExpView,
class ImpView>
2918 typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
2920 doReversePostsAndWaits (
const ExpView& exports,
2922 const ImpView& imports)
2924 doReversePosts (exports, numPackets, imports);
// Reverse-mode post-and-wait, Kokkos::View overload with per-LID packet
// counts.  Only participates in overload resolution when both ExpView and
// ImpView are Kokkos views.
//
// Throws std::runtime_error if requests_ is not empty on entry, then calls
// the 4-argument doReversePosts with the same arguments.
// NOTE(review): the wait that the name implies is not visible in this
// listing -- confirm against the full file.
2928 template <
class ExpView,
class ImpView>
2929 typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
2931 doReversePostsAndWaits (
const ExpView& exports,
2932 const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
2933 const ImpView& imports,
2934 const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID)
// It is an error to start a new post while earlier requests are pending.
2936 TEUCHOS_TEST_FOR_EXCEPTION(requests_.size() != 0, std::runtime_error,
2937 "Tpetra::Distributor::doReversePostsAndWaits(4 args): There are "
2938 << requests_.size() <<
" outstanding nonblocking messages pending. It "
2939 "is incorrect to call this method with posts outstanding.");
2941 doReversePosts (exports, numExportPacketsPerLID, imports,
2942 numImportPacketsPerLID);
// Reverse-mode post, Kokkos::View overload with one packet count
// (numPackets) for every entry.  Only participates in overload resolution
// when both ExpView and ImpView are Kokkos views.
//
// Throws std::runtime_error if indicesTo_ is not empty.  Creates the
// reverse Distributor on first use (when reverseDistributor_ is null) and
// forwards the arguments to its doPosts.
2946 template <
class ExpView,
class ImpView>
2947 typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
2949 doReversePosts (
const ExpView &exports,
2951 const ImpView &imports)
// Reverse communication is only allowed when indicesTo_ is empty.
2954 TEUCHOS_TEST_FOR_EXCEPTION(
2955 ! indicesTo_.empty (), std::runtime_error,
2956 "Tpetra::Distributor::doReversePosts(3 args): Can only do "
2957 "reverse communication when original data are blocked by process.");
// Build the reverse plan lazily, the first time it is needed.
2958 if (reverseDistributor_.is_null ()) {
2959 createReverseDistributor ();
2961 reverseDistributor_->doPosts (exports, numPackets, imports);
// Reverse-mode post, Kokkos::View overload with per-LID packet counts.
// Only participates in overload resolution when both ExpView and ImpView
// are Kokkos views.
//
// Throws std::runtime_error if indicesTo_ is not empty.  Creates the
// reverse Distributor on first use (when reverseDistributor_ is null) and
// forwards all four arguments to its doPosts.
2964 template <
class ExpView,
class ImpView>
2965 typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
2967 doReversePosts (
const ExpView &exports,
2968 const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
2969 const ImpView &imports,
2970 const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID)
// Reverse communication is only allowed when indicesTo_ is empty.
// The message names the 4-argument overload so that it can be told apart
// from the 3-argument one.
2973 TEUCHOS_TEST_FOR_EXCEPTION(
2974 ! indicesTo_.empty (), std::runtime_error,
2975 "Tpetra::Distributor::doReversePosts(4 args): Can only do "
2976 "reverse communication when original data are blocked by process.");
// Build the reverse plan lazily, the first time it is needed.
2977 if (reverseDistributor_.is_null ()) {
2978 createReverseDistributor ();
2980 reverseDistributor_->doPosts (exports, numExportPacketsPerLID,
2981 imports, numImportPacketsPerLID);
// Given the GIDs this process imports (importGIDs) and the rank that each
// one comes from (importProcIDs), work out what this process must export:
// fills exportGIDs and exportProcIDs.
//
// Method, as visible below: pack a (GID, myRank) pair per import into an
// array of size_t, create a temporary plan from importProcIDs with
// tempPlan.createFromSends, exchange the pairs with
// tempPlan.doPostsAndWaits (two packets per entry), and unpack what was
// received.
//
// Throws std::invalid_argument if the two input arrays differ in size, and
// std::logic_error if the temporary plan reports inconsistent counts.
2984 template <
class OrdinalType>
2986 computeSends(
const Teuchos::ArrayView<const OrdinalType>& importGIDs,
2987 const Teuchos::ArrayView<const int>& importProcIDs,
2988 Teuchos::Array<OrdinalType>& exportGIDs,
2989 Teuchos::Array<int>& exportProcIDs)
2998 using Teuchos::Array;
2999 using Teuchos::ArrayView;
3001 using size_type =
typename ArrayView<const OrdinalType>::size_type;
3002 const char errPrefix[] =
"Tpetra::Distributor::computeSends: ";
3003 const char suffix[] =
3004 " Please report this bug to the Tpetra developers.";
3006 const int myRank = comm_->getRank ();
3007 std::unique_ptr<std::string> prefix;
3009 prefix = createPrefix(
"computeSends");
3010 std::ostringstream os;
3011 os << *prefix <<
"Start" << endl;
3012 std::cerr << os.str();
// One owning rank is required for every imported GID.
3015 TEUCHOS_TEST_FOR_EXCEPTION
3016 (importGIDs.size () != importProcIDs.size (),
3017 std::invalid_argument, errPrefix <<
"On Process " << myRank
3018 <<
": importProcIDs.size()=" << importProcIDs.size()
3019 <<
" != importGIDs.size()=" << importGIDs.size() <<
".");
// Interleave (GID, myRank) pairs: entry 2*i is the GID, 2*i+1 the rank.
3021 const size_type numImports = importProcIDs.size();
3022 Array<size_t> importObjs(2*numImports);
3024 for (size_type i = 0; i < numImports; ++i) {
3025 importObjs[2*i] =
static_cast<size_t>(importGIDs[i]);
3026 importObjs[2*i+1] =
static_cast<size_t>(myRank);
3034 std::ostringstream os;
3035 os << *prefix <<
"Call tempPlan.createFromSends" << endl;
3036 std::cerr << os.str();
// The value returned by createFromSends is taken as the export count.
3040 const size_t numExportsAsSizeT =
3041 tempPlan.createFromSends(importProcIDs);
3042 const size_type numExports =
3043 static_cast<size_type
>(numExportsAsSizeT);
// A negative value here means the size_t did not fit in size_type.
3044 TEUCHOS_TEST_FOR_EXCEPTION
3045 (numExports < 0, std::logic_error, errPrefix <<
3046 "tempPlan.createFromSends() returned numExports="
3047 << numExportsAsSizeT <<
" as a size_t, which overflows to "
3048 << numExports <<
" when cast to " <<
3049 Teuchos::TypeNameTraits<size_type>::name () <<
"." << suffix);
3050 TEUCHOS_TEST_FOR_EXCEPTION
3051 (size_type(tempPlan.getTotalReceiveLength()) != numExports,
3052 std::logic_error, errPrefix <<
"tempPlan.getTotalReceiveLength()="
3053 << tempPlan.getTotalReceiveLength () <<
" != numExports="
3054 << numExports <<
"." << suffix);
3056 if (numExports > 0) {
3057 exportGIDs.resize(numExports);
3058 exportProcIDs.resize(numExports);
// GIDs travel as size_t, so size_t must be at least as wide as OrdinalType.
3069 static_assert(
sizeof(
size_t) >=
sizeof(OrdinalType),
3070 "Tpetra::Distributor::computeSends: "
3071 "sizeof(size_t) < sizeof(OrdinalType).");
3073 TEUCHOS_TEST_FOR_EXCEPTION
3074 (tempPlan.getTotalReceiveLength () < size_t(numExports),
3076 errPrefix <<
"tempPlan.getTotalReceiveLength()="
3077 << tempPlan.getTotalReceiveLength() <<
" < numExports="
3078 << numExports <<
"." << suffix);
// Receive buffer: two size_t values per received entry.
3080 Array<size_t> exportObjs (tempPlan.getTotalReceiveLength () * 2);
3082 std::ostringstream os;
3083 os << *prefix <<
"Call tempPlan.doPostsAndWaits" << endl;
3084 std::cerr << os.str();
3086 tempPlan.doPostsAndWaits<
size_t> (importObjs (), 2, exportObjs ());
// Unpack the received (GID, rank) pairs into the output arrays.
3089 for (size_type i = 0; i < numExports; ++i) {
3090 exportGIDs[i] =
static_cast<OrdinalType
> (exportObjs[2*i]);
3091 exportProcIDs[i] =
static_cast<int> (exportObjs[2*i+1]);
3095 std::ostringstream os;
3096 os << *prefix <<
"Done" << endl;
3097 std::cerr << os.str();
// Set up the Distributor from the IDs this process will receive
// (remoteGIDs) and the rank each one comes from (remoteProcIDs); fills
// exportGIDs and exportProcIDs through computeSends.
//
// Throws std::runtime_error if remoteGIDs and remoteProcIDs differ in
// size.  On success, records
// Details::DISTRIBUTOR_INITIALIZED_BY_CREATE_FROM_RECVS in howInitialized_.
// NOTE(review): the step that builds the plan from exportProcIDs (the
// source of numProcsSendingToMe in the diagnostics below) is not visible
// in this listing -- confirm against the full file.
3101 template <
class OrdinalType>
3103 createFromRecvs (
const Teuchos::ArrayView<const OrdinalType> &remoteGIDs,
3104 const Teuchos::ArrayView<const int> &remoteProcIDs,
3105 Teuchos::Array<OrdinalType> &exportGIDs,
3106 Teuchos::Array<int> &exportProcIDs)
3109 const char errPrefix[] =
"Tpetra::Distributor::createFromRecvs: ";
3110 const int myRank = comm_->getRank();
3112 std::unique_ptr<std::string> prefix;
3114 prefix = createPrefix(
"createFromRecvs");
3115 std::ostringstream os;
3116 os << *prefix <<
"Start" << endl;
3117 std::cerr << os.str();
// Collective size check: each process contributes its own rank if its
// arrays disagree in size and -1 otherwise; reduceAll with REDUCE_MAX
// yields the largest offending rank, or -1 if there is none.
// NOTE(review): the condition that selects between this collective check
// and the local check further down is not visible here.
3122 using Teuchos::outArg;
3123 using Teuchos::REDUCE_MAX;
3124 using Teuchos::reduceAll;
3128 (remoteGIDs.size () != remoteProcIDs.size ()) ? myRank : -1;
3129 int maxErrProc = -1;
3130 reduceAll(*comm_, REDUCE_MAX, errProc, outArg(maxErrProc));
3131 TEUCHOS_TEST_FOR_EXCEPTION
3132 (maxErrProc != -1, std::runtime_error, errPrefix <<
"Lists "
3133 "of remote IDs and remote process IDs must have the same "
3134 "size on all participating processes. Maximum process ID "
3135 "with error: " << maxErrProc <<
".");
// Local size check, reporting this process's rank and both sizes.
3140 TEUCHOS_TEST_FOR_EXCEPTION
3141 (remoteGIDs.size() != remoteProcIDs.size(), std::runtime_error,
3142 errPrefix <<
"On Process " << myRank <<
": "
3143 "remoteGIDs.size()=" << remoteGIDs.size() <<
3144 " != remoteProcIDs.size()=" << remoteProcIDs.size() <<
".");
// Determine what this process must send, and to whom.
3147 computeSends(remoteGIDs, remoteProcIDs, exportGIDs, exportProcIDs);
3156 std::ostringstream os;
3157 os << *prefix <<
"numProcsSendingToMe: "
3158 << numProcsSendingToMe <<
", remoteProcIDs.size(): "
3159 << remoteProcIDs.size () <<
", selfMessage_: "
3160 << (selfMessage_ ?
"true" :
"false") <<
"" << endl;
3161 std::cerr << os.str();
// Record how this Distributor was initialized.
3164 howInitialized_ = Details::DISTRIBUTOR_INITIALIZED_BY_CREATE_FROM_RECVS;
3167 std::ostringstream os;
3168 os << *prefix <<
"Done" << endl;
3169 std::cerr << os.str();
3175 #endif // TPETRA_DISTRIBUTOR_HPP
void doPostsAndWaits(const Teuchos::ArrayView< const Packet > &exports, size_t numPackets, const Teuchos::ArrayView< Packet > &imports)
Execute the (forward) communication plan.
size_t getNumReceives() const
The number of processes from which we will receive data.
std::string description() const
Return a one-line description of this object.
Teuchos::RCP< Distributor > getReverse(bool create=true) const
A reverse communication plan Distributor.
EDistributorHowInitialized
Enum indicating how and whether a Distributor was initialized.
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const
List of valid Distributor parameters.
virtual ~Distributor()=default
Destructor (virtual for memory safety).
static bool debug()
Whether Tpetra is in debug mode.
void swap(Distributor &rhs)
Swap the contents of rhs with those of *this.
std::string DistributorSendTypeEnumToString(EDistributorSendType sendType)
Convert an EDistributorSendType enum value to a string.
void doReversePostsAndWaits(const Teuchos::ArrayView< const Packet > &exports, size_t numPackets, const Teuchos::ArrayView< Packet > &imports)
Execute the reverse communication plan.
Teuchos::ArrayView< const size_t > getLengthsFrom() const
Number of values this process will receive from each process.
Teuchos::ArrayView< const int > getProcsFrom() const
Ranks of the processes sending values to this process.
size_t createFromSends(const Teuchos::ArrayView< const int > &exportProcIDs)
Set up Distributor using list of process ranks to which this process will send.
Details::EDistributorHowInitialized howInitialized() const
Return an enum indicating whether and how a Distributor was initialized.
void doPosts(const Teuchos::ArrayRCP< const Packet > &exports, size_t numPackets, const Teuchos::ArrayRCP< Packet > &imports)
Post the data for a forward plan, but do not execute the waits yet.
Teuchos::ArrayView< const int > getProcsTo() const
Ranks of the processes to which this process will send values.
void createFromSendsAndRecvs(const Teuchos::ArrayView< const int > &exportProcIDs, const Teuchos::ArrayView< const int > &remoteProcIDs)
Set up Distributor using list of process ranks to which to send, and list of process ranks from which to receive.
bool hasSelfMessage() const
Whether the calling process will send or receive messages to itself.
Sets up and executes a communication plan for a Tpetra DistObject.
size_t getTotalReceiveLength() const
Total number of values this process will receive from other processes.
void setParameterList(const Teuchos::RCP< Teuchos::ParameterList > &plist)
Set Distributor parameters.
void doReversePosts(const Teuchos::ArrayRCP< const Packet > &exports, size_t numPackets, const Teuchos::ArrayRCP< Packet > &imports)
Post the data for a reverse plan, but do not execute the waits yet.
Teuchos::ArrayView< const size_t > getLengthsTo() const
Number of values this process will send to each process.
Teuchos::Array< std::string > distributorSendTypes()
Valid values for Distributor's "Send type" parameter.
std::string DistributorHowInitializedEnumToString(EDistributorHowInitialized how)
Convert an EDistributorHowInitialized enum value to a string.
Stand-alone utility functions and macros.
void getLastDoStatistics(size_t &bytes_sent, size_t &bytes_recvd) const
Information on the last call to do/doReverse.
size_t getNumSends() const
The number of processes to which we will send data.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Describe this object in a human-readable way to the given output stream.
size_t getMaxSendLength() const
Maximum number of values this process will send to another single process.
void createFromRecvs(const Teuchos::ArrayView< const Ordinal > &remoteIDs, const Teuchos::ArrayView< const int > &remoteProcIDs, Teuchos::Array< Ordinal > &exportIDs, Teuchos::Array< int > &exportProcIDs)
Set up Distributor using list of process ranks from which to receive.
EDistributorSendType
The type of MPI send that Distributor should use.
Distributor(const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Construct using the specified communicator and default parameters.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.