42 #ifndef TPETRA_DISTRIBUTOR_HPP 
   43 #define TPETRA_DISTRIBUTOR_HPP 
   46 #include <Teuchos_as.hpp> 
   47 #include <Teuchos_Describable.hpp> 
   48 #include <Teuchos_ParameterListAcceptorDefaultBase.hpp> 
   49 #include <Teuchos_VerboseObject.hpp> 
   61 #ifdef TPETRA_DISTRIBUTOR_TIMERS 
   62 #  undef TPETRA_DISTRIBUTOR_TIMERS 
   63 #endif // TPETRA_DISTRIBUTOR_TIMERS 
   65 #include "KokkosCompat_View.hpp" 
   66 #include "Kokkos_Core.hpp" 
   67 #include "Kokkos_TeuchosCommAdapters.hpp" 
   70 #include <type_traits> 
   98       DISTRIBUTOR_NOT_INITIALIZED, 
 
   99       DISTRIBUTOR_INITIALIZED_BY_CREATE_FROM_SENDS, 
 
  100       DISTRIBUTOR_INITIALIZED_BY_CREATE_FROM_RECVS, 
 
  101       DISTRIBUTOR_INITIALIZED_BY_CREATE_FROM_SENDS_N_RECVS, 
 
  102       DISTRIBUTOR_INITIALIZED_BY_REVERSE, 
 
  103       DISTRIBUTOR_INITIALIZED_BY_COPY, 
 
  191     public Teuchos::Describable,
 
  192     public Teuchos::ParameterListAcceptorDefaultBase {
 
  /// Construct a Distributor from a communicator only.
  ///
  /// \param comm [in] Reference-counted handle to a read-only
  ///   Teuchos::Comm<int>.  NOTE(review): presumably stored in comm_
  ///   (same type); the constructor body is not visible here -- confirm.
  ///
  /// Declared \c explicit, so an RCP to a communicator is never
  /// implicitly converted into a Distributor.
  205     explicit Distributor (
const Teuchos::RCP<
const Teuchos::Comm<int> >& comm);
 
  /// Construct a Distributor from a communicator and an output stream.
  ///
  /// \param comm [in] Reference-counted handle to a read-only
  ///   Teuchos::Comm<int>.
  /// \param out [in] Reference-counted handle to a Teuchos::FancyOStream.
  ///   NOTE(review): presumably stored in out_ (same type) and used for
  ///   diagnostic output; the constructor body is not visible -- confirm.
  218     Distributor (
const Teuchos::RCP<
const Teuchos::Comm<int> >& comm,
 
  219                  const Teuchos::RCP<Teuchos::FancyOStream>& out);
 
  /// Construct a Distributor from a communicator and a parameter list.
  ///
  /// \param comm [in] Reference-counted handle to a read-only
  ///   Teuchos::Comm<int>.
  /// \param plist [in/out] Reference-counted handle to a non-const
  ///   Teuchos::ParameterList.  NOTE(review): which options it may carry
  ///   and whether it is modified are not visible in this excerpt --
  ///   confirm against the constructor definition.
  234     Distributor (
const Teuchos::RCP<
const Teuchos::Comm<int> >& comm,
 
  235                  const Teuchos::RCP<Teuchos::ParameterList>& plist);
 
  /// Construct a Distributor from a communicator, an output stream, and a
  /// parameter list (the union of the arguments accepted by the other
  /// constructor overloads declared above).
  ///
  /// \param comm [in] Reference-counted handle to a read-only
  ///   Teuchos::Comm<int>.
  /// \param out [in] Reference-counted handle to a Teuchos::FancyOStream.
  /// \param plist [in/out] Reference-counted handle to a non-const
  ///   Teuchos::ParameterList.
  ///
  /// NOTE(review): how each argument is consumed is defined outside this
  /// excerpt -- confirm before relying on any particular behavior.
  253     Distributor (
const Teuchos::RCP<
const Teuchos::Comm<int> >& comm,
 
  254                  const Teuchos::RCP<Teuchos::FancyOStream>& out,
 
  255                  const Teuchos::RCP<Teuchos::ParameterList>& plist);
 
  /// Set up this object from a list of process ranks to send to.
  ///
  /// \param exportProcIDs [in] Read-only view of \c int process IDs.
  ///   NOTE(review): presumably one destination rank per exported entry --
  ///   confirm against the definition, which is not in this excerpt.
  ///
  /// \return A \c size_t.  NOTE(review): its meaning (likely a receive
  ///   count) is not established by the visible code -- confirm.
  312     size_t createFromSends (
const Teuchos::ArrayView<const int>& exportProcIDs);
 
  347     template <
class Ordinal>
 
  350                      const Teuchos::ArrayView<const int>& remoteProcIDs,
 
  351                      Teuchos::Array<Ordinal>& exportIDs,
 
  352                      Teuchos::Array<int>& exportProcIDs);
 
  363                              const Teuchos::ArrayView<const int>& remoteProcIDs);
 
  /// Return a read-only view of \c int values; does not modify the object
  /// (declared \c const).
  ///
  /// NOTE(review): presumably a view of procsTo_, the array whose entries
  /// doPosts() uses as destination ranks -- confirm against the definition.
  398     Teuchos::ArrayView<const int> 
getProcsTo() 
const;
 
  423       return howInitialized_;
 
  466     template <
class Packet>
 
  470                      const Teuchos::ArrayView<Packet> &imports);
 
  493     template <
class Packet>
 
  496                      const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
 
  497                      const Teuchos::ArrayView<Packet> &imports,
 
  498                      const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);
 
  524     template <
class Packet>
 
  526     doPosts (
const Teuchos::ArrayRCP<const Packet> &exports,
 
  528              const Teuchos::ArrayRCP<Packet> &imports);
 
  548     template <
class Packet>
 
  550     doPosts (
const Teuchos::ArrayRCP<const Packet> &exports,
 
  551              const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
 
  552              const Teuchos::ArrayRCP<Packet> &imports,
 
  553              const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);
 
  567     template <
class Packet>
 
  571                             const Teuchos::ArrayView<Packet> &imports);
 
  577     template <
class Packet>
 
  580                             const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
 
  581                             const Teuchos::ArrayView<Packet> &imports,
 
  582                             const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);
 
  588     template <
class Packet>
 
  592                     const Teuchos::ArrayRCP<Packet> &imports);
 
  598     template <
class Packet>
 
  601                     const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
 
  602                     const Teuchos::ArrayRCP<Packet> &imports,
 
  603                     const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);
 
  633     template <
class ExpView, 
class ImpView>
 
  634     typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
 
  636       const ExpView &exports,
 
  638       const ImpView &imports);
 
  661     template <
class ExpView, 
class ImpView>
 
  662     typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
 
  664                      const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
 
  665                      const ImpView &imports,
 
  666                      const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);
 
  692     template <
class ExpView, 
class ImpView>
 
  693     typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
 
  694     doPosts (
const ExpView &exports,
 
  696              const ImpView &imports);
 
  716     template <
class ExpView, 
class ImpView>
 
  717     typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
 
  718     doPosts (
const ExpView &exports,
 
  719              const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
 
  720              const ImpView &imports,
 
  721              const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);
 
  727     template <
class ExpView, 
class ImpView>
 
  728     typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
 
  731                             const ImpView &imports);
 
  737     template <
class ExpView, 
class ImpView>
 
  738     typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
 
  740                             const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
 
  741                             const ImpView &imports,
 
  742                             const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);
 
  748     template <
class ExpView, 
class ImpView>
 
  749     typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
 
  752                     const ImpView &imports);
 
  758     template <
class ExpView, 
class ImpView>
 
  759     typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
 
  761                     const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
 
  762                     const ImpView &imports,
 
  763                     const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);
 
  769       bytes_sent  = lastRoundBytesSend_;
 
  770       bytes_recvd = lastRoundBytesRecv_;
 
  802     describe (Teuchos::FancyOStream& out,
 
  803               const Teuchos::EVerbosityLevel verbLevel =
 
  804                 Teuchos::Describable::verbLevel_default) 
const;
 
  /// Communicator handle.  doPosts() asks it for the calling process's
  /// rank and passes it to the send calls.
  809     Teuchos::RCP<const Teuchos::Comm<int> > comm_;
 
  /// Output stream for diagnostics; doPosts() wraps it in Teuchos::OSTab
  /// objects.
  812     Teuchos::RCP<Teuchos::FancyOStream> out_;
 
  /// Copied into the local \c doBarrier flag by doPosts().  doPosts()
  /// throws std::logic_error if the send type is ready-send while this is
  /// false.
  824     bool barrierBetween_;
 
  /// Destination rank of each send block.  In doPosts(), an entry equal
  /// to the calling rank is not sent over the communicator; all other
  /// entries are used as the target of a send.
  850     Teuchos::Array<int> procsTo_;
 
  /// Per send block p: starting position of that block.  doPosts() uses
  /// it as an offset into \c exports when indicesTo_ is empty, and as
  /// the starting index into indicesTo_ otherwise.
  860     Teuchos::Array<size_t> startsTo_;
 
  /// Per send block p: number of entries in that block (multiplied by
  /// \c numPackets in 3-argument doPosts() to get a buffer length).
  867     Teuchos::Array<size_t> lengthsTo_;
 
  /// 3-argument doPosts() sizes its temporary send buffer as
  /// maxSendLength_ * numPackets on the indicesTo_-nonempty path.
  872     size_t maxSendLength_;
 
  /// If empty, doPosts() sends contiguous ranges of \c exports directly
  /// (logged as "Fast").  Otherwise doPosts() first gathers entries from
  /// \c exports at these indices into a temporary buffer ("Slow").
  889     Teuchos::Array<size_t> indicesTo_;
 
  /// 3-argument doPosts() requires \c imports to hold at least
  /// totalReceiveLength_ * numPackets entries (checked unless the object
  /// was initialized by reverse).
  908     size_t totalReceiveLength_;
 
  /// Per receive block i: number of entries expected in that block.
  915     Teuchos::Array<size_t> lengthsFrom_;
 
  /// Per receive block i: source rank.  doPosts() posts a nonblocking
  /// receive only for entries that differ from the calling rank.
  922     Teuchos::Array<int> procsFrom_;
 
  /// NOTE(review): not referenced in the visible code; by name, the
  /// receive-side counterpart of startsTo_ -- confirm.
  929     Teuchos::Array<size_t> startsFrom_;
 
  /// NOTE(review): not referenced in the visible code; by name, the
  /// receive-side counterpart of indicesTo_ -- confirm.
  936     Teuchos::Array<size_t> indicesFrom_;
 
  /// Handles of nonblocking operations.  doPosts() appends the results
  /// of ireceive / isend here; the *PostsAndWaits wrappers throw
  /// std::runtime_error if this is nonempty when they are called.
  944     Teuchos::Array<Teuchos::RCP<Teuchos::CommRequest<int> > > requests_;
 
  /// Declared \c mutable.  NOTE(review): presumably filled in lazily by
  /// the const method createReverseDistributor() -- confirm.
  950     mutable Teuchos::RCP<Distributor> reverseDistributor_;
 
  /// Set to exports.size() * sizeof(Packet) by the ArrayView-based
  /// *PostsAndWaits wrappers visible in this file.
  953     size_t lastRoundBytesSend_;
 
  /// Set to imports.size() * sizeof(Packet) by the ArrayView-based
  /// *PostsAndWaits wrappers visible in this file.
  956     size_t lastRoundBytesRecv_;
 
  // Timers that exist only when TPETRA_DISTRIBUTOR_TIMERS is defined.
  // Where used in this file, each is dereferenced into a
  // Teuchos::TimeMonitor inside a matching #ifdef block.
  // NOTE(review): the top of this file #undef's TPETRA_DISTRIBUTOR_TIMERS
  // if it was defined; unless it is re-defined afterwards (not visible in
  // this excerpt), these members are compiled out -- confirm.
  958 #ifdef TPETRA_DISTRIBUTOR_TIMERS 
  /// Monitored for the whole of 3-argument doPosts().
  959     Teuchos::RCP<Teuchos::Time> timer_doPosts3_;
 
  /// Monitored for the whole of 4-argument doPosts().
  960     Teuchos::RCP<Teuchos::Time> timer_doPosts4_;
 
  /// NOTE(review): not referenced in the visible code; by name, times
  /// doWaits() -- confirm.
  961     Teuchos::RCP<Teuchos::Time> timer_doWaits_;
 
  /// Monitored around the receive-posting loop of 3-argument doPosts().
  962     Teuchos::RCP<Teuchos::Time> timer_doPosts3_recvs_;
 
  /// Monitored around the receive-posting loop of 4-argument doPosts().
  963     Teuchos::RCP<Teuchos::Time> timer_doPosts4_recvs_;
 
  /// Monitored in the barrier section of 3-argument doPosts().
  964     Teuchos::RCP<Teuchos::Time> timer_doPosts3_barrier_;
 
  /// Monitored in the barrier section of 4-argument doPosts().
  965     Teuchos::RCP<Teuchos::Time> timer_doPosts4_barrier_;
 
  /// Monitored in the send-posting section of 3-argument doPosts().
  966     Teuchos::RCP<Teuchos::Time> timer_doPosts3_sends_;
 
  /// Monitored in the send-posting section of 4-argument doPosts().
  967     Teuchos::RCP<Teuchos::Time> timer_doPosts4_sends_;
 
  971 #endif // TPETRA_DISTRIBUTOR_TIMERS 
  /// NOTE(review): not referenced in the visible code; by name, it
  /// presumably controls whether getTag() returns a different tag for
  /// each code path -- confirm against the getTag() definition.
  984     bool useDistinctTags_;
 
  /// Map a code-path identifier to a message tag; does not modify the
  /// object (declared \c const).
  ///
  /// \param pathTag [in] Identifier of the calling code path.  The visible
  ///   callers pass 0 from 3-argument doPosts() and 1 from 4-argument
  ///   doPosts().
  ///
  /// \return The \c int that those callers then use as the message tag
  ///   for the sends they post.
  990     int getTag (
const int pathTag) 
const;
 
 /// Takes no arguments and returns nothing; the definition is not part
 /// of this excerpt.
 /// NOTE(review): by name, presumably derives the receive-side members
 /// (e.g. procsFrom_, lengthsFrom_) from the send-side ones -- confirm.
 1002     void computeReceives ();
 
 /// Declaration only; the definition is not part of this excerpt.
 ///
 /// \tparam Ordinal Element type of the ID arrays.
 ///
 /// \param remoteGIDs [in] Read-only view of \c Ordinal IDs.
 /// \param remoteProcIDs [in] Read-only view of \c int process IDs.
 /// \param exportGIDs [out] Non-const array of \c Ordinal.
 /// \param exportProcIDs [out] Non-const array of \c int.
 ///
 /// NOTE(review): the in/out roles above follow only from the const-ness
 /// of the parameter types; presumably the two export arrays are filled
 /// from the two remote arrays -- confirm against the definition.
 1016     template <
class Ordinal>
 
 1017     void computeSends (
const Teuchos::ArrayView<const Ordinal> &remoteGIDs,
 
 1018                        const Teuchos::ArrayView<const int> &remoteProcIDs,
 
 1019                        Teuchos::Array<Ordinal> &exportGIDs,
 
 1020                        Teuchos::Array<int> &exportProcIDs);
 
 /// Declared \c const; takes no arguments and returns nothing.  The
 /// definition is not part of this excerpt.
 /// NOTE(review): presumably populates the \c mutable member
 /// reverseDistributor_, which would explain why that member is mutable
 /// while this method is const -- confirm.
 1023     void createReverseDistributor() 
const;
 
 1031     localDescribeToString (
const Teuchos::EVerbosityLevel vl) 
const;
 
 1035   template <
class Packet>
 
 1039                    const Teuchos::ArrayView<Packet>& imports)
 
 1041     using Teuchos::arcp;
 
 1042     using Teuchos::ArrayRCP;
 
 1043     typedef typename ArrayRCP<const Packet>::size_type size_type;
 
 1045     TEUCHOS_TEST_FOR_EXCEPTION(
 
 1046       requests_.size () != 0, std::runtime_error, 
"Tpetra::Distributor::" 
 1047       "doPostsAndWaits(3 args): There are " << requests_.size () <<
 
 1048       " outstanding nonblocking messages pending.  It is incorrect to call " 
 1049       "this method with posts outstanding.");
 
 1061     ArrayRCP<const Packet> exportsArcp (exports.getRawPtr (),
 
 1062                                         static_cast<size_type
> (0),
 
 1063                                         exports.size(), 
false);
 
 1078              arcp<Packet> (imports.getRawPtr (), 0, imports.size (), 
false));
 
 1081     lastRoundBytesSend_ = exports.size () * 
sizeof (Packet);
 
 1082     lastRoundBytesRecv_ = imports.size () * 
sizeof (Packet);
 
 1085   template <
class Packet>
 
 1088                    const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
 
 1089                    const Teuchos::ArrayView<Packet> &imports,
 
 1090                    const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID)
 
 1092     using Teuchos::arcp;
 
 1093     using Teuchos::ArrayRCP;
 
 1095     TEUCHOS_TEST_FOR_EXCEPTION(
 
 1096       requests_.size () != 0, std::runtime_error, 
"Tpetra::Distributor::" 
 1097       "doPostsAndWaits: There are " << requests_.size () << 
" outstanding " 
 1098       "nonblocking messages pending.  It is incorrect to call doPostsAndWaits " 
 1099       "with posts outstanding.");
 
 1112     typedef typename ArrayRCP<const Packet>::size_type size_type;
 
 1113     ArrayRCP<const Packet> exportsArcp (exports.getRawPtr (),
 
 1114                                         static_cast<size_type
> (0),
 
 1115                                         exports.size (), 
false);
 
 1121              numExportPacketsPerLID,
 
 1122              arcp<Packet> (imports.getRawPtr (), 0, imports.size (), 
false),
 
 1123              numImportPacketsPerLID);
 
 1126     lastRoundBytesSend_ = exports.size () * 
sizeof (Packet);
 
 1127     lastRoundBytesRecv_ = imports.size () * 
sizeof (Packet);
 
 1131   template <
class Packet>
 
 1133   doPosts (
const Teuchos::ArrayRCP<const Packet>& exports,
 
 1135            const Teuchos::ArrayRCP<Packet>& imports)
 
 1137     using Teuchos::Array;
 
 1138     using Teuchos::ArrayRCP;
 
 1139     using Teuchos::ArrayView;
 
 1141     using Teuchos::FancyOStream;
 
 1142     using Teuchos::includesVerbLevel;
 
 1143     using Teuchos::ireceive;
 
 1144     using Teuchos::isend;
 
 1145     using Teuchos::OSTab;
 
 1146     using Teuchos::readySend;
 
 1147     using Teuchos::send;
 
 1148     using Teuchos::ssend;
 
 1149     using Teuchos::TypeNameTraits;
 
 1150     using Teuchos::typeName;
 
 1152     typedef Array<size_t>::size_type size_type;
 
 1154 #ifdef TPETRA_DISTRIBUTOR_TIMERS 
 1155     Teuchos::TimeMonitor timeMon (*timer_doPosts3_);
 
 1156 #endif // TPETRA_DISTRIBUTOR_TIMERS 
 1158     const int myRank = comm_->getRank ();
 
 1162     const bool doBarrier = barrierBetween_;
 
 1164     Teuchos::OSTab tab0 (out_);
 
 1165     std::unique_ptr<std::string> prefix;
 
 1167       std::ostringstream os;
 
 1168       os << 
"Proc " << myRank << 
": Distributor::doPosts(3-arg, ArrayRCP): ";
 
 1169       prefix = std::unique_ptr<std::string> (
new std::string (os.str ()));
 
 1173     Teuchos::OSTab tab1 (out_);
 
 1175     TEUCHOS_TEST_FOR_EXCEPTION(
 
 1176       sendType == Details::DISTRIBUTOR_RSEND && ! doBarrier, std::logic_error,
 
 1177       "Tpetra::Distributor::doPosts(3 args, Teuchos::ArrayRCP): Ready-send " 
 1178       "version requires a barrier between posting receives and posting ready " 
 1179       "sends.  This should have been checked before.  " 
 1180       "Please report this bug to the Tpetra developers.");
 
 1182     size_t selfReceiveOffset = 0;
 
 1187     if (howInitialized_ != Details::DISTRIBUTOR_INITIALIZED_BY_REVERSE) {
 
 1194       const size_t totalNumImportPackets = totalReceiveLength_ * numPackets;
 
 1195       TEUCHOS_TEST_FOR_EXCEPTION
 
 1196         (static_cast<size_t> (imports.size ()) < totalNumImportPackets,
 
 1197          std::invalid_argument,
 
 1198          "Tpetra::Distributor::doPosts(3 args, Teuchos::ArrayRCP): " 
 1199          "The 'imports' array must have enough entries to hold the expected number " 
 1200          "of import packets.  imports.size() = " << imports.size () << 
" < " 
 1201          "totalNumImportPackets = " << totalNumImportPackets << 
".");
 
 1209     const int pathTag = 0;
 
 1210     const int tag = this->getTag (pathTag);
 
 1212 #ifdef HAVE_TPETRA_DEBUG 
 1213     TEUCHOS_TEST_FOR_EXCEPTION
 
 1214       (requests_.size () != 0,
 
 1216        "Tpetra::Distributor::doPosts(3 args, Teuchos::ArrayRCP): Process " 
 1217        << myRank << 
": requests_.size() = " << requests_.size () << 
" != 0.");
 
 1218 #endif // HAVE_TPETRA_DEBUG 
 1233     const size_type actualNumReceives = as<size_type> (numReceives_) +
 
 1234       as<size_type> (selfMessage_ ? 1 : 0);
 
 1235     requests_.resize (0);
 
 1238       std::ostringstream os;
 
 1239       os << *prefix << (indicesTo_.empty () ? 
"Fast" : 
"Slow")
 
 1240          << 
": Post receives" << endl;
 
 1250 #ifdef TPETRA_DISTRIBUTOR_TIMERS 
 1251       Teuchos::TimeMonitor timeMonRecvs (*timer_doPosts3_recvs_);
 
 1252 #endif // TPETRA_DISTRIBUTOR_TIMERS 
 1254       size_t curBufOffset = 0;
 
 1255       for (size_type i = 0; i < actualNumReceives; ++i) {
 
 1256         const size_t curBufLen = lengthsFrom_[i] * numPackets;
 
 1257         if (procsFrom_[i] != myRank) {
 
 1259             std::ostringstream os;
 
 1260             os << *prefix << (indicesTo_.empty () ? 
"Fast" : 
"Slow")
 
 1261                << 
": Post irecv: {source: " << procsFrom_[i]
 
 1262                << 
", tag: " << tag << 
"}" << endl;
 
 1272           TEUCHOS_TEST_FOR_EXCEPTION(
 
 1273             curBufOffset + curBufLen > static_cast<size_t> (imports.size ()),
 
 1275             "Tpetra::Distributor::doPosts(3 args, Teuchos::ArrayRCP): " 
 1276             "Exceeded size of 'imports' array in packing loop on Process " <<
 
 1277             myRank << 
".  imports.size() = " << imports.size () << 
" < " 
 1278             "curBufOffset(" << curBufOffset << 
") + curBufLen(" << curBufLen
 
 1280           ArrayRCP<Packet> recvBuf =
 
 1281             imports.persistingView (curBufOffset, curBufLen);
 
 1282           requests_.push_back (ireceive<int, Packet> (recvBuf, procsFrom_[i],
 
 1286           selfReceiveOffset = curBufOffset; 
 
 1288         curBufOffset += curBufLen;
 
 1293 #ifdef TPETRA_DISTRIBUTOR_TIMERS 
 1294       Teuchos::TimeMonitor timeMonBarrier (*timer_doPosts3_barrier_);
 
 1295 #endif // TPETRA_DISTRIBUTOR_TIMERS 
 1298         std::ostringstream os;
 
 1299         os << *prefix << (indicesTo_.empty () ? 
"Fast" : 
"Slow")
 
 1300            << 
": Barrier" << endl;
 
 1311 #ifdef TPETRA_DISTRIBUTOR_TIMERS 
 1312     Teuchos::TimeMonitor timeMonSends (*timer_doPosts3_sends_);
 
 1313 #endif // TPETRA_DISTRIBUTOR_TIMERS 
 1320     size_t numBlocks = numSends_ + selfMessage_;
 
 1321     size_t procIndex = 0;
 
 1322     while ((procIndex < numBlocks) && (procsTo_[procIndex] < myRank)) {
 
 1325     if (procIndex == numBlocks) {
 
 1330     size_t selfIndex = 0;
 
 1333       std::ostringstream os;
 
 1334       os << *prefix << (indicesTo_.empty () ? 
"Fast" : 
"Slow")
 
 1335          << 
": Post sends" << endl;
 
 1339     if (indicesTo_.empty ()) {
 
 1342       for (
size_t i = 0; i < numBlocks; ++i) {
 
 1343         size_t p = i + procIndex;
 
 1344         if (p > (numBlocks - 1)) {
 
 1348         if (procsTo_[p] != myRank) {
 
 1350             std::ostringstream os;
 
 1351             os << *prefix << 
": Post send: {target: " 
 1352                << procsTo_[p] << 
", tag: " << tag << 
"}" << endl;
 
 1356           ArrayView<const Packet> tmpSend =
 
 1357             exports.view (startsTo_[p]*numPackets, lengthsTo_[p]*numPackets);
 
 1359           if (sendType == Details::DISTRIBUTOR_SEND) {
 
 1360             send<int, Packet> (tmpSend.getRawPtr (),
 
 1361                                as<int> (tmpSend.size ()),
 
 1362                                procsTo_[p], tag, *comm_);
 
 1364           else if (sendType == Details::DISTRIBUTOR_ISEND) {
 
 1365             ArrayRCP<const Packet> tmpSendBuf =
 
 1366               exports.persistingView (startsTo_[p] * numPackets,
 
 1367                                       lengthsTo_[p] * numPackets);
 
 1368             requests_.push_back (isend<int, Packet> (tmpSendBuf, procsTo_[p],
 
 1371           else if (sendType == Details::DISTRIBUTOR_RSEND) {
 
 1372             readySend<int, Packet> (tmpSend.getRawPtr (),
 
 1373                                     as<int> (tmpSend.size ()),
 
 1374                                     procsTo_[p], tag, *comm_);
 
 1376           else if (sendType == Details::DISTRIBUTOR_SSEND) {
 
 1377             ssend<int, Packet> (tmpSend.getRawPtr (),
 
 1378                                 as<int> (tmpSend.size ()),
 
 1379                                 procsTo_[p], tag, *comm_);
 
 1381             TEUCHOS_TEST_FOR_EXCEPTION(
 
 1382               true, std::logic_error,
 
 1383               "Tpetra::Distributor::doPosts(3 args, Teuchos::ArrayRCP): " 
 1384               "Invalid send type.  We should never get here.  " 
 1385               "Please report this bug to the Tpetra developers.");
 
 1395           std::ostringstream os;
 
 1396           os << *prefix << 
"Fast: Self-send" << endl;
 
 1406         std::copy (exports.begin()+startsTo_[selfNum]*numPackets,
 
 1407                    exports.begin()+startsTo_[selfNum]*numPackets+lengthsTo_[selfNum]*numPackets,
 
 1408                    imports.begin()+selfReceiveOffset);
 
 1414       ArrayRCP<Packet> sendArray (maxSendLength_ * numPackets); 
 
 1416       TEUCHOS_TEST_FOR_EXCEPTION(
 
 1417         sendType == Details::DISTRIBUTOR_ISEND, std::logic_error,
 
 1418         "Tpetra::Distributor::doPosts(3 args, Teuchos::ArrayRCP): " 
 1419         "The \"send buffer\" code path doesn't currently work with " 
 1420         "nonblocking sends.");
 
 1422       for (
size_t i = 0; i < numBlocks; ++i) {
 
 1423         size_t p = i + procIndex;
 
 1424         if (p > (numBlocks - 1)) {
 
 1428         if (procsTo_[p] != myRank) {
 
 1430             std::ostringstream os;
 
 1431             os << *prefix << 
"Slow: Post send: " 
 1432               "{target: " << procsTo_[p] << 
", tag: " << tag << 
"}" << endl;
 
 1436           typename ArrayView<const Packet>::iterator srcBegin, srcEnd;
 
 1437           size_t sendArrayOffset = 0;
 
 1438           size_t j = startsTo_[p];
 
 1439           for (
size_t k = 0; k < lengthsTo_[p]; ++k, ++j) {
 
 1440             srcBegin = exports.begin() + indicesTo_[j]*numPackets;
 
 1441             srcEnd   = srcBegin + numPackets;
 
 1442             std::copy (srcBegin, srcEnd, sendArray.begin()+sendArrayOffset);
 
 1443             sendArrayOffset += numPackets;
 
 1445           ArrayView<const Packet> tmpSend =
 
 1446             sendArray.view (0, lengthsTo_[p]*numPackets);
 
 1448           if (sendType == Details::DISTRIBUTOR_SEND) {
 
 1449             send<int, Packet> (tmpSend.getRawPtr (),
 
 1450                                as<int> (tmpSend.size ()),
 
 1451                                procsTo_[p], tag, *comm_);
 
 1453           else if (sendType == Details::DISTRIBUTOR_ISEND) {
 
 1454             ArrayRCP<const Packet> tmpSendBuf =
 
 1455               sendArray.persistingView (0, lengthsTo_[p] * numPackets);
 
 1456             requests_.push_back (isend<int, Packet> (tmpSendBuf, procsTo_[p],
 
 1459           else if (sendType == Details::DISTRIBUTOR_RSEND) {
 
 1460             readySend<int, Packet> (tmpSend.getRawPtr (),
 
 1461                                     as<int> (tmpSend.size ()),
 
 1462                                     procsTo_[p], tag, *comm_);
 
 1464           else if (sendType == Details::DISTRIBUTOR_SSEND) {
 
 1465             ssend<int, Packet> (tmpSend.getRawPtr (),
 
 1466                                 as<int> (tmpSend.size ()),
 
 1467                                 procsTo_[p], tag, *comm_);
 
 1470             TEUCHOS_TEST_FOR_EXCEPTION(
 
 1471               true, std::logic_error,
 
 1472               "Tpetra::Distributor::doPosts(3 args, Teuchos::ArrayRCP): " 
 1473               "Invalid send type.  We should never get here.  " 
 1474               "Please report this bug to the Tpetra developers.");
 
 1479           selfIndex = startsTo_[p];
 
 1485           std::ostringstream os;
 
 1486           os << *prefix << 
"Slow: Self-send" << endl;
 
 1489         for (
size_t k = 0; k < lengthsTo_[selfNum]; ++k) {
 
 1490           std::copy (exports.begin()+indicesTo_[selfIndex]*numPackets,
 
 1491                      exports.begin()+indicesTo_[selfIndex]*numPackets + numPackets,
 
 1492                      imports.begin() + selfReceiveOffset);
 
 1494           selfReceiveOffset += numPackets;
 
 1500       std::ostringstream os;
 
 1501       os << *prefix << 
"Done!" << endl;
 
 1506   template <
class Packet>
 
 1508   doPosts (
const Teuchos::ArrayRCP<const Packet>& exports,
 
 1509            const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
 
 1510            const Teuchos::ArrayRCP<Packet>& imports,
 
 1511            const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID)
 
 1513     using Teuchos::Array;
 
 1514     using Teuchos::ArrayRCP;
 
 1515     using Teuchos::ArrayView;
 
 1517     using Teuchos::ireceive;
 
 1518     using Teuchos::isend;
 
 1519     using Teuchos::readySend;
 
 1520     using Teuchos::send;
 
 1521     using Teuchos::ssend;
 
 1522     using Teuchos::TypeNameTraits;
 
 1523 #ifdef HAVE_TEUCHOS_DEBUG 
 1524     using Teuchos::OSTab;
 
 1525 #endif // HAVE_TEUCHOS_DEBUG 
 1527     typedef Array<size_t>::size_type size_type;
 
 1529     Teuchos::OSTab tab (out_);
 
 1531 #ifdef TPETRA_DISTRIBUTOR_TIMERS 
 1532     Teuchos::TimeMonitor timeMon (*timer_doPosts4_);
 
 1533 #endif // TPETRA_DISTRIBUTOR_TIMERS 
 1538     const bool doBarrier = barrierBetween_;
 
 1564     TEUCHOS_TEST_FOR_EXCEPTION(
 
 1565       sendType == Details::DISTRIBUTOR_RSEND && ! doBarrier,
 
 1567       "Tpetra::Distributor::doPosts(4 args, Teuchos::ArrayRCP): Ready-send " 
 1568       "version requires a barrier between posting receives and posting ready " 
 1569       "ends.  This should have been checked before.  " 
 1570       "Please report this bug to the Tpetra developers.");
 
 1572     const int myProcID = comm_->getRank ();
 
 1573     size_t selfReceiveOffset = 0;
 
 1575 #ifdef HAVE_TEUCHOS_DEBUG 
 1577     size_t totalNumImportPackets = 0;
 
 1578     for (
size_t ii = 0; ii < static_cast<size_t> (numImportPacketsPerLID.size ()); ++ii) {
 
 1579       totalNumImportPackets += numImportPacketsPerLID[ii];
 
 1581     TEUCHOS_TEST_FOR_EXCEPTION(
 
 1582       static_cast<size_t> (imports.size ()) < totalNumImportPackets,
 
 1584       "Tpetra::Distributor::doPosts(4 args, Teuchos::ArrayRCP): The 'imports' " 
 1585       "array must have enough entries to hold the expected number of import " 
 1586       "packets.  imports.size() = " << imports.size() << 
" < " 
 1587       "totalNumImportPackets = " << totalNumImportPackets << 
".");
 
 1588 #endif // HAVE_TEUCHOS_DEBUG 
 1595     const int pathTag = 1;
 
 1596     const int tag = this->getTag (pathTag);
 
 1598 #ifdef HAVE_TEUCHOS_DEBUG 
 1599     TEUCHOS_TEST_FOR_EXCEPTION
 
 1600       (requests_.size () != 0,
 
 1602        "Tpetra::Distributor::doPosts(4 args, Teuchos::ArrayRCP): Process " 
 1603        << myProcID << 
": requests_.size() = " << requests_.size ()
 
 1605 #endif // HAVE_TEUCHOS_DEBUG 
 1607       std::ostringstream os;
 
 1608       os << 
"Proc " << myProcID << 
": doPosts(4 args, Teuchos::ArrayRCP, " 
 1609          << (indicesTo_.empty () ? 
"fast" : 
"slow") << 
")" << endl;
 
 1626     const size_type actualNumReceives = as<size_type> (numReceives_) +
 
 1627       as<size_type> (selfMessage_ ? 1 : 0);
 
 1628     requests_.resize (0);
 
 1636 #ifdef TPETRA_DISTRIBUTOR_TIMERS 
 1637       Teuchos::TimeMonitor timeMonRecvs (*timer_doPosts4_recvs_);
 
 1638 #endif // TPETRA_DISTRIBUTOR_TIMERS 
 1640       size_t curBufferOffset = 0;
 
 1641       size_t curLIDoffset = 0;
 
 1642       for (size_type i = 0; i < actualNumReceives; ++i) {
 
 1643         size_t totalPacketsFrom_i = 0;
 
 1644         for (
size_t j = 0; j < lengthsFrom_[i]; ++j) {
 
 1645           totalPacketsFrom_i += numImportPacketsPerLID[curLIDoffset+j];
 
 1647         curLIDoffset += lengthsFrom_[i];
 
 1648         if (procsFrom_[i] != myProcID && totalPacketsFrom_i) {
 
 1657           ArrayRCP<Packet> recvBuf =
 
 1658             imports.persistingView (curBufferOffset, totalPacketsFrom_i);
 
 1659           requests_.push_back (ireceive<int, Packet> (recvBuf, procsFrom_[i],
 
 1663           selfReceiveOffset = curBufferOffset; 
 
 1665         curBufferOffset += totalPacketsFrom_i;
 
 1670 #ifdef TPETRA_DISTRIBUTOR_TIMERS 
 1671       Teuchos::TimeMonitor timeMonBarrier (*timer_doPosts4_barrier_);
 
 1672 #endif // TPETRA_DISTRIBUTOR_TIMERS 
 1681 #ifdef TPETRA_DISTRIBUTOR_TIMERS 
 1682     Teuchos::TimeMonitor timeMonSends (*timer_doPosts4_sends_);
 
 1683 #endif // TPETRA_DISTRIBUTOR_TIMERS 
 1687     Array<size_t> sendPacketOffsets(numSends_,0), packetsPerSend(numSends_,0);
 
 1688     size_t maxNumPackets = 0;
 
 1689     size_t curPKToffset = 0;
 
 1690     for (
size_t pp=0; pp<numSends_; ++pp) {
 
 1691       sendPacketOffsets[pp] = curPKToffset;
 
 1692       size_t numPackets = 0;
 
 1693       for (
size_t j=startsTo_[pp]; j<startsTo_[pp]+lengthsTo_[pp]; ++j) {
 
 1694         numPackets += numExportPacketsPerLID[j];
 
 1696       if (numPackets > maxNumPackets) maxNumPackets = numPackets;
 
 1697       packetsPerSend[pp] = numPackets;
 
 1698       curPKToffset += numPackets;
 
 1703     size_t numBlocks = numSends_+ selfMessage_;
 
 1704     size_t procIndex = 0;
 
 1705     while ((procIndex < numBlocks) && (procsTo_[procIndex] < myProcID)) {
 
 1708     if (procIndex == numBlocks) {
 
 1713     size_t selfIndex = 0;
 
 1715     if (indicesTo_.empty()) {
 
 1717         std::ostringstream os;
 
 1718         os << 
"Proc " << myProcID
 
 1719            << 
": doPosts(4 args, Teuchos::ArrayRCP, fast): posting sends" << endl;
 
 1725       for (
size_t i = 0; i < numBlocks; ++i) {
 
 1726         size_t p = i + procIndex;
 
 1727         if (p > (numBlocks - 1)) {
 
 1731         if (procsTo_[p] != myProcID && packetsPerSend[p] > 0) {
 
 1732           ArrayView<const Packet> tmpSend =
 
 1733             exports.view (sendPacketOffsets[p], packetsPerSend[p]);
 
 1735           if (sendType == Details::DISTRIBUTOR_SEND) { 
 
 1736             send<int, Packet> (tmpSend.getRawPtr (),
 
 1737                                as<int> (tmpSend.size ()),
 
 1738                                procsTo_[p], tag, *comm_);
 
 1740           else if (sendType == Details::DISTRIBUTOR_RSEND) {
 
 1741             readySend<int, Packet> (tmpSend.getRawPtr (),
 
 1742                                     as<int> (tmpSend.size ()),
 
 1743                                     procsTo_[p], tag, *comm_);
 
 1745           else if (sendType == Details::DISTRIBUTOR_ISEND) {
 
 1746             ArrayRCP<const Packet> tmpSendBuf =
 
 1747               exports.persistingView (sendPacketOffsets[p], packetsPerSend[p]);
 
 1748             requests_.push_back (isend<int, Packet> (tmpSendBuf, procsTo_[p],
 
 1751           else if (sendType == Details::DISTRIBUTOR_SSEND) {
 
 1752             ssend<int, Packet> (tmpSend.getRawPtr (),
 
 1753                                 as<int> (tmpSend.size ()),
 
 1754                                 procsTo_[p], tag, *comm_);
 
 1757             TEUCHOS_TEST_FOR_EXCEPTION(
 
 1758               true, std::logic_error,
 
 1759               "Tpetra::Distributor::doPosts(4 args, Teuchos::ArrayRCP): " 
 1760               "Invalid send type.  We should never get here.  Please report " 
 1761               "this bug to the Tpetra developers.");
 
 1770         std::copy (exports.begin()+sendPacketOffsets[selfNum],
 
 1771                    exports.begin()+sendPacketOffsets[selfNum]+packetsPerSend[selfNum],
 
 1772                    imports.begin()+selfReceiveOffset);
 
 1775         std::ostringstream os;
 
 1776         os << 
"Proc " << myProcID
 
 1777            << 
": doPosts(4 args, Teuchos::ArrayRCP, fast) done" << endl;
 
 1783         std::ostringstream os;
 
 1784         os << 
"Proc " << myProcID
 
 1785            << 
": doPosts(4 args, Teuchos::ArrayRCP, slow): posting sends" << endl;
 
 1790       ArrayRCP<Packet> sendArray (maxNumPackets); 
 
 1792       TEUCHOS_TEST_FOR_EXCEPTION(
 
 1793         sendType == Details::DISTRIBUTOR_ISEND,
 
 1795         "Tpetra::Distributor::doPosts(4 args, Teuchos::ArrayRCP): " 
 1796         "The \"send buffer\" code path may not necessarily work with nonblocking sends.");
 
 1798       Array<size_t> indicesOffsets (numExportPacketsPerLID.size(), 0);
 
 1800       for (
int j=0; j<numExportPacketsPerLID.size(); ++j) {
 
 1801         indicesOffsets[j] = ioffset;
 
 1802         ioffset += numExportPacketsPerLID[j];
 
 1805       for (
size_t i = 0; i < numBlocks; ++i) {
 
 1806         size_t p = i + procIndex;
 
 1807         if (p > (numBlocks - 1)) {
 
 1811         if (procsTo_[p] != myProcID) {
 
 1812           typename ArrayView<const Packet>::iterator srcBegin, srcEnd;
 
 1813           size_t sendArrayOffset = 0;
 
 1814           size_t j = startsTo_[p];
 
 1815           size_t numPacketsTo_p = 0;
 
 1816           for (
size_t k = 0; k < lengthsTo_[p]; ++k, ++j) {
 
 1817             srcBegin = exports.begin() + indicesOffsets[j];
 
 1818             srcEnd   = srcBegin + numExportPacketsPerLID[j];
 
 1819             numPacketsTo_p += numExportPacketsPerLID[j];
 
 1820             std::copy (srcBegin, srcEnd, sendArray.begin()+sendArrayOffset);
 
 1821             sendArrayOffset += numExportPacketsPerLID[j];
 
 1823           if (numPacketsTo_p > 0) {
 
 1824             ArrayView<const Packet> tmpSend =
 
 1825               sendArray.view (0, numPacketsTo_p);
 
 1827             if (sendType == Details::DISTRIBUTOR_RSEND) {
 
 1828               readySend<int, Packet> (tmpSend.getRawPtr (),
 
 1829                                       as<int> (tmpSend.size ()),
 
 1830                                       procsTo_[p], tag, *comm_);
 
 1832             else if (sendType == Details::DISTRIBUTOR_ISEND) {
 
 1833               ArrayRCP<const Packet> tmpSendBuf =
 
 1834                 sendArray.persistingView (0, numPacketsTo_p);
 
 1835               requests_.push_back (isend<int, Packet> (tmpSendBuf, procsTo_[p],
 
 1838             else if (sendType == Details::DISTRIBUTOR_SSEND) {
 
 1839               ssend<int, Packet> (tmpSend.getRawPtr (),
 
 1840                                   as<int> (tmpSend.size ()),
 
 1841                                   procsTo_[p], tag, *comm_);
 
 1844               send<int, Packet> (tmpSend.getRawPtr (),
 
 1845                                  as<int> (tmpSend.size ()),
 
 1846                                  procsTo_[p], tag, *comm_);
 
 1852           selfIndex = startsTo_[p];
 
 1857         for (
size_t k = 0; k < lengthsTo_[selfNum]; ++k) {
 
 1858           std::copy (exports.begin()+indicesOffsets[selfIndex],
 
 1859                      exports.begin()+indicesOffsets[selfIndex]+numExportPacketsPerLID[selfIndex],
 
 1860                      imports.begin() + selfReceiveOffset);
 
 1861           selfReceiveOffset += numExportPacketsPerLID[selfIndex];
 
 1866         std::ostringstream os;
 
 1867         os << 
"Proc " << myProcID
 
 1868            << 
": doPosts(4 args, Teuchos::ArrayRCP, slow) done" << endl;
 
 // Post-and-wait wrapper taking the import buffer as a Teuchos::ArrayView,
 // templated on the Packet type.
 // NOTE(review): this listing omits original lines 1875-1877 and 1879, so the
 // function-name line and the leading parameters are not visible; the name is
 // presumably doReversePostsAndWaits (3 args) -- confirm against the full file.
 1874   template <
class Packet>
 
 1878                           const Teuchos::ArrayView<Packet>& imports)
 
 1880     using Teuchos::arcp;
 
 1881     using Teuchos::ArrayRCP;
 
 // View the caller's export buffer through an ArrayRCP built from its raw
 // pointer, starting at offset 0 and spanning exports.size() entries.
 // NOTE(review): the trailing `false` is presumably the has_ownership flag
 // (non-owning view) -- confirm against the Teuchos::ArrayRCP documentation.
 1894     typedef typename ArrayRCP<const Packet>::size_type size_type;
 
 1895     ArrayRCP<const Packet> exportsArcp (exports.getRawPtr(), as<size_type> (0),
 
 1896                                         exports.size(), 
false);
 
 // The import buffer is wrapped the same way with arcp() and passed as the
 // last argument of a call whose opening line is not shown in this listing.
 1903                     arcp<Packet> (imports.getRawPtr (), 0, imports.size (), 
false));
 
 // Record the size in bytes of this round's buffers: entry count times
 // sizeof(Packet).
 1906     lastRoundBytesSend_ = exports.size() * 
sizeof(Packet);
 
 1907     lastRoundBytesRecv_ = imports.size() * 
sizeof(Packet);
 
 // doReversePostsAndWaits (4 args), per the error message below: variant that
 // takes per-LID packet counts for both exports and imports, with the import
 // buffer given as a Teuchos::ArrayView.
 // NOTE(review): the function-name line and the first parameter (original
 // lines 1911-1912) are missing from this listing.
 1910   template <
class Packet>
 
 1913                           const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
 
 1914                           const Teuchos::ArrayView<Packet> &imports,
 
 1915                           const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID)
 
 1918     using Teuchos::arcp;
 
 1919     using Teuchos::ArrayRCP;
 
 // Refuse to start while earlier nonblocking requests are still recorded in
 // requests_; reported as std::runtime_error.
 1921     TEUCHOS_TEST_FOR_EXCEPTION(
 
 1922       requests_.size () != 0, std::runtime_error, 
"Tpetra::Distributor::" 
 1923       "doReversePostsAndWaits(4 args): There are " << requests_.size ()
 
 1924       << 
" outstanding nonblocking messages pending.  It is incorrect to call " 
 1925       "this method with posts outstanding.");
 
 // View the export buffer through an ArrayRCP built from its raw pointer
 // (offset 0, exports.size() entries).
 // NOTE(review): the trailing `false` is presumably the has_ownership flag --
 // confirm against the Teuchos::ArrayRCP documentation.
 1938     typedef typename ArrayRCP<const Packet>::size_type size_type;
 
 1939     ArrayRCP<const Packet> exportsArcp (exports.getRawPtr (), as<size_type> (0),
 
 1940                                         exports.size (), 
false);
 
 // Remaining arguments of the forwarding call (its opening line is not shown
 // in this listing): the export counts, the wrapped import buffer, and the
 // import counts.
 1942                     numExportPacketsPerLID,
 
 1943                     arcp<Packet> (imports.getRawPtr (), 0, imports.size (), 
false),
 
 1944                     numImportPacketsPerLID);
 
 // Record the size in bytes of this round's buffers: entry count times
 // sizeof(Packet).
 1947     lastRoundBytesSend_ = exports.size() * 
sizeof(Packet);
 
 1948     lastRoundBytesRecv_ = imports.size() * 
sizeof(Packet);
 
 // doReversePosts (3 args), per the error message below: posts the reverse
 // communication by delegating to reverseDistributor_.
 // NOTE(review): the function-name line and leading parameters (original
 // lines 1952-1954) are missing from this listing.
 1951   template <
class Packet>
 
 1955                   const Teuchos::ArrayRCP<Packet>& imports)
 
 // Reverse mode is rejected with std::runtime_error whenever indicesTo_ is
 // non-empty.
 1958     TEUCHOS_TEST_FOR_EXCEPTION(
 
 1959       ! indicesTo_.empty (), std::runtime_error,
 
 1960       "Tpetra::Distributor::doReversePosts(3 args): Can only do reverse " 
 1961       "communication when original data are blocked by process.");
 
 // Create the reverse distributor on first use.
 1962     if (reverseDistributor_.is_null ()) {
 
 1963       createReverseDistributor ();
 
 // Forward the same three arguments to the reverse distributor's doPosts.
 1965     reverseDistributor_->doPosts (exports, numPackets, imports);
 
 // doReversePosts, 4-argument overload (per-LID packet counts for exports and
 // imports): posts the reverse communication by delegating to
 // reverseDistributor_.
 // NOTE(review): the function-name line and the first parameter (original
 // lines 1969-1970) are missing from this listing.
 1968   template <
class Packet>
 
 1971                   const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
 
 1972                   const Teuchos::ArrayRCP<Packet>& imports,
 
 1973                   const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID)
 
 // Reverse mode is rejected with std::runtime_error whenever indicesTo_ is
 // non-empty.  The message names this overload as "(4 args)" so it can be
 // told apart from the 3-argument overload's identical check.
 1976     TEUCHOS_TEST_FOR_EXCEPTION(
 
 1977       ! indicesTo_.empty (), std::runtime_error,
 
 1978       "Tpetra::Distributor::doReversePosts(4 args): Can only do reverse " 
 1979       "communication when original data are blocked by process.");
 
 // Create the reverse distributor on first use.
 1980     if (reverseDistributor_.is_null ()) {
 
 1981       createReverseDistributor ();
 
 // Forward all four arguments to the reverse distributor's doPosts.
 1983     reverseDistributor_->doPosts (exports, numExportPacketsPerLID,
 
 1984                                   imports, numImportPacketsPerLID);
 
 // doPostsAndWaits, 3-argument Kokkos overload: enabled only when both
 // ExpView and ImpView are Kokkos views.  Checks that no requests are
 // outstanding, then calls doPosts (exports, numPackets, imports).
 // NOTE(review): this listing omits several original lines (e.g. 1989, 1991,
 // 1993-1997, the guards around the logging blocks, and everything after
 // line 2029), so the doWaits call named in the log text is not visible here.
 1987   template <
class ExpView, 
class ImpView>
 
 1988   typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
 
 1990   doPostsAndWaits (
const ExpView& exports,
 
 1992                    const ImpView& imports)
 
 // Two output-indentation tabs held through RCPs; they are assigned inside
 // the logging section below.
 1998     RCP<Teuchos::OSTab> tab0, tab1;
 
 // Log entry: rank, send type, and the barrierBetween_ setting.
 2000       tab0 = rcp (
new Teuchos::OSTab (out_));
 
 2001       const int myRank = comm_->getRank ();
 
 2002       std::ostringstream os;
 
 2003       os << 
"Proc " << myRank
 
 2004          << 
": Distributor::doPostsAndWaits(3 args, Kokkos): " 
 2005          << 
"{sendType: " << DistributorSendTypeEnumToString (sendType_)
 
 2006          << 
", barrierBetween: " << barrierBetween_ << 
"}" << endl;
 
 2008       tab1 = rcp (
new Teuchos::OSTab (out_));
 
 // Refuse to start while earlier nonblocking requests are still recorded in
 // requests_; reported as std::runtime_error.
 2011     TEUCHOS_TEST_FOR_EXCEPTION(
 
 2012       requests_.size () != 0, std::runtime_error, 
"Tpetra::Distributor::" 
 2013       "doPostsAndWaits(3 args): There are " << requests_.size () <<
 
 2014       " outstanding nonblocking messages pending.  It is incorrect to call " 
 2015       "this method with posts outstanding.");
 
 // Log message emitted before the posting step.
 2018       const int myRank = comm_->getRank ();
 
 2019       std::ostringstream os;
 
 2020       os << 
"Proc " << myRank
 
 2021          << 
": Distributor::doPostsAndWaits: Call doPosts" << endl;
 
 2024     doPosts (exports, numPackets, imports);
 
 // Log message emitted before the waiting step.
 2026       const int myRank = comm_->getRank ();
 
 2027       std::ostringstream os;
 
 2028       os << 
"Proc " << myRank
 
 2029          << 
": Distributor::doPostsAndWaits: Call doWaits" << endl;
 
 // doPostsAndWaits, 4-argument Kokkos overload (per-LID packet counts for
 // exports and imports): enabled only when both ExpView and ImpView are
 // Kokkos views.
 // NOTE(review): lines after original line 2049 are missing from this
 // listing, so whatever follows the doPosts call is not visible here.
 2035   template <
class ExpView, 
class ImpView>
 
 2036   typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
 
 2038   doPostsAndWaits (
const ExpView& exports,
 
 2039                    const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
 
 2040                    const ImpView& imports,
 
 2041                    const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID)
 
 // Refuse to start while earlier nonblocking requests are still recorded in
 // requests_; reported as std::runtime_error.
 2043     TEUCHOS_TEST_FOR_EXCEPTION(
 
 2044       requests_.size () != 0, std::runtime_error,
 
 2045       "Tpetra::Distributor::doPostsAndWaits(4 args): There are " 
 2046       << requests_.size () << 
" outstanding nonblocking messages pending.  " 
 2047       "It is incorrect to call this method with posts outstanding.");
 
 // Forward all four arguments to the 4-argument doPosts.
 2049     doPosts (exports, numExportPacketsPerLID, imports, numImportPacketsPerLID);
 
 // doPosts, 3-argument Kokkos overload (fixed numPackets per LID): enabled
 // only when both ExpView and ImpView are Kokkos views.  Posts one receive
 // per remote source into consecutive segments of 'imports', then sends the
 // data in 'exports' to each target in procsTo_, and copies any self-message
 // directly from 'exports' into 'imports'.
 // NOTE(review): this listing omits many original lines (the numPackets
 // parameter, braces, the guards around logging blocks, the 'else' branches
 // that set selfNum, etc.); comments below describe only what is visible.
 2054   template <
class ExpView, 
class ImpView>
 
 2055   typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
 
 2057   doPosts (
const ExpView &exports,
 
 2059            const ImpView &imports)
 
 2061     using Teuchos::Array;
 
 2063     using Teuchos::FancyOStream;
 
 2064     using Teuchos::includesVerbLevel;
 
 2065     using Teuchos::ireceive;
 
 2066     using Teuchos::isend;
 
 2067     using Teuchos::OSTab;
 
 2068     using Teuchos::readySend;
 
 2069     using Teuchos::send;
 
 2070     using Teuchos::ssend;
 
 2071     using Teuchos::TypeNameTraits;
 
 2072     using Teuchos::typeName;
 
 2074     using Kokkos::Compat::create_const_view;
 
 2075     using Kokkos::Compat::create_view;
 
 2076     using Kokkos::Compat::subview_offset;
 
 2077     using Kokkos::Compat::deep_copy_offset;
 
 2078     typedef Array<size_t>::size_type size_type;
 
 2079     typedef ExpView exports_view_type;
 
 2080     typedef ImpView imports_view_type;
 
 // With CUDA enabled, reject at compile time any view whose memory space is
 // Kokkos::CudaUVMSpace.
 2082 #ifdef KOKKOS_ENABLE_CUDA 
 2083     static_assert (! std::is_same<typename ExpView::memory_space, Kokkos::CudaUVMSpace>::value &&
 
 2084                    ! std::is_same<typename ImpView::memory_space, Kokkos::CudaUVMSpace>::value,
 
 2085                    "Please do not use Tpetra::Distributor with UVM " 
 2086                    "allocations.  See GitHub issue #1088.");
 
 2087 #endif // KOKKOS_ENABLE_CUDA 
 2089 #ifdef TPETRA_DISTRIBUTOR_TIMERS 
 2090     Teuchos::TimeMonitor timeMon (*timer_doPosts3_);
 
 2091 #endif // TPETRA_DISTRIBUTOR_TIMERS 
 2093     const int myRank = comm_->getRank ();
 
 2097     const bool doBarrier = barrierBetween_;
 
 2099     Teuchos::OSTab tab0 (out_);
 
 2101       std::ostringstream os;
 
 2102       os << 
"Proc " << myRank
 
 2103          << 
": Distributor::doPosts(3 args, Kokkos)" << endl;
 
 2106     Teuchos::OSTab tab1 (out_);
 
 // Ready-sends are only accepted together with the barrier option; the
 // message treats any other combination as an internal bug.
 2108     TEUCHOS_TEST_FOR_EXCEPTION(
 
 2109       sendType == Details::DISTRIBUTOR_RSEND && ! doBarrier,
 
 2111       "Tpetra::Distributor::doPosts(3 args, Kokkos): Ready-send version " 
 2112       "requires a barrier between posting receives and posting ready sends.  " 
 2113       "This should have been checked before.  " 
 2114       "Please report this bug to the Tpetra developers.");
 
 2116     size_t selfReceiveOffset = 0;
 
 // Expected number of incoming packets: totalReceiveLength_ * numPackets.
 // The checks below compare imports.extent(0) against it; under
 // HAVE_TPETRA_DEBUG the local result is first max-reduced over comm_.
 2123       const size_t totalNumImportPackets = totalReceiveLength_ * numPackets;
 
 2126         std::ostringstream os;
 
 2127         os << 
"Proc " << myRank << 
": doPosts: totalNumImportPackets = " <<
 
 2128           totalNumImportPackets << 
" = " << totalReceiveLength_ << 
" * " <<
 
 2129           numPackets << 
"; imports.extent(0) = " << imports.extent (0)
 
 2134 #ifdef HAVE_TPETRA_DEBUG 
 2137         const size_t importBufSize = 
static_cast<size_t> (imports.extent (0));
 
 2138         const int lclBad = (importBufSize < totalNumImportPackets) ? 1 : 0;
 
 2140         using Teuchos::reduceAll;
 
 2141         using Teuchos::REDUCE_MAX;
 
 2142         using Teuchos::outArg;
 
 2143         reduceAll (*comm_, REDUCE_MAX, lclBad, outArg (gblBad));
 
 2144         TEUCHOS_TEST_FOR_EXCEPTION
 
 2147            "Tpetra::Distributor::doPosts(3 args, Kokkos): " 
 2148            "On one or more MPI processes, the 'imports' array " 
 2149            "does not have enough entries to hold the expected number of " 
 2150            "import packets.  ");
 
 2153       TEUCHOS_TEST_FOR_EXCEPTION
 
 2154         (static_cast<size_t> (imports.extent (0)) < totalNumImportPackets,
 
 2156          "Tpetra::Distributor::doPosts(3 args, Kokkos): The 'imports' " 
 2157          "array must have enough entries to hold the expected number of import " 
 2158          "packets.  imports.extent(0) = " << imports.extent (0) << 
" < " 
 2159          "totalNumImportPackets = " << totalNumImportPackets << 
" = " 
 2160          "totalReceiveLength_ (" << totalReceiveLength_ << 
") * numPackets (" 
 2161          << numPackets << 
").");
 
 2162 #endif // HAVE_TPETRA_DEBUG 
 // Message tag for this code path, obtained from getTag with path index 0.
 2170     const int pathTag = 0;
 
 2171     const int tag = this->getTag (pathTag);
 
 2173 #ifdef HAVE_TPETRA_DEBUG 
 2174     TEUCHOS_TEST_FOR_EXCEPTION
 
 2175       (requests_.size () != 0,
 
 2177        "Tpetra::Distributor::doPosts(3 args, Kokkos): Process " 
 2178        << myRank << 
": requests_.size() = " << requests_.size () << 
" != 0.");
 
 2179 #endif // HAVE_TPETRA_DEBUG 
 // The receive loop covers numReceives_ entries plus one more when there is
 // a self-message; requests_ is emptied before any request is recorded.
 2194     const size_type actualNumReceives = as<size_type> (numReceives_) +
 
 2195       as<size_type> (selfMessage_ ? 1 : 0);
 
 2196     requests_.resize (0);
 
 2199       std::ostringstream os;
 
 2200       os << 
"Proc " << myRank << 
": doPosts(3 args, Kokkos, " 
 2201          << (indicesTo_.empty () ? 
"fast" : 
"slow") << 
"): Post receives" 
 2212 #ifdef TPETRA_DISTRIBUTOR_TIMERS 
 2213       Teuchos::TimeMonitor timeMonRecvs (*timer_doPosts3_recvs_);
 
 2214 #endif // TPETRA_DISTRIBUTOR_TIMERS 
 // Walk the sources in order.  Each one owns the next
 // lengthsFrom_[i] * numPackets entries of 'imports'.  A nonblocking receive
 // is posted for every source other than this rank; for this rank's own
 // entry only the buffer offset is remembered (selfReceiveOffset).
 2216       size_t curBufferOffset = 0;
 
 2217       for (size_type i = 0; i < actualNumReceives; ++i) {
 
 2218         const size_t curBufLen = lengthsFrom_[i] * numPackets;
 
 2219         if (procsFrom_[i] != myRank) {
 
 2221             std::ostringstream os;
 
 2222             os << 
"Proc " << myRank << 
": doPosts(3 args, Kokkos, " 
 2223                << (indicesTo_.empty () ? 
"fast" : 
"slow") << 
"): " 
 2224                << 
"Post irecv: {source: " << procsFrom_[i]
 
 2225                << 
", tag: " << tag << 
"}" << endl;
 
 2235           TEUCHOS_TEST_FOR_EXCEPTION(
 
 2236             curBufferOffset + curBufLen > static_cast<size_t> (imports.size ()),
 
 2237             std::logic_error, 
"Tpetra::Distributor::doPosts(3 args, Kokkos): " 
 2238             "Exceeded size of 'imports' array in packing loop on Process " <<
 
 2239             myRank << 
".  imports.size() = " << imports.size () << 
" < " 
 2240             "curBufferOffset(" << curBufferOffset << 
") + curBufLen(" <<
 
 2242           imports_view_type recvBuf =
 
 2243             subview_offset (imports, curBufferOffset, curBufLen);
 
 2244           requests_.push_back (ireceive<int> (recvBuf, procsFrom_[i],
 
 2248           selfReceiveOffset = curBufferOffset; 
 
 2250         curBufferOffset += curBufLen;
 
 2255 #ifdef TPETRA_DISTRIBUTOR_TIMERS 
 2256       Teuchos::TimeMonitor timeMonBarrier (*timer_doPosts3_barrier_);
 
 2257 #endif // TPETRA_DISTRIBUTOR_TIMERS 
 2260         std::ostringstream os;
 
 2261         os << 
"Proc " << myRank << 
": doPosts(3 args, Kokkos, " 
 2262            << (indicesTo_.empty () ? 
"fast" : 
"slow") << 
"): Barrier" << endl;
 
 2273 #ifdef TPETRA_DISTRIBUTOR_TIMERS 
 2274     Teuchos::TimeMonitor timeMonSends (*timer_doPosts3_sends_);
 
 2275 #endif // TPETRA_DISTRIBUTOR_TIMERS 
 // Choose where the send loop starts: scan procsTo_ while its entries are
 // below this rank.  The loops below visit numBlocks entries beginning at
 // procIndex.
 // NOTE(review): the bodies of the 'while' and of the two 'if' statements
 // that adjust procIndex / p are not shown in this listing; presumably they
 // advance procIndex and wrap back to the start -- confirm against the full
 // file.
 2282     size_t numBlocks = numSends_ + selfMessage_;
 
 2283     size_t procIndex = 0;
 
 2284     while ((procIndex < numBlocks) && (procsTo_[procIndex] < myRank)) {
 
 2287     if (procIndex == numBlocks) {
 
 2292     size_t selfIndex = 0;
 
 2295       std::ostringstream os;
 
 2296       os << 
"Proc " << myRank << 
": doPosts(3 args, Kokkos, " 
 2297          << (indicesTo_.empty () ? 
"fast" : 
"slow") << 
"): Post sends" << endl;
 
 // "Fast" path (indicesTo_ is empty): each target's data is the contiguous
 // range of 'exports' starting at startsTo_[p]*numPackets with length
 // lengthsTo_[p]*numPackets, so it is sent straight from a subview.
 2301     if (indicesTo_.empty()) {
 
 2303         std::ostringstream os;
 
 2304         os << 
"Proc " << myRank
 
 2305            << 
": doPosts(3 args, Kokkos, fast): posting sends" << endl;
 
 2311       for (
size_t i = 0; i < numBlocks; ++i) {
 
 2312         size_t p = i + procIndex;
 
 2313         if (p > (numBlocks - 1)) {
 
 2317         if (procsTo_[p] != myRank) {
 
 2319             std::ostringstream os;
 
 2320             os << 
"Proc " << myRank << 
": doPosts(3 args, Kokkos, fast): Post send: " 
 2321               "{target: " << procsTo_[p] << 
", tag: " << tag << 
"}" << endl;
 
 2325           exports_view_type tmpSend = subview_offset(
 
 2326             exports, startsTo_[p]*numPackets, lengthsTo_[p]*numPackets);
 
 2328           if (sendType == Details::DISTRIBUTOR_SEND) {
 
 2330                        as<int> (tmpSend.size ()),
 
 2331                        procsTo_[p], tag, *comm_);
 
 2333           else if (sendType == Details::DISTRIBUTOR_ISEND) {
 
 2334             exports_view_type tmpSendBuf =
 
 2335               subview_offset (exports, startsTo_[p] * numPackets,
 
 2336                               lengthsTo_[p] * numPackets);
 
 2337             requests_.push_back (isend<int> (tmpSendBuf, procsTo_[p],
 
 2340           else if (sendType == Details::DISTRIBUTOR_RSEND) {
 
 2341             readySend<int> (tmpSend,
 
 2342                             as<int> (tmpSend.size ()),
 
 2343                             procsTo_[p], tag, *comm_);
 
 2345           else if (sendType == Details::DISTRIBUTOR_SSEND) {
 
 2346             ssend<int> (tmpSend,
 
 2347                         as<int> (tmpSend.size ()),
 
 2348                         procsTo_[p], tag, *comm_);
 
 2350             TEUCHOS_TEST_FOR_EXCEPTION(
 
 2353               "Tpetra::Distributor::doPosts(3 args, Kokkos): " 
 2354               "Invalid send type.  We should never get here.  " 
 2355               "Please report this bug to the Tpetra developers.");
 
 2365           std::ostringstream os;
 
 2366           os << 
"Proc " << myRank
 
 2367              << 
": doPosts(3 args, Kokkos, fast): Self-send" << endl;
 
 // Self-message: copy this rank's own block from 'exports' into 'imports'
 // at the offset recorded in the receive loop.
 2377         deep_copy_offset(imports, exports, selfReceiveOffset,
 
 2378                          startsTo_[selfNum]*numPackets,
 
 2379                          lengthsTo_[selfNum]*numPackets);
 
 2382         std::ostringstream os;
 
 2383         os << 
"Proc " << myRank << 
": doPosts(3 args, Kokkos, fast) done" << endl;
 
 2389         std::ostringstream os;
 
 2390         os << 
"Proc " << myRank
 
 2391            << 
": doPosts(3 args, Kokkos, slow): posting sends" << endl;
 
 // "Slow" path: the entries for one target are gathered through indicesTo_
 // into a temporary view, sendArray, sized maxSendLength_ * numPackets, and
 // sent from there.
 2395       typedef typename ExpView::non_const_value_type Packet;
 
 2396       typedef typename ExpView::array_layout Layout;
 
 2397       typedef typename ExpView::device_type Device;
 
 2398       typedef typename ExpView::memory_traits Mem;
 
 2399       Kokkos::View<Packet*,Layout,Device,Mem> sendArray (
"sendArray",
 
 2400                                                maxSendLength_ * numPackets);
 
 // Nonblocking sends are rejected on this path before any packing is done.
 2404       TEUCHOS_TEST_FOR_EXCEPTION(
 
 2405         sendType == Details::DISTRIBUTOR_ISEND,
 
 2407         "Tpetra::Distributor::doPosts(3 args, Kokkos): The \"send buffer\" code path " 
 2408         "doesn't currently work with nonblocking sends.");
 
 2410       for (
size_t i = 0; i < numBlocks; ++i) {
 
 2411         size_t p = i + procIndex;
 
 2412         if (p > (numBlocks - 1)) {
 
 2416         if (procsTo_[p] != myRank) {
 
 2418             std::ostringstream os;
 
 2419             os << 
"Proc " << myRank
 
 2420                << 
": doPosts(3 args, Kokkos, slow): Post send: {target: " 
 2421                << procsTo_[p] << 
", tag: " << tag << 
"}" << endl;
 
 // Pack: for each of the lengthsTo_[p] entries, copy numPackets values from
 // 'exports' at indicesTo_[j]*numPackets to the next slot of sendArray.
 2425           size_t sendArrayOffset = 0;
 
 2426           size_t j = startsTo_[p];
 
 2427           for (
size_t k = 0; k < lengthsTo_[p]; ++k, ++j) {
 
 2428             deep_copy_offset(sendArray, exports, sendArrayOffset,
 
 2429                              indicesTo_[j]*numPackets, numPackets);
 
 2430             sendArrayOffset += numPackets;
 
 2433             subview_offset(sendArray, 
size_t(0), lengthsTo_[p]*numPackets);
 
 2435           if (sendType == Details::DISTRIBUTOR_SEND) {
 
 2437                        as<int> (tmpSend.size ()),
 
 2438                        procsTo_[p], tag, *comm_);
 
 2440           else if (sendType == Details::DISTRIBUTOR_ISEND) {
 
 2441             exports_view_type tmpSendBuf =
 
 2442               subview_offset (sendArray, 
size_t(0), lengthsTo_[p] * numPackets);
 
 2443             requests_.push_back (isend<int> (tmpSendBuf, procsTo_[p],
 
 2446           else if (sendType == Details::DISTRIBUTOR_RSEND) {
 
 2447             readySend<int> (tmpSend,
 
 2448                             as<int> (tmpSend.size ()),
 
 2449                             procsTo_[p], tag, *comm_);
 
 2451           else if (sendType == Details::DISTRIBUTOR_SSEND) {
 
 2452             ssend<int> (tmpSend,
 
 2453                         as<int> (tmpSend.size ()),
 
 2454                         procsTo_[p], tag, *comm_);
 
 2457             TEUCHOS_TEST_FOR_EXCEPTION(
 
 2460               "Tpetra::Distributor::doPosts(3 args, Kokkos): " 
 2461               "Invalid send type.  We should never get here.  " 
 2462               "Please report this bug to the Tpetra developers.");
 
 2467           selfIndex = startsTo_[p];
 
 2473           std::ostringstream os;
 
 2474           os << 
"Proc " << myRank
 
 2475              << 
": doPosts(3 args, Kokkos, slow): Self-send" << endl;
 
 // Self-message on the slow path: copy numPackets values per entry from
 // 'exports' (located through indicesTo_[selfIndex]) into 'imports',
 // advancing selfReceiveOffset after each copy.
 // NOTE(review): the statement that advances selfIndex (original line 2481)
 // is not shown in this listing.
 2478         for (
size_t k = 0; k < lengthsTo_[selfNum]; ++k) {
 
 2479           deep_copy_offset(imports, exports, selfReceiveOffset,
 
 2480                            indicesTo_[selfIndex]*numPackets, numPackets);
 
 2482           selfReceiveOffset += numPackets;
 
 2486         std::ostringstream os;
 
 2487         os << 
"Proc " << myRank
 
 2488            << 
": doPosts(3 args, Kokkos, slow) done" << endl;
 
 2494       std::ostringstream os;
 
 2495       os << 
"Proc " << myRank << 
": doPosts done" << endl;
 
 2500   template <
class ExpView, 
class ImpView>
 
 2501   typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
 
 2503   doPosts (
const ExpView &exports,
 
 2504            const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
 
 2505            const ImpView &imports,
 
 2506            const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID)
 
 2508     using Teuchos::Array;
 
 2510     using Teuchos::ireceive;
 
 2511     using Teuchos::isend;
 
 2512     using Teuchos::readySend;
 
 2513     using Teuchos::send;
 
 2514     using Teuchos::ssend;
 
 2515     using Teuchos::TypeNameTraits;
 
 2516 #ifdef HAVE_TEUCHOS_DEBUG 
 2517     using Teuchos::OSTab;
 
 2518 #endif // HAVE_TEUCHOS_DEBUG 
 2520     using Kokkos::Compat::create_const_view;
 
 2521     using Kokkos::Compat::create_view;
 
 2522     using Kokkos::Compat::subview_offset;
 
 2523     using Kokkos::Compat::deep_copy_offset;
 
 2524     typedef Array<size_t>::size_type size_type;
 
 2525     typedef ExpView exports_view_type;
 
 2526     typedef ImpView imports_view_type;
 
 2528 #ifdef KOKKOS_ENABLE_CUDA 
 2529     static_assert (! std::is_same<typename ExpView::memory_space, Kokkos::CudaUVMSpace>::value &&
 
 2530                    ! std::is_same<typename ImpView::memory_space, Kokkos::CudaUVMSpace>::value,
 
 2531                    "Please do not use Tpetra::Distributor with UVM " 
 2532                    "allocations.  See GitHub issue #1088.");
 
 2533 #endif // KOKKOS_ENABLE_CUDA 
 2535     Teuchos::OSTab tab (out_);
 
 2537 #ifdef TPETRA_DISTRIBUTOR_TIMERS 
 2538     Teuchos::TimeMonitor timeMon (*timer_doPosts4_);
 
 2539 #endif // TPETRA_DISTRIBUTOR_TIMERS 
 2544     const bool doBarrier = barrierBetween_;
 
 2570     TEUCHOS_TEST_FOR_EXCEPTION(
 
 2571       sendType == Details::DISTRIBUTOR_RSEND && ! doBarrier,
 
 2572       std::logic_error, 
"Tpetra::Distributor::doPosts(4 args, Kokkos): Ready-send " 
 2573       "version requires a barrier between posting receives and posting ready " 
 2574       "sends.  This should have been checked before.  " 
 2575       "Please report this bug to the Tpetra developers.");
 
 2577     const int myProcID = comm_->getRank ();
 
 2578     size_t selfReceiveOffset = 0;
 
 2580 #ifdef HAVE_TEUCHOS_DEBUG 
 2582     size_t totalNumImportPackets = 0;
 
 2583     for (size_type ii = 0; ii < numImportPacketsPerLID.size (); ++ii) {
 
 2584       totalNumImportPackets += numImportPacketsPerLID[ii];
 
 2586     TEUCHOS_TEST_FOR_EXCEPTION(
 
 2587       imports.extent (0) < totalNumImportPackets, std::runtime_error,
 
 2588       "Tpetra::Distributor::doPosts(4 args, Kokkos): The 'imports' array must have " 
 2589       "enough entries to hold the expected number of import packets.  " 
 2590       "imports.extent(0) = " << imports.extent (0) << 
" < " 
 2591       "totalNumImportPackets = " << totalNumImportPackets << 
".");
 
 2592 #endif // HAVE_TEUCHOS_DEBUG 
 2599     const int pathTag = 1;
 
 2600     const int tag = this->getTag (pathTag);
 
 2602 #ifdef HAVE_TEUCHOS_DEBUG 
 2603     TEUCHOS_TEST_FOR_EXCEPTION
 
 2604       (requests_.size () != 0, std::logic_error, 
"Tpetra::Distributor::" 
 2605        "doPosts(4 args, Kokkos): Process " << myProcID << 
": requests_.size () = " 
 2606        << requests_.size () << 
" != 0.");
 
 2607 #endif // HAVE_TEUCHOS_DEBUG 
 2609       std::ostringstream os;
 
 2610       os << 
"Proc " << myProcID << 
": doPosts(4 args, Kokkos, " 
 2611          << (indicesTo_.empty () ? 
"fast" : 
"slow") << 
")" << endl;
 
 2628     const size_type actualNumReceives = as<size_type> (numReceives_) +
 
 2629       as<size_type> (selfMessage_ ? 1 : 0);
 
 2630     requests_.resize (0);
 
 2638 #ifdef TPETRA_DISTRIBUTOR_TIMERS 
 2639       Teuchos::TimeMonitor timeMonRecvs (*timer_doPosts4_recvs_);
 
 2640 #endif // TPETRA_DISTRIBUTOR_TIMERS 
 2642       size_t curBufferOffset = 0;
 
 2643       size_t curLIDoffset = 0;
 
 2644       for (size_type i = 0; i < actualNumReceives; ++i) {
 
 2645         size_t totalPacketsFrom_i = 0;
 
 2646         for (
size_t j = 0; j < lengthsFrom_[i]; ++j) {
 
 2647           totalPacketsFrom_i += numImportPacketsPerLID[curLIDoffset+j];
 
 2649         curLIDoffset += lengthsFrom_[i];
 
 2650         if (procsFrom_[i] != myProcID && totalPacketsFrom_i) {
 
 2659           imports_view_type recvBuf =
 
 2660             subview_offset (imports, curBufferOffset, totalPacketsFrom_i);
 
 2661           requests_.push_back (ireceive<int> (recvBuf, procsFrom_[i],
 
 2665           selfReceiveOffset = curBufferOffset; 
 
 2667         curBufferOffset += totalPacketsFrom_i;
 
 2672 #ifdef TPETRA_DISTRIBUTOR_TIMERS 
 2673       Teuchos::TimeMonitor timeMonBarrier (*timer_doPosts4_barrier_);
 
 2674 #endif // TPETRA_DISTRIBUTOR_TIMERS 
 2683 #ifdef TPETRA_DISTRIBUTOR_TIMERS 
 2684     Teuchos::TimeMonitor timeMonSends (*timer_doPosts4_sends_);
 
 2685 #endif // TPETRA_DISTRIBUTOR_TIMERS 
 2689     Array<size_t> sendPacketOffsets(numSends_,0), packetsPerSend(numSends_,0);
 
 2690     size_t maxNumPackets = 0;
 
 2691     size_t curPKToffset = 0;
 
 2692     for (
size_t pp=0; pp<numSends_; ++pp) {
 
 2693       sendPacketOffsets[pp] = curPKToffset;
 
 2694       size_t numPackets = 0;
 
 2695       for (
size_t j=startsTo_[pp]; j<startsTo_[pp]+lengthsTo_[pp]; ++j) {
 
 2696         numPackets += numExportPacketsPerLID[j];
 
 2698       if (numPackets > maxNumPackets) maxNumPackets = numPackets;
 
 2699       packetsPerSend[pp] = numPackets;
 
 2700       curPKToffset += numPackets;
 
 2705     size_t numBlocks = numSends_+ selfMessage_;
 
 2706     size_t procIndex = 0;
 
 2707     while ((procIndex < numBlocks) && (procsTo_[procIndex] < myProcID)) {
 
 2710     if (procIndex == numBlocks) {
 
 2715     size_t selfIndex = 0;
 
 2716     if (indicesTo_.empty()) {
 
 2718         std::ostringstream os;
 
 2719         os << 
"Proc " << myProcID
 
 2720            << 
": doPosts(4 args, Kokkos, fast): posting sends" << endl;
 
 2726       for (
size_t i = 0; i < numBlocks; ++i) {
 
 2727         size_t p = i + procIndex;
 
 2728         if (p > (numBlocks - 1)) {
 
 2732         if (procsTo_[p] != myProcID && packetsPerSend[p] > 0) {
 
 2733           exports_view_type tmpSend =
 
 2734             subview_offset(exports, sendPacketOffsets[p], packetsPerSend[p]);
 
 2736           if (sendType == Details::DISTRIBUTOR_SEND) { 
 
 2738                        as<int> (tmpSend.size ()),
 
 2739                        procsTo_[p], tag, *comm_);
 
 2741           else if (sendType == Details::DISTRIBUTOR_RSEND) {
 
 2742             readySend<int> (tmpSend,
 
 2743                             as<int> (tmpSend.size ()),
 
 2744                             procsTo_[p], tag, *comm_);
 
 2746           else if (sendType == Details::DISTRIBUTOR_ISEND) {
 
 2747             exports_view_type tmpSendBuf =
 
 2748               subview_offset (exports, sendPacketOffsets[p], packetsPerSend[p]);
 
 2749             requests_.push_back (isend<int> (tmpSendBuf, procsTo_[p],
 
 2752           else if (sendType == Details::DISTRIBUTOR_SSEND) {
 
 2753             ssend<int> (tmpSend,
 
 2754                         as<int> (tmpSend.size ()),
 
 2755                         procsTo_[p], tag, *comm_);
 
 2758             TEUCHOS_TEST_FOR_EXCEPTION(
 
 2759               true, std::logic_error,
 
 2760               "Tpetra::Distributor::doPosts(4 args, Kokkos): " 
 2761               "Invalid send type.  We should never get here.  " 
 2762               "Please report this bug to the Tpetra developers.");
 
 2771         deep_copy_offset(imports, exports, selfReceiveOffset,
 
 2772                          sendPacketOffsets[selfNum], packetsPerSend[selfNum]);
 
 2775         std::ostringstream os;
 
 2776         os << 
"Proc " << myProcID << 
": doPosts(4 args, Kokkos, fast) done" << endl;
 
 2782         std::ostringstream os;
 
 2783         os << 
"Proc " << myProcID << 
": doPosts(4 args, Kokkos, slow): posting sends" << endl;
 
 2788       typedef typename ExpView::non_const_value_type Packet;
 
 2789       typedef typename ExpView::array_layout Layout;
 
 2790       typedef typename ExpView::device_type Device;
 
 2791       typedef typename ExpView::memory_traits Mem;
 
 2792       Kokkos::View<Packet*,Layout,Device,Mem> sendArray (
"sendArray", maxNumPackets); 
 
 2794       TEUCHOS_TEST_FOR_EXCEPTION(
 
 2795         sendType == Details::DISTRIBUTOR_ISEND,
 
 2797         "Tpetra::Distributor::doPosts(4 args, Kokkos): " 
 2798         "The \"send buffer\" code path may not necessarily work with nonblocking sends.");
 
 2800       Array<size_t> indicesOffsets (numExportPacketsPerLID.size(), 0);
 
 2802       for (
int j=0; j<numExportPacketsPerLID.size(); ++j) {
 
 2803         indicesOffsets[j] = ioffset;
 
 2804         ioffset += numExportPacketsPerLID[j];
 
 2807       for (
size_t i = 0; i < numBlocks; ++i) {
 
 2808         size_t p = i + procIndex;
 
 2809         if (p > (numBlocks - 1)) {
 
 2813         if (procsTo_[p] != myProcID) {
 
 2814           size_t sendArrayOffset = 0;
 
 2815           size_t j = startsTo_[p];
 
 2816           size_t numPacketsTo_p = 0;
 
 2817           for (
size_t k = 0; k < lengthsTo_[p]; ++k, ++j) {
 
 2818             numPacketsTo_p += numExportPacketsPerLID[j];
 
 2819             deep_copy_offset(sendArray, exports, sendArrayOffset,
 
 2820                              indicesOffsets[j], numExportPacketsPerLID[j]);
 
 2821             sendArrayOffset += numExportPacketsPerLID[j];
 
 2823           if (numPacketsTo_p > 0) {
 
 2825               subview_offset(sendArray, 
size_t(0), numPacketsTo_p);
 
 2827             if (sendType == Details::DISTRIBUTOR_RSEND) {
 
 2828               readySend<int> (tmpSend,
 
 2829                               as<int> (tmpSend.size ()),
 
 2830                               procsTo_[p], tag, *comm_);
 
 2832             else if (sendType == Details::DISTRIBUTOR_ISEND) {
 
 2833               exports_view_type tmpSendBuf =
 
 2834                 subview_offset (sendArray, 
size_t(0), numPacketsTo_p);
 
 2835               requests_.push_back (isend<int> (tmpSendBuf, procsTo_[p],
 
 2838             else if (sendType == Details::DISTRIBUTOR_SSEND) {
 
 2839               ssend<int> (tmpSend,
 
 2840                           as<int> (tmpSend.size ()),
 
 2841                           procsTo_[p], tag, *comm_);
 
 2845                          as<int> (tmpSend.size ()),
 
 2846                          procsTo_[p], tag, *comm_);
 
 2852           selfIndex = startsTo_[p];
 
 2857         for (
size_t k = 0; k < lengthsTo_[selfNum]; ++k) {
 
 2858           deep_copy_offset(imports, exports, selfReceiveOffset,
 
 2859                            indicesOffsets[selfIndex],
 
 2860                            numExportPacketsPerLID[selfIndex]);
 
 2861           selfReceiveOffset += numExportPacketsPerLID[selfIndex];
 
 2866         std::ostringstream os;
 
 2867         os << 
"Proc " << myProcID
 
 2868            << 
": doPosts(4 args, Kokkos, slow) done" << endl;
 
 // doReversePostsAndWaits, 3-argument Kokkos overload: enabled only when
 // both ExpView and ImpView are Kokkos views.  Forwards its arguments to
 // doReversePosts.
 // NOTE(review): the numPackets parameter line and everything after the
 // doReversePosts call are missing from this listing.
 2874   template <
class ExpView, 
class ImpView>
 
 2875   typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
 
 2877   doReversePostsAndWaits (
const ExpView& exports,
 
 2879                           const ImpView& imports)
 
 2881     doReversePosts (exports, numPackets, imports);
 
 // doReversePostsAndWaits, 4-argument Kokkos overload (per-LID packet counts
 // for exports and imports): enabled only when both ExpView and ImpView are
 // Kokkos views.
 // NOTE(review): lines after the doReversePosts call are missing from this
 // listing.
 2885   template <
class ExpView, 
class ImpView>
 
 2886   typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
 
 2888   doReversePostsAndWaits (
const ExpView& exports,
 
 2889                           const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
 
 2890                           const ImpView& imports,
 
 2891                           const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID)
 
 // Refuse to start while earlier nonblocking requests are still recorded in
 // requests_; reported as std::runtime_error.
 2893     TEUCHOS_TEST_FOR_EXCEPTION(requests_.size() != 0, std::runtime_error,
 
 2894       "Tpetra::Distributor::doReversePostsAndWaits(4 args): There are " 
 2895       << requests_.size() << 
" outstanding nonblocking messages pending.  It " 
 2896       "is incorrect to call this method with posts outstanding.");
 
 // Forward all four arguments to the 4-argument doReversePosts.
 2898     doReversePosts (exports, numExportPacketsPerLID, imports,
 
 2899                     numImportPacketsPerLID);
 
 // doReversePosts, 3-argument Kokkos overload: enabled only when both
 // ExpView and ImpView are Kokkos views.  Posts the reverse communication by
 // delegating to reverseDistributor_.
 // NOTE(review): the numPackets parameter line is missing from this listing.
 2903   template <
class ExpView, 
class ImpView>
 
 2904   typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
 
 2906   doReversePosts (
const ExpView &exports,
 
 2908                   const  ImpView &imports)
 
 // Reverse mode is rejected with std::runtime_error whenever indicesTo_ is
 // non-empty.
 2911     TEUCHOS_TEST_FOR_EXCEPTION(
 
 2912       ! indicesTo_.empty (), std::runtime_error,
 
 2913       "Tpetra::Distributor::doReversePosts(3 args): Can only do " 
 2914       "reverse communication when original data are blocked by process.");
 
 // Create the reverse distributor on first use.
 2915     if (reverseDistributor_.is_null ()) {
 
 2916       createReverseDistributor ();
 
 // Forward the same three arguments to the reverse distributor's doPosts.
 2918     reverseDistributor_->doPosts (exports, numPackets, imports);
 
 // doReversePosts, 4-argument Kokkos overload (per-LID packet counts for
 // exports and imports): enabled only when both ExpView and ImpView are
 // Kokkos views.  Posts the reverse communication by delegating to
 // reverseDistributor_.
 2921   template <
class ExpView, 
class ImpView>
 
 2922   typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
 
 2924   doReversePosts (
const ExpView &exports,
 
 2925                   const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
 
 2926                   const ImpView &imports,
 
 2927                   const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID)
 
 // Reverse mode is rejected with std::runtime_error whenever indicesTo_ is
 // non-empty.  The message names this overload as "(4 args)" so it can be
 // told apart from the 3-argument overload's identical check.
 2930     TEUCHOS_TEST_FOR_EXCEPTION(
 
 2931       ! indicesTo_.empty (), std::runtime_error,
 
 2932       "Tpetra::Distributor::doReversePosts(4 args): Can only do " 
 2933       "reverse communication when original data are blocked by process.");
 
 // Create the reverse distributor on first use.
 2934     if (reverseDistributor_.is_null ()) {
 
 2935       createReverseDistributor ();
 
 // Forward all four arguments to the reverse distributor's doPosts.
 2937     reverseDistributor_->doPosts (exports, numExportPacketsPerLID,
 
 2938                                   imports, numImportPacketsPerLID);
 
 // computeSends: from the list of IDs this process imports and the ranks that
 // own them, fill in exportGIDs / exportProcIDs.
 //
 // importGIDs:    IDs this process wants to receive (input).
 // importProcIDs: one rank per entry of importGIDs (input); the two must have
 //                the same size or std::invalid_argument is thrown.
 // exportGIDs:    output; resized to numExports (when > 0) and filled from the
 //                received buffer.
 // exportProcIDs: output; resized and filled the same way.
 //
 // Visible steps: pack (GID, myRank) pairs into a size_t buffer, set up
 // tempPlan with createFromSends(importProcIDs), exchange the pairs with
 // doPostsAndWaits (2 values per entry), then unpack the received pairs.
 // Internal consistency failures throw std::logic_error.
 //
 // NOTE(review): the embedded listing numbers skip values, so several original
 // lines are missing here: the function braces, the declaration of `tempPlan`
 // (presumably a temporary Distributor -- confirm), the conditions guarding
 // the trace messages and the statements that emit them, and loop/if closers.
 2941   template <
class OrdinalType>
 
 2943   computeSends (
const Teuchos::ArrayView<const OrdinalType> & importGIDs,
 
 2944                 const Teuchos::ArrayView<const int> & importProcIDs,
 
 2945                 Teuchos::Array<OrdinalType> & exportGIDs,
 
 2946                 Teuchos::Array<int> & exportProcIDs)
 
 2955     using Teuchos::Array;
 
 2956     using Teuchos::ArrayView;
 
 2958     typedef typename ArrayView<const OrdinalType>::size_type size_type;
 
 2960     Teuchos::OSTab tab (out_);
 
 2961     const int myRank = comm_->getRank ();
 
 // Trace message for entry into computeSends (how it is emitted is not
 // visible in this excerpt).
 2963       std::ostringstream os;
 
 2964       os << 
"Proc " << myRank << 
": computeSends" << endl;
 
 // Precondition: one owning rank per imported ID.
 2968     TEUCHOS_TEST_FOR_EXCEPTION(
 
 2969       importGIDs.size () != importProcIDs.size (), std::invalid_argument,
 
 2970       "Tpetra::Distributor::computeSends: On Process " << myRank << 
": " 
 2971       "importProcIDs.size() = " << importProcIDs.size ()
 
 2972       << 
" != importGIDs.size() = " << importGIDs.size () << 
".");
 
 2974     const size_type numImports = importProcIDs.size ();
 
 // Send buffer: two size_t values per import, laid out as [GID, rank] pairs.
 2975     Array<size_t> importObjs (2*numImports);
 
 // Each pair carries the requested ID and this process's own rank.
 2977     for (size_type i = 0; i < numImports; ++i) {
 
 2978       importObjs[2*i]   = 
static_cast<size_t> (importGIDs[i]);
 
 2979       importObjs[2*i+1] = 
static_cast<size_t> (myRank);
 
 // Trace message before setting up tempPlan.
 2987       std::ostringstream os;
 
 2988       os << 
"Proc " << myRank << 
": computeSends: tempPlan.createFromSends" << endl;
 
 // createFromSends returns size_t; convert to the (possibly signed) size_type
 // and treat a negative result as an overflow in the conversion.
 2994     const size_t numExportsAsSizeT = tempPlan.createFromSends (importProcIDs);
 
 2995     const size_type numExports = 
static_cast<size_type
> (numExportsAsSizeT);
 
 2996     TEUCHOS_TEST_FOR_EXCEPTION(
 
 2997       numExports < 0, std::logic_error, 
"Tpetra::Distributor::computeSends: " 
 2998       "tempPlan.createFromSends() returned numExports = " << numExportsAsSizeT
 
 2999       << 
" as a size_t, which overflows to " << numExports << 
" when cast to " 
 3000       << Teuchos::TypeNameTraits<size_type>::name () << 
".  " 
 3001       "Please report this bug to the Tpetra developers.");
 
 // The plan's total receive length must agree with the value returned above.
 3002     TEUCHOS_TEST_FOR_EXCEPTION(
 
 3003       static_cast<size_type> (tempPlan.getTotalReceiveLength ()) != numExports,
 
 3004       std::logic_error, 
"Tpetra::Distributor::computeSends: tempPlan.getTotal" 
 3005       "ReceiveLength() = " << tempPlan.getTotalReceiveLength () << 
" != num" 
 3006       "Exports = " << numExports  << 
".  Please report this bug to the " 
 3007       "Tpetra developers.");
 
 // Size the output arrays only when there is something to export.
 3009     if (numExports > 0) {
 
 3010       exportGIDs.resize (numExports);
 
 3011       exportProcIDs.resize (numExports);
 
 // IDs travel inside a size_t buffer, so OrdinalType must not be wider than
 // size_t.
 3022     TEUCHOS_TEST_FOR_EXCEPTION(
 
 3023       sizeof (
size_t) < 
sizeof (OrdinalType), std::logic_error,
 
 3024       "Tpetra::Distributor::computeSends: sizeof(size_t) = " << 
sizeof(
size_t)
 
 3025       << 
" < sizeof(" << Teuchos::TypeNameTraits<OrdinalType>::name () << 
") = " 
 3026       << 
sizeof (OrdinalType) << 
".  This violates an assumption of the " 
 3027       "method.  It's not hard to work around (just use Array<OrdinalType> as " 
 3028       "the export buffer, not Array<size_t>), but we haven't done that yet.  " 
 3029       "Please report this bug to the Tpetra developers.");
 
 // Receive length must cover numExports.
 // NOTE(review): the exception-type argument of this macro call is on a line
 // missing from this excerpt.
 3031     TEUCHOS_TEST_FOR_EXCEPTION(
 
 3032       tempPlan.getTotalReceiveLength () < 
static_cast<size_t> (numExports),
 
 3034       "Tpetra::Distributor::computeSends: tempPlan.getTotalReceiveLength() = " 
 3035       << tempPlan.getTotalReceiveLength() << 
" < numExports = " << numExports
 
 3036       << 
".  Please report this bug to the Tpetra developers.");
 
 // Receive buffer: two size_t values per received entry, same pair layout.
 3038     Array<size_t> exportObjs (tempPlan.getTotalReceiveLength () * 2);
 
 // Trace message before the exchange.
 3040       std::ostringstream os;
 
 3041       os << 
"Proc " << myRank << 
": computeSends: tempPlan.doPostsAndWaits" << endl;
 
 // Exchange the pairs; numPackets is 2 because each entry is a [GID, rank] pair.
 3044     tempPlan.doPostsAndWaits<
size_t> (importObjs (), 2, exportObjs ());
 
 // Unpack: even slots hold the ID, odd slots hold the rank sent with it.
 3047     for (size_type i = 0; i < numExports; ++i) {
 
 3048       exportGIDs[i] = 
static_cast<OrdinalType
> (exportObjs[2*i]);
 
 3049       exportProcIDs[i] = 
static_cast<int> (exportObjs[2*i+1]);
 
 // Trace message for exit from computeSends.
 3053       std::ostringstream os;
 
 3054       os << 
"Proc " << myRank << 
": computeSends done" << endl;
 
 // createFromRecvs: set up this Distributor from the IDs to receive and the
 // ranks that own them.
 //
 // remoteGIDs / remoteProcIDs: inputs; must have the same size.
 // exportGIDs / exportProcIDs: outputs, filled by computeSends.
 //
 // Visible steps: validate the input sizes, call computeSends, call
 // createFromSends on the resulting exportProcIDs, then record in
 // howInitialized_ that the object was initialized by createFromRecvs.
 //
 // Error reporting depends on the build: with HAVE_TPETRA_DEBUG the size check
 // is reduced over comm_ and std::runtime_error is thrown when maxErrProc is
 // not -1; otherwise the check is local and throws std::invalid_argument.
 //
 // NOTE(review): the embedded listing numbers skip values, so the function
 // braces, the declaration of `errProc`, and the conditions guarding the
 // output statements are not visible in this excerpt.
 3059   template <
class OrdinalType>
 
 3061   createFromRecvs (
const Teuchos::ArrayView<const OrdinalType> &remoteGIDs,
 
 3062                    const Teuchos::ArrayView<const int> &remoteProcIDs,
 
 3063                    Teuchos::Array<OrdinalType> &exportGIDs,
 
 3064                    Teuchos::Array<int> &exportProcIDs)
 
 3068     Teuchos::OSTab tab (out_);
 
 3069     const int myRank = comm_->getRank();
 
 // Trace message for entry into createFromRecvs.
 3072       *out_ << 
"Proc " << myRank << 
": createFromRecvs" << endl;
 
 3075 #ifdef HAVE_TPETRA_DEBUG 
 3076     using Teuchos::outArg;
 
 3077     using Teuchos::reduceAll;
 
 // Value assigned to the (elided) errProc declaration: this rank if the two
 // input lists differ in size, otherwise -1.
 3082       (remoteGIDs.size () != remoteProcIDs.size ()) ? myRank : -1;
 
 // Max-reduce over comm_: the result stays -1 only if no process reported a
 // mismatch.
 3083     int maxErrProc = -1;
 
 3084     reduceAll<int, int> (*comm_, Teuchos::REDUCE_MAX, errProc, outArg (maxErrProc));
 
 3085     TEUCHOS_TEST_FOR_EXCEPTION(maxErrProc != -1, std::runtime_error,
 
 3086       Teuchos::typeName (*
this) << 
"::createFromRecvs(): lists of remote IDs " 
 3087       "and remote process IDs must have the same size on all participating " 
 3088       "processes.  Maximum process ID with error: " << maxErrProc << 
".");
 
 3089 #else // NOT HAVE_TPETRA_DEBUG 
 3092     TEUCHOS_TEST_FOR_EXCEPTION(
 
 3093       remoteGIDs.size () != remoteProcIDs.size (), std::invalid_argument,
 
 3094       Teuchos::typeName (*
this) << 
"::createFromRecvs<" <<
 
 3095       Teuchos::TypeNameTraits<OrdinalType>::name () << 
">(): On Process " <<
 
 3096       myRank << 
": remoteGIDs.size() = " << remoteGIDs.size () << 
" != " 
 3097       "remoteProcIDs.size() = " << remoteProcIDs.size () << 
".");
 
 3098 #endif // HAVE_TPETRA_DEBUG 
 // Fill exportGIDs / exportProcIDs from the receive-side description.
 3100     computeSends (remoteGIDs, remoteProcIDs, exportGIDs, exportProcIDs);
 
 // Build the plan from the send side just computed; the return value is only
 // used in the diagnostic output below.
 3102     const size_t numProcsSendingToMe = 
createFromSends (exportProcIDs ());
 
 // Diagnostic summary of the resulting plan.
 3109       std::ostringstream os;
 
 3110       os << 
"Proc " << myRank << 
": {numProcsSendingToMe: " 
 3111          << numProcsSendingToMe << 
", remoteProcIDs.size(): " 
 3112          << remoteProcIDs.size () << 
", selfMessage_: " 
 3113          << (selfMessage_ ? 
"true" : 
"false") << 
"}" << std::endl;
 
 // Trace message for exit from createFromRecvs.
 3118       *out_ << 
"Proc " << myRank << 
": createFromRecvs done" << endl;
 
 // Record how this Distributor was initialized.
 3121     howInitialized_ = Details::DISTRIBUTOR_INITIALIZED_BY_CREATE_FROM_RECVS;
 
 3127 #endif // TPETRA_DISTRIBUTOR_HPP 
void doPostsAndWaits(const Teuchos::ArrayView< const Packet > &exports, size_t numPackets, const Teuchos::ArrayView< Packet > &imports)
Execute the (forward) communication plan. 
size_t getNumReceives() const 
The number of processes from which we will receive data. 
std::string description() const 
Return a one-line description of this object. 
EDistributorHowInitialized
Enum indicating how and whether a Distributor was initialized. 
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const 
List of valid Distributor parameters. 
virtual ~Distributor()=default
Destructor (virtual for memory safety). 
void swap(Distributor &rhs)
Swap the contents of rhs with those of *this. 
std::string DistributorSendTypeEnumToString(EDistributorSendType sendType)
Convert an EDistributorSendType enum value to a string. 
void doReversePostsAndWaits(const Teuchos::ArrayView< const Packet > &exports, size_t numPackets, const Teuchos::ArrayView< Packet > &imports)
Execute the reverse communication plan. 
Teuchos::ArrayView< const size_t > getLengthsFrom() const 
Number of values this process will receive from each process. 
Teuchos::ArrayView< const int > getProcsFrom() const 
Ranks of the processes sending values to this process. 
size_t createFromSends(const Teuchos::ArrayView< const int > &exportProcIDs)
Set up Distributor using list of process ranks to which this process will send. 
Details::EDistributorHowInitialized howInitialized() const 
Return an enum indicating whether and how a Distributor was initialized. 
void doPosts(const Teuchos::ArrayRCP< const Packet > &exports, size_t numPackets, const Teuchos::ArrayRCP< Packet > &imports)
Post the data for a forward plan, but do not execute the waits yet. 
Teuchos::ArrayView< const int > getProcsTo() const 
Ranks of the processes to which this process will send values. 
void createFromSendsAndRecvs(const Teuchos::ArrayView< const int > &exportProcIDs, const Teuchos::ArrayView< const int > &remoteProcIDs)
Set up Distributor using list of process ranks to which to send, and list of process ranks from which to receive.
bool hasSelfMessage() const 
Whether the calling process will send or receive messages to itself. 
Sets up and executes a communication plan for a Tpetra DistObject. 
size_t getTotalReceiveLength() const 
Total number of values this process will receive from other processes. 
void setParameterList(const Teuchos::RCP< Teuchos::ParameterList > &plist)
Set Distributor parameters. 
void doReversePosts(const Teuchos::ArrayRCP< const Packet > &exports, size_t numPackets, const Teuchos::ArrayRCP< Packet > &imports)
Post the data for a reverse plan, but do not execute the waits yet. 
Teuchos::ArrayView< const size_t > getLengthsTo() const 
Number of values this process will send to each process. 
Teuchos::Array< std::string > distributorSendTypes()
Valid values for Distributor's "Send type" parameter. 
std::string DistributorHowInitializedEnumToString(EDistributorHowInitialized how)
Convert an EDistributorHowInitialized enum value to a string. 
Stand-alone utility functions and macros. 
void getLastDoStatistics(size_t &bytes_sent, size_t &bytes_recvd) const 
Information on the last call to do/doReverse. 
size_t getNumSends() const 
The number of processes to which we will send data. 
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const 
Describe this object in a human-readable way to the given output stream. 
size_t getMaxSendLength() const 
Maximum number of values this process will send to another single process. 
Teuchos::RCP< Distributor > getReverse() const 
A reverse communication plan Distributor. 
void createFromRecvs(const Teuchos::ArrayView< const Ordinal > &remoteIDs, const Teuchos::ArrayView< const int > &remoteProcIDs, Teuchos::Array< Ordinal > &exportIDs, Teuchos::Array< int > &exportProcIDs)
Set up Distributor using list of process ranks from which to receive. 
EDistributorSendType
The type of MPI send that Distributor should use. 
Distributor(const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Construct using the specified communicator and default parameters. 
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.