#ifndef TPETRA_DETAILS_DISTRIBUTOR_ACTOR_HPP
#define TPETRA_DETAILS_DISTRIBUTOR_ACTOR_HPP

#include "Tpetra_Details_DistributorPlan.hpp"
#include "Tpetra_Details_Profiling.hpp"
#include "Tpetra_Util.hpp"

#include "Teuchos_Assert.hpp"
#include "Teuchos_Array.hpp"
#include "Teuchos_Comm.hpp"
#include "Teuchos_RCP.hpp"

#include "Kokkos_TeuchosCommAdapters.hpp"

#ifdef HAVE_TPETRA_MPI
#include "Tpetra_Details_MpiTypeTraits.hpp"
#include "mpi.h"
#endif

namespace Tpetra::Details {

template <class View>
constexpr bool isKokkosView = Kokkos::is_view<View>::value;

template <class View1, class View2>
constexpr bool areKokkosViews = Kokkos::is_view<View1>::value && Kokkos::is_view<View2>::value;

class DistributorActor {
  static constexpr int DEFAULT_MPI_TAG = 1;

  using IndexView = DistributorPlan::IndexView;
  using SubViewLimits = DistributorPlan::SubViewLimits;

public:
  DistributorActor();
  DistributorActor(const DistributorActor& otherActor);

  template <class ExpView, class ImpView>
  void doPostsAndWaits(const DistributorPlan& plan,
                       const ExpView &exports,
                       size_t numPackets,
                       const ImpView &imports);

  template <class ExpView, class ImpView>
  void doPostsAndWaits(const DistributorPlan& plan,
                       const ExpView &exports,
                       const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
                       const ImpView &imports,
                       const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);

  template <class ImpView>
  void doPostRecvs(const DistributorPlan& plan,
                   size_t numPackets,
                   const ImpView& imports);

  template <class ImpView>
  void doPostRecvs(const DistributorPlan& plan,
                   const ImpView &imports,
                   const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);

  template <class ExpView, class ImpView>
  void doPostSends(const DistributorPlan& plan,
                   const ExpView& exports,
                   size_t numPackets,
                   const ImpView& imports);

  template <class ExpView, class ImpView>
  void doPostSends(const DistributorPlan& plan,
                   const ExpView &exports,
                   const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
                   const ImpView &imports,
                   const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);

  template <class ExpView, class ImpView>
  void doPosts(const DistributorPlan& plan,
               const ExpView& exports,
               size_t numPackets,
               const ImpView& imports);

  template <class ExpView, class ImpView>
  void doPosts(const DistributorPlan& plan,
               const ExpView &exports,
               const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
               const ImpView &imports,
               const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);

  void doWaits(const DistributorPlan& plan);

  void doWaitsRecv(const DistributorPlan& plan);

  void doWaitsSend(const DistributorPlan& plan);

  bool isReady() const;

private:
  template <class ImpView>
  void doPostRecvsImpl(const DistributorPlan& plan,
                       const ImpView& imports,
                       const SubViewLimits& totalPacketsFrom);

  template <class ExpView, class ImpView>
  void doPostSendsImpl(const DistributorPlan& plan,
                       const ExpView& exports,
                       const SubViewLimits& exportSubViewLimits,
                       const ImpView& imports,
                       const SubViewLimits& importSubViewLimits);

#ifdef HAVE_TPETRA_MPI
  template <class ExpView, class ImpView>
  void doPostsAllToAllImpl(const DistributorPlan &plan,
                           const ExpView &exports,
                           const SubViewLimits& exportSubViewLimits,
                           const ImpView &imports,
                           const SubViewLimits& importSubViewLimits);

#if defined(HAVE_TPETRACORE_MPI_ADVANCE)
  template <class ExpView, class ImpView>
  void doPostsNbrAllToAllVImpl(const DistributorPlan &plan,
                               const ExpView &exports,
                               const SubViewLimits& exportSubViewLimits,
                               const ImpView &imports,
                               const SubViewLimits& importSubViewLimits);
#endif // HAVE_TPETRACORE_MPI_ADVANCE
#endif // HAVE_TPETRA_MPI

  int mpiTag_;

  Teuchos::Array<Teuchos::RCP<Teuchos::CommRequest<int>>> requestsRecv_;
  Teuchos::Array<Teuchos::RCP<Teuchos::CommRequest<int>>> requestsSend_;
};
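
// Illustrative usage sketch (not part of this header): the names `plan`,
// `exports`, `imports`, and `numPackets` below are hypothetical caller-side
// variables, assuming an already-built DistributorPlan and Kokkos::View
// buffers sized to match it.
//
//   Tpetra::Details::DistributorActor actor;
//   actor.doPostsAndWaits(plan, exports, numPackets, imports);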

template <class ExpView, class ImpView>
void DistributorActor::doPosts(const DistributorPlan& plan,
                               const ExpView& exports,
                               size_t numPackets,
                               const ImpView& imports)
{
  doPostRecvs(plan, numPackets, imports);
  doPostSends(plan, exports, numPackets, imports);
}

template <class ExpView, class ImpView>
void DistributorActor::doPostsAndWaits(const DistributorPlan& plan,
                                       const ExpView& exports,
                                       size_t numPackets,
                                       const ImpView& imports)
{
  static_assert(areKokkosViews<ExpView, ImpView>,
                "Data arrays for DistributorActor::doPostsAndWaits must be Kokkos::Views");
  doPosts(plan, exports, numPackets, imports);
  doWaits(plan);
}
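
// Callers that want to overlap communication with local work can split the
// call above into its two pieces. A minimal sketch, assuming the same
// hypothetical `plan`, `exports`, `imports`, and `numPackets` as above:
//
//   actor.doPosts(plan, exports, numPackets, imports);
//   // ... local computation that does not touch `imports` ...
//   actor.doWaits(plan);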

template <class ExpView, class ImpView>
void DistributorActor::doPosts(const DistributorPlan& plan,
                               const ExpView &exports,
                               const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
                               const ImpView &imports,
                               const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID)
{
  doPostRecvs(plan, imports, numImportPacketsPerLID);
  doPostSends(plan, exports, numExportPacketsPerLID, imports, numImportPacketsPerLID);
}

template <class ExpView, class ImpView>
void DistributorActor::doPostsAndWaits(const DistributorPlan& plan,
                                       const ExpView& exports,
                                       const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
                                       const ImpView& imports,
                                       const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID)
{
  static_assert(areKokkosViews<ExpView, ImpView>,
                "Data arrays for DistributorActor::doPostsAndWaits must be Kokkos::Views");
  doPosts(plan, exports, numExportPacketsPerLID, imports, numImportPacketsPerLID);
  doWaits(plan);
}

template <typename ViewType>
using HostAccessibility = Kokkos::SpaceAccessibility<Kokkos::DefaultHostExecutionSpace, typename ViewType::memory_space>;

template <typename DstViewType, typename SrcViewType>
using enableIfHostAccessible = std::enable_if_t<HostAccessibility<DstViewType>::accessible &&
                                                HostAccessibility<SrcViewType>::accessible>;

template <typename DstViewType, typename SrcViewType>
using enableIfNotHostAccessible = std::enable_if_t<!HostAccessibility<DstViewType>::accessible ||
                                                   !HostAccessibility<SrcViewType>::accessible>;
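
// packOffset has two SFINAE-selected overloads: when both Views are
// host-accessible it copies with memcpy; otherwise it defers to
// Kokkos::Compat::deep_copy_offset, which handles device memory.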

template <typename DstViewType, typename SrcViewType>
enableIfHostAccessible<DstViewType, SrcViewType>
packOffset(const DstViewType& dst,
           const SrcViewType& src,
           const size_t dst_offset,
           const size_t src_offset,
           const size_t size)
{
  memcpy((void*) (dst.data()+dst_offset), src.data()+src_offset, size*sizeof(typename DstViewType::value_type));
}

template <typename DstViewType, typename SrcViewType>
enableIfNotHostAccessible<DstViewType, SrcViewType>
packOffset(const DstViewType& dst,
           const SrcViewType& src,
           const size_t dst_offset,
           const size_t src_offset,
           const size_t size)
{
  Kokkos::Compat::deep_copy_offset(dst, src, dst_offset, src_offset, size);
}

#ifdef HAVE_TPETRA_MPI
template <class ExpView, class ImpView>
void DistributorActor::doPostsAllToAllImpl(const DistributorPlan &plan,
                                           const ExpView &exports,
                                           const SubViewLimits& exportSubViewLimits,
                                           const ImpView &imports,
                                           const SubViewLimits& importSubViewLimits) {
  TEUCHOS_TEST_FOR_EXCEPTION(
      !plan.getIndicesTo().is_null(), std::runtime_error,
      "Send Type=\"Alltoall\" only works for fast-path communication.");

  using size_type = Teuchos::Array<size_t>::size_type;

  auto comm = plan.getComm();
  std::vector<int> sendcounts(comm->getSize(), 0);
  std::vector<int> sdispls(comm->getSize(), 0);
  std::vector<int> recvcounts(comm->getSize(), 0);
  std::vector<int> rdispls(comm->getSize(), 0);

  auto& [importStarts, importLengths] = importSubViewLimits;
  auto& [exportStarts, exportLengths] = exportSubViewLimits;

  for (size_t pp = 0; pp < plan.getNumSends(); ++pp) {
    sdispls[plan.getProcsTo()[pp]] = exportStarts[pp];
    size_t numPackets = exportLengths[pp];
    // numPackets is narrowed to int below, so make sure it fits.
    TEUCHOS_TEST_FOR_EXCEPTION(numPackets > size_t(INT_MAX), std::logic_error,
                               "Tpetra::Distributor::doPostsAllToAll: "
                               "Send count for send "
                                   << pp << " (" << numPackets
                                   << ") is too large "
                                   "to be represented as int.");
    sendcounts[plan.getProcsTo()[pp]] = static_cast<int>(numPackets);
  }

  const size_type actualNumReceives =
      Teuchos::as<size_type>(plan.getNumReceives()) +
      Teuchos::as<size_type>(plan.hasSelfMessage() ? 1 : 0);

  for (size_type i = 0; i < actualNumReceives; ++i) {
    rdispls[plan.getProcsFrom()[i]] = importStarts[i];
    size_t totalPacketsFrom_i = importLengths[i];
    // totalPacketsFrom_i is narrowed to int below, so make sure it fits.
    TEUCHOS_TEST_FOR_EXCEPTION(totalPacketsFrom_i > size_t(INT_MAX),
                               std::logic_error,
                               "Tpetra::Distributor::doPostsAllToAll: "
                               "Recv count for receive "
                                   << i << " (" << totalPacketsFrom_i
                                   << ") is too large "
                                   "to be represented as int.");
    recvcounts[plan.getProcsFrom()[i]] = static_cast<int>(totalPacketsFrom_i);
  }

  Teuchos::RCP<const Teuchos::MpiComm<int>> mpiComm =
      Teuchos::rcp_dynamic_cast<const Teuchos::MpiComm<int>>(comm);
  Teuchos::RCP<const Teuchos::OpaqueWrapper<MPI_Comm>> rawComm =
      mpiComm->getRawMpiComm();
  using T = typename ExpView::non_const_value_type;
  MPI_Datatype rawType = ::Tpetra::Details::MpiTypeTraits<T>::getType(T());

#if defined(HAVE_TPETRACORE_MPI_ADVANCE)
  if (Details::DISTRIBUTOR_MPIADVANCE_ALLTOALL == plan.getSendType()) {
    MPIX_Comm *mpixComm = *plan.getMPIXComm();
    TEUCHOS_TEST_FOR_EXCEPTION(!mpixComm, std::runtime_error,
                               "MPIX_Comm is null in doPostsAllToAll \""
                                   << __FILE__ << ":" << __LINE__);

    const int err = MPIX_Alltoallv(
        exports.data(), sendcounts.data(), sdispls.data(), rawType,
        imports.data(), recvcounts.data(), rdispls.data(), rawType, mpixComm);

    TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
                               "MPIX_Alltoallv failed with error \""
                                   << Teuchos::mpiErrorCodeToString(err)
                                   << "\".");

    return;
  }
#endif // HAVE_TPETRACORE_MPI_ADVANCE

  const int err = MPI_Alltoallv(
      exports.data(), sendcounts.data(), sdispls.data(), rawType,
      imports.data(), recvcounts.data(), rdispls.data(), rawType, (*rawComm)());

  TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
                             "MPI_Alltoallv failed with error \""
                                 << Teuchos::mpiErrorCodeToString(err)
                                 << "\".");
}
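
// MPI_Alltoallv and MPIX_Neighbor_alltoallv take int counts and
// displacements, which is why the implementations above and below check
// each count against INT_MAX before narrowing from size_t.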

#if defined(HAVE_TPETRACORE_MPI_ADVANCE)
template <class ExpView, class ImpView>
void DistributorActor::doPostsNbrAllToAllVImpl(const DistributorPlan &plan,
                                               const ExpView &exports,
                                               const SubViewLimits& exportSubViewLimits,
                                               const ImpView &imports,
                                               const SubViewLimits& importSubViewLimits) {
  TEUCHOS_TEST_FOR_EXCEPTION(
      !plan.getIndicesTo().is_null(), std::runtime_error,
      "Send Type=\"Alltoall\" only works for fast-path communication.");

  using size_type = Teuchos::Array<size_t>::size_type;

  const int myRank = plan.getComm()->getRank();
  MPIX_Comm *mpixComm = *plan.getMPIXComm();

  const size_t numSends = plan.getNumSends() + plan.hasSelfMessage();
  const size_t numRecvs = plan.getNumReceives() + plan.hasSelfMessage();
  std::vector<int> sendcounts(numSends, 0);
  std::vector<int> sdispls(numSends, 0);
  std::vector<int> recvcounts(numRecvs, 0);
  std::vector<int> rdispls(numRecvs, 0);

  auto& [importStarts, importLengths] = importSubViewLimits;
  auto& [exportStarts, exportLengths] = exportSubViewLimits;

  for (size_t pp = 0; pp < plan.getNumSends(); ++pp) {
    sdispls[plan.getProcsTo()[pp]] = exportStarts[pp];
    size_t numPackets = exportLengths[pp];
    // numPackets is narrowed to int below, so make sure it fits.
    TEUCHOS_TEST_FOR_EXCEPTION(numPackets > size_t(INT_MAX), std::logic_error,
                               "Tpetra::Distributor::doPostsNbrAllToAllV: "
                               "Send count for send "
                                   << pp << " (" << numPackets
                                   << ") is too large "
                                   "to be represented as int.");
    sendcounts[plan.getProcsTo()[pp]] = static_cast<int>(numPackets);
  }

  const size_type actualNumReceives =
      Teuchos::as<size_type>(plan.getNumReceives()) +
      Teuchos::as<size_type>(plan.hasSelfMessage() ? 1 : 0);

  for (size_type i = 0; i < actualNumReceives; ++i) {
    rdispls[plan.getProcsFrom()[i]] = importStarts[i];
    size_t totalPacketsFrom_i = importLengths[i];
    // totalPacketsFrom_i is narrowed to int below, so make sure it fits.
    TEUCHOS_TEST_FOR_EXCEPTION(totalPacketsFrom_i > size_t(INT_MAX),
                               std::logic_error,
                               "Tpetra::Distributor::doPostsNbrAllToAllV: "
                               "Recv count for receive "
                                   << i << " (" << totalPacketsFrom_i
                                   << ") is too large "
                                   "to be represented as int.");
    recvcounts[plan.getProcsFrom()[i]] = static_cast<int>(totalPacketsFrom_i);
  }

  using T = typename ExpView::non_const_value_type;
  MPI_Datatype rawType = ::Tpetra::Details::MpiTypeTraits<T>::getType(T());

  const int err = MPIX_Neighbor_alltoallv(
      exports.data(), sendcounts.data(), sdispls.data(), rawType,
      imports.data(), recvcounts.data(), rdispls.data(), rawType, mpixComm);

  TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
                             "MPIX_Neighbor_alltoallv failed with error \""
                                 << Teuchos::mpiErrorCodeToString(err)
                                 << "\".");
}
#endif // HAVE_TPETRACORE_MPI_ADVANCE
#endif // HAVE_TPETRA_MPI
381 #endif // HAVE_TPETRA_MPI

template <class ImpView>
void DistributorActor::doPostRecvs(const DistributorPlan& plan,
                                   size_t numPackets,
                                   const ImpView& imports)
{
  auto importSubViewLimits = plan.getImportViewLimits(numPackets);
  doPostRecvsImpl(plan, imports, importSubViewLimits);
}

template <class ImpView>
void DistributorActor::doPostRecvs(const DistributorPlan& plan,
                                   const ImpView &imports,
                                   const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID)
{
  auto importSubViewLimits = plan.getImportViewLimits(numImportPacketsPerLID);
  doPostRecvsImpl(plan, imports, importSubViewLimits);
}
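
// doPostRecvsImpl posts one nonblocking receive (Teuchos::ireceive) per
// remote sending process, using importSubViewLimits to place each message at
// its offset in `imports`. Self-messages and the all-to-all send types are
// skipped here; those paths are handled in doPostSends.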

template <class ImpView>
void DistributorActor::doPostRecvsImpl(const DistributorPlan& plan,
                                       const ImpView &imports,
                                       const SubViewLimits& importSubViewLimits)
{
  static_assert(isKokkosView<ImpView>,
                "Data arrays for DistributorActor::doPostRecvs must be Kokkos::Views");
  using Teuchos::Array;
  using Teuchos::as;
  using Teuchos::ireceive;
  using Kokkos::Compat::subview_offset;
  using size_type = Array<size_t>::size_type;
  using imports_view_type = ImpView;

#ifdef KOKKOS_ENABLE_CUDA
  static_assert (! std::is_same<typename ImpView::memory_space, Kokkos::CudaUVMSpace>::value,
                 "Please do not use Tpetra::Distributor with UVM "
                 "allocations. See GitHub issue #1088.");
#endif // KOKKOS_ENABLE_CUDA

#ifdef KOKKOS_ENABLE_SYCL
  static_assert (! std::is_same<typename ImpView::memory_space, Kokkos::Experimental::SYCLSharedUSMSpace>::value,
                 "Please do not use Tpetra::Distributor with SharedUSM "
                 "allocations. See GitHub issue #1088 (corresponding to CUDA).");
#endif // KOKKOS_ENABLE_SYCL

#if defined(HAVE_TPETRA_MPI)
  // The all-to-all send types post both receives and sends in doPostSends,
  // so there is nothing to post here.
  const Details::EDistributorSendType sendType = plan.getSendType();
  if ((sendType == Details::DISTRIBUTOR_ALLTOALL)
#ifdef HAVE_TPETRACORE_MPI_ADVANCE
      || (sendType == Details::DISTRIBUTOR_MPIADVANCE_ALLTOALL)
      || (sendType == Details::DISTRIBUTOR_MPIADVANCE_NBRALLTOALLV)
#endif // HAVE_TPETRACORE_MPI_ADVANCE
  ) {
    return;
  }
#endif // HAVE_TPETRA_MPI

  ProfilingRegion pr("Tpetra::Distributor: doPostRecvs");

  const int myProcID = plan.getComm()->getRank ();

  auto& [importStarts, importLengths] = importSubViewLimits;

  const size_type actualNumReceives = as<size_type> (plan.getNumReceives()) +
    as<size_type> (plan.hasSelfMessage() ? 1 : 0);

#ifdef HAVE_TPETRA_DEBUG
  size_t totalNumImportPackets = 0;
  for (size_t i = 0; i < Teuchos::as<size_t>(actualNumReceives); ++i) {
    totalNumImportPackets += importLengths[i];
  }
  TEUCHOS_TEST_FOR_EXCEPTION(
    imports.extent (0) < totalNumImportPackets, std::runtime_error,
    "Tpetra::Distributor::doPostRecvs: The 'imports' array must have "
    "enough entries to hold the expected number of import packets. "
    "imports.extent(0) = " << imports.extent (0) << " < "
    "totalNumImportPackets = " << totalNumImportPackets << ".");
  TEUCHOS_TEST_FOR_EXCEPTION
    (!requestsRecv_.empty(), std::logic_error, "Tpetra::Distributor::"
     "doPostRecvs: Process " << myProcID << ": requestsRecv_.size () = "
     << requestsRecv_.size () << " != 0.");
#endif // HAVE_TPETRA_DEBUG

  requestsRecv_.resize (0);

  // Post the nonblocking receives.
  ProfilingRegion prr("Tpetra::Distributor: doPostRecvs recvs");

  for (size_type i = 0; i < actualNumReceives; ++i) {
    size_t totalPacketsFrom_i = importLengths[Teuchos::as<size_t>(i)];
    // totalPacketsFrom_i is narrowed to int below, so make sure it fits.
    TEUCHOS_TEST_FOR_EXCEPTION(totalPacketsFrom_i > size_t(INT_MAX),
      std::logic_error, "Tpetra::Distributor::doPostRecvs: "
      "Recv count for receive " << i << " (" << totalPacketsFrom_i << ") is too large "
      "to be represented as int.");
    if (plan.getProcsFrom()[i] != myProcID && totalPacketsFrom_i) {
      // Receive from another process, and only if there is something to receive.
      imports_view_type recvBuf =
        subview_offset (imports, importStarts[i], totalPacketsFrom_i);
      requestsRecv_.push_back (ireceive<int> (recvBuf, plan.getProcsFrom()[i],
                                              mpiTag_, *plan.getComm()));
    }
  }
}

template <class ExpView, class ImpView>
void DistributorActor::doPostSends(const DistributorPlan& plan,
                                   const ExpView& exports,
                                   size_t numPackets,
                                   const ImpView& imports)
{
  auto exportSubViewLimits = plan.getExportViewLimits(numPackets);
  auto importSubViewLimits = plan.getImportViewLimits(numPackets);
  doPostSendsImpl(plan, exports, exportSubViewLimits, imports, importSubViewLimits);
}

template <class ExpView, class ImpView>
void DistributorActor::doPostSends(const DistributorPlan& plan,
                                   const ExpView &exports,
                                   const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
                                   const ImpView &imports,
                                   const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID)
{
  auto exportSubViewLimits = plan.getExportViewLimits(numExportPacketsPerLID);
  auto importSubViewLimits = plan.getImportViewLimits(numImportPacketsPerLID);
  doPostSendsImpl(plan, exports, exportSubViewLimits, imports, importSubViewLimits);
}
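
// doPostSendsImpl below chooses between two paths: a FAST path when
// plan.getIndicesTo() is null (data already blocked by destination process,
// so sends go straight out of `exports`) and a SLOW path that first packs
// each destination's blocks into a scratch sendArray via packOffset.
// All-to-all send types are dispatched to the collective implementations
// instead.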

template <class ExpView, class ImpView>
void DistributorActor::doPostSendsImpl(const DistributorPlan& plan,
                                       const ExpView& exports,
                                       const SubViewLimits& exportSubViewLimits,
                                       const ImpView& imports,
                                       const SubViewLimits& importSubViewLimits)
{
  static_assert(areKokkosViews<ExpView, ImpView>,
                "Data arrays for DistributorActor::doPostSends must be Kokkos::Views");
  using Teuchos::Array;
  using Teuchos::as;
  using Teuchos::isend;
  using Teuchos::send;
  using Kokkos::Compat::subview_offset;
  using Kokkos::Compat::deep_copy_offset;
  using size_type = Array<size_t>::size_type;
  using exports_view_type = ExpView;

#ifdef KOKKOS_ENABLE_CUDA
  static_assert
    (! std::is_same<typename ExpView::memory_space, Kokkos::CudaUVMSpace>::value &&
     ! std::is_same<typename ImpView::memory_space, Kokkos::CudaUVMSpace>::value,
     "Please do not use Tpetra::Distributor with UVM allocations. "
     "See Trilinos GitHub issue #1088.");
#endif // KOKKOS_ENABLE_CUDA

#ifdef KOKKOS_ENABLE_SYCL
  static_assert
    (! std::is_same<typename ExpView::memory_space, Kokkos::Experimental::SYCLSharedUSMSpace>::value &&
     ! std::is_same<typename ImpView::memory_space, Kokkos::Experimental::SYCLSharedUSMSpace>::value,
     "Please do not use Tpetra::Distributor with SharedUSM allocations. "
     "See Trilinos GitHub issue #1088 (corresponding to CUDA).");
#endif // KOKKOS_ENABLE_SYCL

  ProfilingRegion ps("Tpetra::Distributor: doPostSends");

  const int myRank = plan.getComm()->getRank ();
  const Details::EDistributorSendType sendType = plan.getSendType();

  auto& [exportStarts, exportLengths] = exportSubViewLimits;
  auto& [importStarts, importLengths] = importSubViewLimits;

#if defined(HAVE_TPETRA_MPI)
  // The all-to-all implementations post both the sends and the receives,
  // so dispatch to them and return.
  if (sendType == Details::DISTRIBUTOR_ALLTOALL) {
    doPostsAllToAllImpl(plan, exports, exportSubViewLimits, imports, importSubViewLimits);
    return;
  }
#ifdef HAVE_TPETRACORE_MPI_ADVANCE
  else if (sendType == Details::DISTRIBUTOR_MPIADVANCE_ALLTOALL) {
    doPostsAllToAllImpl(plan, exports, exportSubViewLimits, imports, importSubViewLimits);
    return;
  }
  else if (sendType == Details::DISTRIBUTOR_MPIADVANCE_NBRALLTOALLV) {
    doPostsNbrAllToAllVImpl(plan, exports, exportSubViewLimits, imports, importSubViewLimits);
    return;
  }
#endif // defined(HAVE_TPETRACORE_MPI_ADVANCE)

#else // HAVE_TPETRA_MPI
  if (plan.hasSelfMessage()) {
    // "Send a message to ourself" by copying from the export buffer
    // to the import buffer.
    size_t selfReceiveOffset = 0;
    deep_copy_offset(imports, exports, selfReceiveOffset,
                     exportStarts[0], exportLengths[0]);
  }
#endif // HAVE_TPETRA_MPI

  size_t selfReceiveOffset = 0;

#ifdef HAVE_TPETRA_DEBUG
  TEUCHOS_TEST_FOR_EXCEPTION
    (requestsSend_.size () != 0,
     std::logic_error,
     "Tpetra::Distributor::doPostSends: Process "
     << myRank << ": requestsSend_.size() = " << requestsSend_.size () << " != 0.");
#endif // HAVE_TPETRA_DEBUG

  const size_type actualNumReceives = as<size_type> (plan.getNumReceives()) +
    as<size_type> (plan.hasSelfMessage() ? 1 : 0);
  requestsSend_.resize (0);

  // Find the offset of the self-receive block, if any, in the imports buffer.
  for (size_type i = 0; i < actualNumReceives; ++i) {
    if (plan.getProcsFrom()[i] == myRank) {
      selfReceiveOffset = importStarts[i];
    }
  }

  ProfilingRegion pss("Tpetra::Distributor: doPostSends sends");

  // Start the send loop at the first process ranked above my own,
  // wrapping around, to help balance message traffic.
  size_t numBlocks = plan.getNumSends() + plan.hasSelfMessage();
  size_t procIndex = 0;
  while ((procIndex < numBlocks) && (plan.getProcsTo()[procIndex] < myRank)) {
    ++procIndex;
  }
  if (procIndex == numBlocks) {
    procIndex = 0;
  }

  size_t selfNum = 0;
  size_t selfIndex = 0;

  if (plan.getIndicesTo().is_null()) {
    ProfilingRegion pssf("Tpetra::Distributor: doPostSends sends FAST");

    // Fast path: data are already blocked by process, so we can send
    // directly out of the exports buffer.
    for (size_t i = 0; i < numBlocks; ++i) {
      size_t p = i + procIndex;
      if (p > (numBlocks - 1)) {
        p -= numBlocks;
      }

      if (plan.getProcsTo()[p] != myRank) {
        if (exportLengths[p] == 0) {
          // Do not send messages of length zero.
          continue;
        }

        exports_view_type tmpSend = subview_offset(exports, exportStarts[p], exportLengths[p]);

        if (sendType == Details::DISTRIBUTOR_ISEND) {
          exports_view_type tmpSendBuf =
            subview_offset (exports, exportStarts[p], exportLengths[p]);
          requestsSend_.push_back (isend<int> (tmpSendBuf, plan.getProcsTo()[p],
                                               mpiTag_, *plan.getComm()));
        }
        else { // DISTRIBUTOR_SEND
          send<int> (tmpSend,
                     as<int> (tmpSend.size ()),
                     plan.getProcsTo()[p], mpiTag_, *plan.getComm());
        }
      }
      else { // "sending" to myself; just remember which block it is
        selfNum = p;
      }
    }

    if (plan.hasSelfMessage()) {
      // "Send" the self-message by copying directly from the export
      // buffer to the import buffer.
      deep_copy_offset(imports, exports, selfReceiveOffset,
                       exportStarts[selfNum], exportLengths[selfNum]);
    }
  }
  else { // Slow path: data are not blocked by process; pack a send buffer.
    ProfilingRegion psss("Tpetra::Distributor: doPostSends: sends SLOW");

    using Packet = typename ExpView::non_const_value_type;
    using Layout = typename ExpView::array_layout;
    using Device = typename ExpView::device_type;
    using Mem = typename ExpView::memory_traits;

#ifdef HAVE_TPETRA_DEBUG
    if (sendType != Details::DISTRIBUTOR_SEND) {
      if (plan.getComm()->getRank() == 0)
        std::cout << "The requested Tpetra send type "
                  << DistributorSendTypeEnumToString(sendType)
                  << " requires Distributor data to be ordered by"
                  << " the receiving processor rank. Since these"
                  << " data are not ordered, Tpetra will use Send"
                  << " instead." << std::endl;
    }
#endif // HAVE_TPETRA_DEBUG

    size_t maxSendLength = 0;
    for (size_t i = 0; i < numBlocks; ++i) {
      size_t p = i + procIndex;
      if (p > (numBlocks - 1)) {
        p -= numBlocks;
      }

      size_t sendArrayOffset = 0;
      size_t j = plan.getStartsTo()[p];
      for (size_t k = 0; k < plan.getLengthsTo()[p]; ++k, ++j) {
        sendArrayOffset += exportLengths[j];
      }
      maxSendLength = std::max(maxSendLength, sendArrayOffset);
    }
    Kokkos::View<Packet*,Layout,Device,Mem> sendArray ("sendArray", maxSendLength);

    for (size_t i = 0; i < numBlocks; ++i) {
      size_t p = i + procIndex;
      if (p > (numBlocks - 1)) {
        p -= numBlocks;
      }

      if (plan.getProcsTo()[p] != myRank) {
        // Pack this process's blocks contiguously into sendArray.
        size_t sendArrayOffset = 0;
        size_t j = plan.getStartsTo()[p];
        for (size_t k = 0; k < plan.getLengthsTo()[p]; ++k, ++j) {
          packOffset(sendArray, exports, sendArrayOffset, exportStarts[j], exportLengths[j]);
          sendArrayOffset += exportLengths[j];
        }
        typename ExpView::execution_space().fence();

        ImpView tmpSend =
          subview_offset(sendArray, size_t(0), sendArrayOffset);

        send<int> (tmpSend,
                   as<int> (tmpSend.size ()),
                   plan.getProcsTo()[p], mpiTag_, *plan.getComm());
      }
      else { // "sending" to myself
        selfNum = p;
        selfIndex = plan.getStartsTo()[p];
      }
    }

    if (plan.hasSelfMessage()) {
      for (size_t k = 0; k < plan.getLengthsTo()[selfNum]; ++k) {
        packOffset(imports, exports, selfReceiveOffset, exportStarts[selfIndex], exportLengths[selfIndex]);
        selfReceiveOffset += exportLengths[selfIndex];
        ++selfIndex;
      }
    }
  }
}

} // namespace Tpetra::Details

#endif // TPETRA_DETAILS_DISTRIBUTOR_ACTOR_HPP