#ifndef TPETRA_DETAILS_DISTRIBUTOR_ACTOR_HPP
#define TPETRA_DETAILS_DISTRIBUTOR_ACTOR_HPP

#include "Tpetra_Details_DistributorPlan.hpp"
#include "Tpetra_Details_Profiling.hpp"
#include "Tpetra_Util.hpp"

#include "Teuchos_Assert.hpp"
#include "Teuchos_Array.hpp"
#include "Teuchos_Comm.hpp"
#include "Teuchos_RCP.hpp"

#include "Kokkos_TeuchosCommAdapters.hpp"

#ifdef HAVE_TPETRA_MPI
#include "Tpetra_Details_Ialltofewv.hpp"
#include "Tpetra_Details_MpiTypeTraits.hpp"
#include "Teuchos_DefaultMpiComm.hpp"
#endif  // HAVE_TPETRA_MPI
namespace Tpetra::Details {

template <class View>
constexpr bool isKokkosView = Kokkos::is_view<View>::value;

template <class View1, class View2>
constexpr bool areKokkosViews = Kokkos::is_view<View1>::value && Kokkos::is_view<View2>::value;
class DistributorActor {
  using IndexView = DistributorPlan::IndexView;
  using SubViewLimits = DistributorPlan::SubViewLimits;

 public:
  static constexpr int DEFAULT_MPI_TAG = 1;

  DistributorActor();
  DistributorActor(const DistributorActor& otherActor) = default;

  template <class ExpView, class ImpView>
  void doPostsAndWaits(const DistributorPlan& plan,
                       const ExpView& exports,
                       size_t numPackets,
                       const ImpView& imports);

  template <class ExpView, class ImpView>
  void doPostsAndWaits(const DistributorPlan& plan,
                       const ExpView& exports,
                       const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
                       const ImpView& imports,
                       const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);

  template <class ImpView>
  void doPostRecvs(const DistributorPlan& plan,
                   size_t numPackets,
                   const ImpView& imports);

  template <class ImpView>
  void doPostRecvs(const DistributorPlan& plan,
                   const ImpView& imports,
                   const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);

  template <class ExpView, class ImpView>
  void doPostSends(const DistributorPlan& plan,
                   const ExpView& exports,
                   size_t numPackets,
                   const ImpView& imports);

  template <class ExpView, class ImpView>
  void doPostSends(const DistributorPlan& plan,
                   const ExpView& exports,
                   const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
                   const ImpView& imports,
                   const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);

  template <class ExpView, class ImpView>
  void doPosts(const DistributorPlan& plan,
               const ExpView& exports,
               size_t numPackets,
               const ImpView& imports);

  template <class ExpView, class ImpView>
  void doPosts(const DistributorPlan& plan,
               const ExpView& exports,
               const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
               const ImpView& imports,
               const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);

  void doWaits(const DistributorPlan& plan);
  void doWaitsRecv(const DistributorPlan& plan);
  void doWaitsSend(const DistributorPlan& plan);
  void doWaitsIalltofewv(const DistributorPlan& plan);

  bool isReady() const;

  int getMpiTag() const { return mpiTag_; }
 private:
  template <class ImpView>
  void doPostRecvsImpl(const DistributorPlan& plan,
                       const ImpView& imports,
                       const SubViewLimits& totalPacketsFrom);

  template <class ExpView, class ImpView>
  void doPostSendsImpl(const DistributorPlan& plan,
                       const ExpView& exports,
                       const SubViewLimits& exportSubViewLimits,
                       const ImpView& imports,
                       const SubViewLimits& importSubViewLimits);
#ifdef HAVE_TPETRA_MPI
  template <class ExpView, class ImpView>
  void doPostsAllToAllImpl(const DistributorPlan& plan,
                           const ExpView& exports,
                           const SubViewLimits& exportSubViewLimits,
                           const ImpView& imports,
                           const SubViewLimits& importSubViewLimits);

#if defined(HAVE_TPETRACORE_MPI_ADVANCE)
  template <class ExpView, class ImpView>
  void doPostsNbrAllToAllVImpl(const DistributorPlan& plan,
                               const ExpView& exports,
                               const SubViewLimits& exportSubViewLimits,
                               const ImpView& imports,
                               const SubViewLimits& importSubViewLimits);
#endif  // HAVE_TPETRACORE_MPI_ADVANCE

  template <typename ExpView, typename ImpView>
  void doPostsIalltofewvImpl(const DistributorPlan& plan,
                             const ExpView& exports,
                             const SubViewLimits& exportSubViewLimits,
                             const ImpView& imports,
                             const SubViewLimits& importSubViewLimits);

  // State for an in-flight Ialltofewv collective, set up by
  // doPostsIalltofewvImpl and consumed by doWaitsIalltofewv.
  struct {
    Details::Ialltofewv impl;
    std::optional<Details::Ialltofewv::Req> req;
    Teuchos::RCP<std::vector<int>> sendcounts;
    Teuchos::RCP<std::vector<int>> sdispls;
    Teuchos::RCP<std::vector<int>> recvcounts;
    Teuchos::RCP<std::vector<int>> rdispls;
    std::vector<int> roots;
  } ialltofewv_;
#endif  // HAVE_TPETRA_MPI
  int mpiTag_ = DEFAULT_MPI_TAG;

  Teuchos::Array<Teuchos::RCP<Teuchos::CommRequest<int>>> requestsRecv_;
  Teuchos::Array<Teuchos::RCP<Teuchos::CommRequest<int>>> requestsSend_;
};
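// Example (illustrative sketch only, not part of this header's API surface):
// a caller either uses the combined doPostsAndWaits, or splits posts from
// waits to overlap communication with local work.  The names `plan`,
// `exports`, `imports`, and `numPackets` are hypothetical placeholders for
// objects the caller already owns.
//
//   DistributorActor actor;
//   // Combined call:
//   actor.doPostsAndWaits(plan, exports, numPackets, imports);
//   // Or split into phases:
//   actor.doPostRecvs(plan, numPackets, imports);
//   actor.doPostSends(plan, exports, numPackets, imports);
//   // ... local work ...
//   actor.doWaits(plan);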
template <class ExpView, class ImpView>
void DistributorActor::doPosts(const DistributorPlan& plan,
                               const ExpView& exports,
                               size_t numPackets,
                               const ImpView& imports) {
  doPostRecvs(plan, numPackets, imports);
  doPostSends(plan, exports, numPackets, imports);
}
template <class ExpView, class ImpView>
void DistributorActor::doPostsAndWaits(const DistributorPlan& plan,
                                       const ExpView& exports,
                                       size_t numPackets,
                                       const ImpView& imports) {
  static_assert(areKokkosViews<ExpView, ImpView>,
                "Data arrays for DistributorActor::doPostsAndWaits must be Kokkos::Views");
  doPosts(plan, exports, numPackets, imports);
  doWaits(plan);
}
template <class ExpView, class ImpView>
void DistributorActor::doPosts(const DistributorPlan& plan,
                               const ExpView& exports,
                               const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
                               const ImpView& imports,
                               const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID) {
  doPostRecvs(plan, imports, numImportPacketsPerLID);
  doPostSends(plan, exports, numExportPacketsPerLID, imports, numImportPacketsPerLID);
}
template <class ExpView, class ImpView>
void DistributorActor::doPostsAndWaits(const DistributorPlan& plan,
                                       const ExpView& exports,
                                       const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
                                       const ImpView& imports,
                                       const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID) {
  static_assert(areKokkosViews<ExpView, ImpView>,
                "Data arrays for DistributorActor::doPostsAndWaits must be Kokkos::Views");
  doPosts(plan, exports, numExportPacketsPerLID, imports, numImportPacketsPerLID);
  doWaits(plan);
}
template <typename ViewType>
using HostAccessibility = Kokkos::SpaceAccessibility<Kokkos::DefaultHostExecutionSpace, typename ViewType::memory_space>;

template <typename DstViewType, typename SrcViewType>
using enableIfHostAccessible = std::enable_if_t<HostAccessibility<DstViewType>::accessible &&
                                                HostAccessibility<SrcViewType>::accessible>;

template <typename DstViewType, typename SrcViewType>
using enableIfNotHostAccessible = std::enable_if_t<!HostAccessibility<DstViewType>::accessible ||
                                                   !HostAccessibility<SrcViewType>::accessible>;
template <typename DstViewType, typename SrcViewType>
enableIfHostAccessible<DstViewType, SrcViewType>
packOffset(const DstViewType& dst,
           const SrcViewType& src,
           const size_t dst_offset,
           const size_t src_offset,
           const size_t size) {
  memcpy((void*)(dst.data() + dst_offset), src.data() + src_offset,
         size * sizeof(typename DstViewType::value_type));
}

template <typename DstViewType, typename SrcViewType>
enableIfNotHostAccessible<DstViewType, SrcViewType>
packOffset(const DstViewType& dst,
           const SrcViewType& src,
           const size_t dst_offset,
           const size_t src_offset,
           const size_t size) {
  Kokkos::Compat::deep_copy_offset(dst, src, dst_offset, src_offset, size);
}
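// The two packOffset overloads above dispatch on host accessibility: when both
// views are host-accessible, the raw memcpy overload is selected; otherwise the
// Kokkos::Compat::deep_copy_offset overload is used.  Illustrative sketch
// (hypothetical views, assuming a device-enabled Kokkos build):
//
//   Kokkos::View<double*, Kokkos::HostSpace> h_dst("h_dst", n), h_src("h_src", n);
//   packOffset(h_dst, h_src, 0, 0, n);  // memcpy overload
//
//   Kokkos::View<double*> d_dst("d_dst", n);  // device memory space
//   packOffset(d_dst, h_src, 0, 0, n);  // deep_copy_offset overload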
#ifdef HAVE_TPETRA_MPI

template <class ExpView, class ImpView>
void DistributorActor::doPostsIalltofewvImpl(const DistributorPlan& plan,
                                             const ExpView& exports,
                                             const SubViewLimits& exportSubViewLimits,
                                             const ImpView& imports,
                                             const SubViewLimits& importSubViewLimits) {
  using size_type = Teuchos::Array<size_t>::size_type;
  using ExportValue = typename ExpView::non_const_value_type;

  ProfilingRegion pr("Tpetra::Distributor::doPostsIalltofewvImpl");

  TEUCHOS_TEST_FOR_EXCEPTION(
      !plan.getIndicesTo().is_null(), std::runtime_error,
      "Send Type=\"Ialltofewv\" only works for fast-path communication.");

  TEUCHOS_TEST_FOR_EXCEPTION(
      bool(ialltofewv_.req), std::runtime_error,
      "This actor has an active Ialltofewv already");
  TEUCHOS_TEST_FOR_EXCEPTION(
      bool(ialltofewv_.sendcounts), std::runtime_error,
      "This actor has an active Ialltofewv already");
  TEUCHOS_TEST_FOR_EXCEPTION(
      bool(ialltofewv_.sdispls), std::runtime_error,
      "This actor has an active Ialltofewv already");
  TEUCHOS_TEST_FOR_EXCEPTION(
      bool(ialltofewv_.recvcounts), std::runtime_error,
      "This actor has an active Ialltofewv already");
  TEUCHOS_TEST_FOR_EXCEPTION(
      bool(ialltofewv_.rdispls), std::runtime_error,
      "This actor has an active Ialltofewv already");

  auto comm = plan.getComm();

  const auto& [importStarts, importLengths] = importSubViewLimits;
  const auto& [exportStarts, exportLengths] = exportSubViewLimits;

  ialltofewv_.roots = plan.getRoots();
  const int nroots = ialltofewv_.roots.size();
  const int* roots = ialltofewv_.roots.data();
  ialltofewv_.req = std::make_optional<Details::Ialltofewv::Req>();
  ialltofewv_.sendcounts = Teuchos::rcp(new std::vector<int>(nroots));
  ialltofewv_.sdispls = Teuchos::rcp(new std::vector<int>(nroots));
  ialltofewv_.recvcounts = Teuchos::rcp(new std::vector<int>);
  ialltofewv_.rdispls = Teuchos::rcp(new std::vector<int>);

  for (int rootIdx = 0; rootIdx < nroots; ++rootIdx) {
    const int root = roots[rootIdx];

    // Find the position of this root in the list of processes we send to.
    size_type rootProcIndex = plan.getProcsTo().size();
    for (size_type pi = 0; pi < plan.getProcsTo().size(); ++pi) {
      if (plan.getProcsTo()[pi] == root) {
        rootProcIndex = pi;
        break;
      }
    }

    int sendcount = 0;
    if (rootProcIndex != plan.getProcsTo().size()) {
      sendcount = exportLengths[rootProcIndex];
    }
    (*ialltofewv_.sendcounts)[rootIdx] = sendcount;

    int sdispl = 0;
    if (0 != sendcount) {
      sdispl = exportStarts[rootProcIndex];
    }
    (*ialltofewv_.sdispls)[rootIdx] = sdispl;

    // Only a root needs receive counts and displacements.
    if (comm->getRank() == root) {
      ialltofewv_.recvcounts->resize(comm->getSize());
      std::fill(ialltofewv_.recvcounts->begin(), ialltofewv_.recvcounts->end(), 0);
      ialltofewv_.rdispls->resize(comm->getSize());
      std::fill(ialltofewv_.rdispls->begin(), ialltofewv_.rdispls->end(), 0);

      const size_type actualNumReceives =
          Teuchos::as<size_type>(plan.getNumReceives()) +
          Teuchos::as<size_type>(plan.hasSelfMessage() ? 1 : 0);

      for (size_type i = 0; i < actualNumReceives; ++i) {
        const int src = plan.getProcsFrom()[i];
        (*ialltofewv_.rdispls)[src] = importStarts[i];
        (*ialltofewv_.recvcounts)[src] = Teuchos::as<int>(importLengths[i]);
      }
    }
  }

  MPI_Datatype rawType = ::Tpetra::Details::MpiTypeTraits<ExportValue>::getType(ExportValue{});

  Teuchos::RCP<const Teuchos::MpiComm<int>> tMpiComm =
      Teuchos::rcp_dynamic_cast<const Teuchos::MpiComm<int>>(comm);
  Teuchos::RCP<const Teuchos::OpaqueWrapper<MPI_Comm>> oMpiComm =
      tMpiComm->getRawMpiComm();
  MPI_Comm mpiComm = (*oMpiComm)();

  constexpr bool recvDevAccess = Kokkos::SpaceAccessibility<
      Kokkos::DefaultExecutionSpace, typename ImpView::memory_space>::accessible;
  constexpr bool sendDevAccess = Kokkos::SpaceAccessibility<
      Kokkos::DefaultExecutionSpace, typename ExpView::memory_space>::accessible;
  static_assert(recvDevAccess == sendDevAccess, "sending across host/device");

  const int err = ialltofewv_.impl.post<recvDevAccess>(
      exports.data(), ialltofewv_.sendcounts->data(), ialltofewv_.sdispls->data(), rawType,
      imports.data(), ialltofewv_.recvcounts->data(), ialltofewv_.rdispls->data(),
      mpiTag_, mpiComm, &(*ialltofewv_.req));

  TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
                             "ialltofewv failed with error \""
                                 << Teuchos::mpiErrorCodeToString(err) << "\".");
}
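// Sketch of the count/displacement layout that doPostsIalltofewvImpl builds
// (illustrative only; the numbers are hypothetical).  With roots = {0, 3} and
// a rank that sends 5 packets to root 0 starting at export offset 12 and
// nothing to root 3:
//
//   sendcounts = {5, 0}   // indexed by position in `roots`
//   sdispls    = {12, 0}
//
// On a root rank, recvcounts/rdispls are instead indexed by source rank, with
// one entry per process in the communicator.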
template <class ExpView, class ImpView>
void DistributorActor::doPostsAllToAllImpl(const DistributorPlan& plan,
                                           const ExpView& exports,
                                           const SubViewLimits& exportSubViewLimits,
                                           const ImpView& imports,
                                           const SubViewLimits& importSubViewLimits) {
  TEUCHOS_TEST_FOR_EXCEPTION(
      !plan.getIndicesTo().is_null(), std::runtime_error,
      "Send Type=\"Alltoall\" only works for fast-path communication.");

  using size_type = Teuchos::Array<size_t>::size_type;

  auto comm = plan.getComm();
  std::vector<int> sendcounts(comm->getSize(), 0);
  std::vector<int> sdispls(comm->getSize(), 0);
  std::vector<int> recvcounts(comm->getSize(), 0);
  std::vector<int> rdispls(comm->getSize(), 0);

  auto& [importStarts, importLengths] = importSubViewLimits;
  auto& [exportStarts, exportLengths] = exportSubViewLimits;

  for (size_t pp = 0; pp < plan.getNumSends(); ++pp) {
    sdispls[plan.getProcsTo()[pp]] = exportStarts[pp];
    size_t numPackets = exportLengths[pp];
    // numPackets is converted down to int, so make sure it fits.
    TEUCHOS_TEST_FOR_EXCEPTION(numPackets > size_t(INT_MAX), std::logic_error,
                               "Tpetra::Distributor::doPostsAllToAll: "
                               "Send count for send " << pp << " ("
                                   << numPackets << ") is too large "
                                   "to be represented as int.");
    sendcounts[plan.getProcsTo()[pp]] = static_cast<int>(numPackets);
  }

  const size_type actualNumReceives =
      Teuchos::as<size_type>(plan.getNumReceives()) +
      Teuchos::as<size_type>(plan.hasSelfMessage() ? 1 : 0);

  for (size_type i = 0; i < actualNumReceives; ++i) {
    rdispls[plan.getProcsFrom()[i]] = importStarts[i];
    size_t totalPacketsFrom_i = importLengths[i];
    // totalPacketsFrom_i is converted down to int, so make sure it fits.
    TEUCHOS_TEST_FOR_EXCEPTION(totalPacketsFrom_i > size_t(INT_MAX), std::logic_error,
                               "Tpetra::Distributor::doPostsAllToAll: "
                               "Recv count for receive " << i << " ("
                                   << totalPacketsFrom_i << ") is too large "
                                   "to be represented as int.");
    recvcounts[plan.getProcsFrom()[i]] = static_cast<int>(totalPacketsFrom_i);
  }

  Teuchos::RCP<const Teuchos::MpiComm<int>> mpiComm =
      Teuchos::rcp_dynamic_cast<const Teuchos::MpiComm<int>>(comm);
  Teuchos::RCP<const Teuchos::OpaqueWrapper<MPI_Comm>> rawComm =
      mpiComm->getRawMpiComm();
  using T = typename ExpView::non_const_value_type;
  MPI_Datatype rawType = ::Tpetra::Details::MpiTypeTraits<T>::getType(T());

#if defined(HAVE_TPETRACORE_MPI_ADVANCE)
  if (Details::DISTRIBUTOR_MPIADVANCE_ALLTOALL == plan.getSendType()) {
    MPIX_Comm* mpixComm = *plan.getMPIXComm();
    TEUCHOS_TEST_FOR_EXCEPTION(!mpixComm, std::runtime_error,
                               "MPIX_Comm is null in doPostsAllToAll \""
                                   << __FILE__ << ":" << __LINE__);

    const int err = MPIX_Alltoallv(
        exports.data(), sendcounts.data(), sdispls.data(), rawType,
        imports.data(), recvcounts.data(), rdispls.data(), rawType, mpixComm);

    TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
                               "MPIX_Alltoallv failed with error \""
                                   << Teuchos::mpiErrorCodeToString(err) << "\".");

    return;
  }
#endif  // HAVE_TPETRACORE_MPI_ADVANCE

  const int err = MPI_Alltoallv(
      exports.data(), sendcounts.data(), sdispls.data(), rawType,
      imports.data(), recvcounts.data(), rdispls.data(), rawType, (*rawComm)());

  TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
                             "MPI_Alltoallv failed with error \""
                                 << Teuchos::mpiErrorCodeToString(err) << "\".");
}
#if defined(HAVE_TPETRACORE_MPI_ADVANCE)
template <class ExpView, class ImpView>
void DistributorActor::doPostsNbrAllToAllVImpl(const DistributorPlan& plan,
                                               const ExpView& exports,
                                               const SubViewLimits& exportSubViewLimits,
                                               const ImpView& imports,
                                               const SubViewLimits& importSubViewLimits) {
  TEUCHOS_TEST_FOR_EXCEPTION(
      !plan.getIndicesTo().is_null(), std::runtime_error,
      "Send Type=\"Alltoall\" only works for fast-path communication.");

  const int myRank = plan.getComm()->getRank();
  MPIX_Comm* mpixComm = *plan.getMPIXComm();
  using size_type = Teuchos::Array<size_t>::size_type;

  const size_t numSends = plan.getNumSends() + plan.hasSelfMessage();
  const size_t numRecvs = plan.getNumReceives() + plan.hasSelfMessage();
  std::vector<int> sendcounts(numSends, 0);
  std::vector<int> sdispls(numSends, 0);
  std::vector<int> recvcounts(numRecvs, 0);
  std::vector<int> rdispls(numRecvs, 0);

  auto& [importStarts, importLengths] = importSubViewLimits;
  auto& [exportStarts, exportLengths] = exportSubViewLimits;

  for (size_t pp = 0; pp < numSends; ++pp) {
    sdispls[pp] = exportStarts[pp];
    size_t numPackets = exportLengths[pp];
    // numPackets is converted down to int, so make sure it fits.
    TEUCHOS_TEST_FOR_EXCEPTION(numPackets > size_t(INT_MAX), std::logic_error,
                               "Tpetra::Distributor::doPostsNbrAllToAllV: "
                               "Send count for send " << pp << " ("
                                   << numPackets << ") is too large "
                                   "to be represented as int.");
    sendcounts[pp] = static_cast<int>(numPackets);
  }

  for (size_type i = 0; i < numRecvs; ++i) {
    rdispls[i] = importStarts[i];
    size_t totalPacketsFrom_i = importLengths[i];
    // totalPacketsFrom_i is converted down to int, so make sure it fits.
    TEUCHOS_TEST_FOR_EXCEPTION(totalPacketsFrom_i > size_t(INT_MAX), std::logic_error,
                               "Tpetra::Distributor::doPostsNbrAllToAllV: "
                               "Recv count for receive " << i << " ("
                                   << totalPacketsFrom_i << ") is too large "
                                   "to be represented as int.");
    recvcounts[i] = static_cast<int>(totalPacketsFrom_i);
  }

  using T = typename ExpView::non_const_value_type;
  MPI_Datatype rawType = ::Tpetra::Details::MpiTypeTraits<T>::getType(T());
  // MPI Advance info/topology handles (pointer handle types assumed here).
  MPIX_Info* xinfo = nullptr;
  MPIX_Topo* xtopo = nullptr;
  MPIX_Info_init(&xinfo);
  MPIX_Topo_init(numRecvs, plan.getProcsFrom().data(), recvcounts.data(),
                 numSends, plan.getProcsTo().data(), sendcounts.data(), xinfo, &xtopo);
  const int err = MPIX_Neighbor_alltoallv_topo(
      exports.data(), sendcounts.data(), sdispls.data(), rawType,
      imports.data(), recvcounts.data(), rdispls.data(), rawType, xtopo, mpixComm);
  MPIX_Topo_free(&xtopo);
  MPIX_Info_free(&xinfo);
  TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
                             "MPIX_Neighbor_alltoallv failed with error \""
                                 << Teuchos::mpiErrorCodeToString(err) << "\".");
}
#endif  // HAVE_TPETRACORE_MPI_ADVANCE
#endif  // HAVE_TPETRA_MPI
template <class ImpView>
void DistributorActor::doPostRecvs(const DistributorPlan& plan,
                                   size_t numPackets,
                                   const ImpView& imports) {
  auto importSubViewLimits = plan.getImportViewLimits(numPackets);
  doPostRecvsImpl(plan, imports, importSubViewLimits);
}

template <class ImpView>
void DistributorActor::doPostRecvs(const DistributorPlan& plan,
                                   const ImpView& imports,
                                   const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID) {
  auto importSubViewLimits = plan.getImportViewLimits(numImportPacketsPerLID);
  doPostRecvsImpl(plan, imports, importSubViewLimits);
}
template <class ImpView>
void DistributorActor::doPostRecvsImpl(const DistributorPlan& plan,
                                       const ImpView& imports,
                                       const SubViewLimits& importSubViewLimits) {
  static_assert(isKokkosView<ImpView>,
                "Data arrays for DistributorActor::doPostRecvs must be Kokkos::Views");
  using Kokkos::Compat::subview_offset;
  using Teuchos::Array;
  using Teuchos::as;
  using Teuchos::ireceive;
  using size_type = Array<size_t>::size_type;
  using imports_view_type = ImpView;

  auto comm = plan.getComm();
  // Use a distinct MPI tag for this round of communication.
  auto non_const_comm = Teuchos::rcp_const_cast<Teuchos::Comm<int>>(comm);
  mpiTag_ = non_const_comm->incrementTag();

#ifdef KOKKOS_ENABLE_CUDA
  static_assert(!std::is_same<typename ImpView::memory_space, Kokkos::CudaUVMSpace>::value,
                "Please do not use Tpetra::Distributor with UVM "
                "allocations. See GitHub issue #1088.");
#endif  // KOKKOS_ENABLE_CUDA

#ifdef KOKKOS_ENABLE_SYCL
  static_assert(!std::is_same<typename ImpView::memory_space, Kokkos::Experimental::SYCLSharedUSMSpace>::value,
                "Please do not use Tpetra::Distributor with SharedUSM "
                "allocations. See GitHub issue #1088 (corresponding to CUDA).");
#endif  // KOKKOS_ENABLE_SYCL
#if defined(HAVE_TPETRA_MPI)
  // The collective send types post their receives as part of the collective
  // itself, so there is nothing to do here.
  const Details::EDistributorSendType sendType = plan.getSendType();
  if ((sendType == Details::DISTRIBUTOR_ALLTOALL) || (sendType == Details::DISTRIBUTOR_IALLTOFEWV)
#ifdef HAVE_TPETRACORE_MPI_ADVANCE
      || (sendType == Details::DISTRIBUTOR_MPIADVANCE_ALLTOALL) || (sendType == Details::DISTRIBUTOR_MPIADVANCE_NBRALLTOALLV)
#endif  // HAVE_TPETRACORE_MPI_ADVANCE
  ) {
    return;
  }
#endif  // HAVE_TPETRA_MPI

  ProfilingRegion pr("Tpetra::Distributor::doPostRecvs");

  const int myProcID = plan.getComm()->getRank();

  auto& [importStarts, importLengths] = importSubViewLimits;

  const size_type actualNumReceives = as<size_type>(plan.getNumReceives()) +
                                      as<size_type>(plan.hasSelfMessage() ? 1 : 0);
#ifdef HAVE_TPETRA_DEBUG
  size_t totalNumImportPackets = 0;
  for (size_t i = 0; i < Teuchos::as<size_t>(actualNumReceives); ++i) {
    totalNumImportPackets += importLengths[i];
  }
  TEUCHOS_TEST_FOR_EXCEPTION(
      imports.extent(0) < totalNumImportPackets, std::runtime_error,
      "Tpetra::Distributor::doPostRecvs: The 'imports' array must have "
      "enough entries to hold the expected number of import packets.  "
      "imports.extent(0) = " << imports.extent(0) << " < "
      "totalNumImportPackets = " << totalNumImportPackets << ".");
  TEUCHOS_TEST_FOR_EXCEPTION(!requestsRecv_.empty(), std::logic_error,
                             "Tpetra::Distributor::doPostRecvs: Process "
                                 << myProcID << ": requestsRecv_.size () = "
                                 << requestsRecv_.size() << " != 0.");
#endif  // HAVE_TPETRA_DEBUG
  requestsRecv_.resize(0);

  ProfilingRegion prr("Tpetra::Distributor::doPostRecvs MPI_Irecv");

  for (size_type i = 0; i < actualNumReceives; ++i) {
    size_t totalPacketsFrom_i = importLengths[Teuchos::as<size_t>(i)];
    TEUCHOS_TEST_FOR_EXCEPTION(totalPacketsFrom_i > size_t(INT_MAX), std::logic_error,
                               "Tpetra::Distributor::doPostRecvs: "
                               "Recv count for receive " << i << " ("
                                   << totalPacketsFrom_i << ") is too large "
                                   "to be represented as int.");
    if (plan.getProcsFrom()[i] != myProcID && totalPacketsFrom_i) {
      // Post a nonblocking receive for this process's packets.
      imports_view_type recvBuf =
          subview_offset(imports, importStarts[i], totalPacketsFrom_i);
      requestsRecv_.push_back(ireceive<int>(recvBuf, plan.getProcsFrom()[i],
                                            mpiTag_, *plan.getComm()));
    }
  }
}
template <class ExpView, class ImpView>
void DistributorActor::doPostSends(const DistributorPlan& plan,
                                   const ExpView& exports,
                                   size_t numPackets,
                                   const ImpView& imports) {
  auto exportSubViewLimits = plan.getExportViewLimits(numPackets);
  auto importSubViewLimits = plan.getImportViewLimits(numPackets);
  doPostSendsImpl(plan, exports, exportSubViewLimits, imports, importSubViewLimits);
}

template <class ExpView, class ImpView>
void DistributorActor::doPostSends(const DistributorPlan& plan,
                                   const ExpView& exports,
                                   const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
                                   const ImpView& imports,
                                   const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID) {
  auto exportSubViewLimits = plan.getExportViewLimits(numExportPacketsPerLID);
  auto importSubViewLimits = plan.getImportViewLimits(numImportPacketsPerLID);
  doPostSendsImpl(plan, exports, exportSubViewLimits, imports, importSubViewLimits);
}
template <class ExpView, class ImpView>
void DistributorActor::doPostSendsImpl(const DistributorPlan& plan,
                                       const ExpView& exports,
                                       const SubViewLimits& exportSubViewLimits,
                                       const ImpView& imports,
                                       const SubViewLimits& importSubViewLimits) {
  static_assert(areKokkosViews<ExpView, ImpView>,
                "Data arrays for DistributorActor::doPostSends must be Kokkos::Views");
  using Kokkos::Compat::deep_copy_offset;
  using Kokkos::Compat::subview_offset;
  using Teuchos::Array;
  using Teuchos::as;
  using Teuchos::isend;
  using Teuchos::send;
  using size_type = Array<size_t>::size_type;
  using exports_view_type = ExpView;

#ifdef KOKKOS_ENABLE_CUDA
  static_assert(!std::is_same<typename ExpView::memory_space, Kokkos::CudaUVMSpace>::value &&
                    !std::is_same<typename ImpView::memory_space, Kokkos::CudaUVMSpace>::value,
                "Please do not use Tpetra::Distributor with UVM allocations. "
                "See Trilinos GitHub issue #1088.");
#endif  // KOKKOS_ENABLE_CUDA

#ifdef KOKKOS_ENABLE_SYCL
  static_assert(!std::is_same<typename ExpView::memory_space, Kokkos::Experimental::SYCLSharedUSMSpace>::value &&
                    !std::is_same<typename ImpView::memory_space, Kokkos::Experimental::SYCLSharedUSMSpace>::value,
                "Please do not use Tpetra::Distributor with SharedUSM allocations. "
                "See Trilinos GitHub issue #1088 (corresponding to CUDA).");
#endif  // KOKKOS_ENABLE_SYCL

  ProfilingRegion ps("Tpetra::Distributor::doPostSends");

  const int myRank = plan.getComm()->getRank();
  const Details::EDistributorSendType sendType = plan.getSendType();

  auto& [exportStarts, exportLengths] = exportSubViewLimits;
  auto& [importStarts, importLengths] = importSubViewLimits;

#if defined(HAVE_TPETRA_MPI)
  // Collective send types dispatch to their own implementations; everything
  // below this block is the point-to-point path.
  if (sendType == Details::DISTRIBUTOR_ALLTOALL) {
    doPostsAllToAllImpl(plan, exports, exportSubViewLimits, imports, importSubViewLimits);
    return;
  } else if (sendType == Details::DISTRIBUTOR_IALLTOFEWV) {
    doPostsIalltofewvImpl(plan, exports, exportSubViewLimits, imports, importSubViewLimits);
    return;
  }
#ifdef HAVE_TPETRACORE_MPI_ADVANCE
  else if (sendType == Details::DISTRIBUTOR_MPIADVANCE_ALLTOALL) {
    doPostsAllToAllImpl(plan, exports, exportSubViewLimits, imports, importSubViewLimits);
    return;
  } else if (sendType == Details::DISTRIBUTOR_MPIADVANCE_NBRALLTOALLV) {
    doPostsNbrAllToAllVImpl(plan, exports, exportSubViewLimits, imports, importSubViewLimits);
    return;
  }
#endif  // defined(HAVE_TPETRACORE_MPI_ADVANCE)

#else   // HAVE_TPETRA_MPI
  if (plan.hasSelfMessage()) {
    // With no MPI, the only possible communication is the self-message:
    // copy directly from the export buffer into the import buffer.
    size_t selfReceiveOffset = 0;
    deep_copy_offset(imports, exports, selfReceiveOffset,
                     exportStarts[0], exportLengths[0]);
  }
#endif  // HAVE_TPETRA_MPI
  size_t selfReceiveOffset = 0;

#ifdef HAVE_TPETRA_DEBUG
  TEUCHOS_TEST_FOR_EXCEPTION(requestsSend_.size() != 0,
                             std::logic_error,
                             "Tpetra::Distributor::doPostSends: Process "
                                 << myRank << ": requestsSend_.size() = "
                                 << requestsSend_.size() << " != 0.");
#endif  // HAVE_TPETRA_DEBUG

  const size_type actualNumReceives = as<size_type>(plan.getNumReceives()) +
                                      as<size_type>(plan.hasSelfMessage() ? 1 : 0);
  requestsSend_.resize(0);
  for (size_type i = 0; i < actualNumReceives; ++i) {
    if (plan.getProcsFrom()[i] == myRank) {
      // Remember the offset at which the self-message lands in 'imports'.
      selfReceiveOffset = importStarts[i];
    }
  }

  ProfilingRegion pss("Tpetra::Distributor::doPostSends sends");

  // Scan through the processes we send to, starting with the first rank
  // greater than myRank and wrapping around (this helps balance message
  // traffic).
  size_t numBlocks = plan.getNumSends() + plan.hasSelfMessage();
  size_t procIndex = 0;
  while ((procIndex < numBlocks) && (plan.getProcsTo()[procIndex] < myRank)) {
    ++procIndex;
  }
  if (procIndex == numBlocks) {
    procIndex = 0;
  }

  size_t selfNum = 0;
  size_t selfIndex = 0;
  if (plan.getIndicesTo().is_null()) {
    const char isend_region[] = "Tpetra::Distributor::doPostSends MPI_Isend FAST";
    const char send_region[] = "Tpetra::Distributor::doPostSends MPI_Send FAST";
    ProfilingRegion pssf((sendType == Details::DISTRIBUTOR_ISEND) ? isend_region : send_region);

    // Fast path: the data are already blocked by process, so we can send
    // straight out of the exports buffer.
    for (size_t i = 0; i < numBlocks; ++i) {
      size_t p = i + procIndex;
      if (p > (numBlocks - 1)) {
        p -= numBlocks;
      }

      if (plan.getProcsTo()[p] != myRank) {
        if (exportLengths[p] == 0) {
          // Do not send messages of length zero.
          continue;
        }

        exports_view_type tmpSend = subview_offset(exports, exportStarts[p], exportLengths[p]);

        if (sendType == Details::DISTRIBUTOR_ISEND) {
          exports_view_type tmpSendBuf =
              subview_offset(exports, exportStarts[p], exportLengths[p]);
          requestsSend_.push_back(isend<int>(tmpSendBuf, plan.getProcsTo()[p],
                                             mpiTag_, *plan.getComm()));
        } else {  // sendType == DISTRIBUTOR_SEND
          send<int>(tmpSend,
                    as<int>(tmpSend.size()),
                    plan.getProcsTo()[p], mpiTag_, *plan.getComm());
        }
      } else {  // "Sending" the message to myself
        selfNum = p;
      }
    }

    if (plan.hasSelfMessage()) {
      // Copy the self-message directly from exports into imports.
      deep_copy_offset(imports, exports, selfReceiveOffset,
                       exportStarts[selfNum], exportLengths[selfNum]);
    }
  } else {  // Slow path: data are not blocked by process; use a send buffer.
    ProfilingRegion psss("Tpetra::Distributor::doPostSends: MPI_Send SLOW");

    using Packet = typename ExpView::non_const_value_type;
    using Layout = typename ExpView::array_layout;
    using Device = typename ExpView::device_type;
    using Mem = typename ExpView::memory_traits;

#ifdef HAVE_TPETRA_DEBUG
    if (sendType != Details::DISTRIBUTOR_SEND) {
      if (plan.getComm()->getRank() == 0)
        std::cout << "The requested Tpetra send type "
                  << DistributorSendTypeEnumToString(sendType)
                  << " requires Distributor data to be ordered by"
                  << " the receiving processor rank.  Since these"
                  << " data are not ordered, Tpetra will use Send"
                  << " instead." << std::endl;
    }
#endif  // HAVE_TPETRA_DEBUG
    size_t maxSendLength = 0;
    for (size_t i = 0; i < numBlocks; ++i) {
      size_t p = i + procIndex;
      if (p > (numBlocks - 1)) {
        p -= numBlocks;
      }

      size_t sendArrayOffset = 0;
      size_t j = plan.getStartsTo()[p];
      for (size_t k = 0; k < plan.getLengthsTo()[p]; ++k, ++j) {
        sendArrayOffset += exportLengths[j];
      }
      maxSendLength = std::max(maxSendLength, sendArrayOffset);
    }
    Kokkos::View<Packet*, Layout, Device, Mem> sendArray("sendArray", maxSendLength);
    for (size_t i = 0; i < numBlocks; ++i) {
      size_t p = i + procIndex;
      if (p > (numBlocks - 1)) {
        p -= numBlocks;
      }

      if (plan.getProcsTo()[p] != myRank) {
        // Pack this process's data contiguously into sendArray, then send it.
        size_t sendArrayOffset = 0;
        size_t j = plan.getStartsTo()[p];
        for (size_t k = 0; k < plan.getLengthsTo()[p]; ++k, ++j) {
          packOffset(sendArray, exports, sendArrayOffset, exportStarts[j], exportLengths[j]);
          sendArrayOffset += exportLengths[j];
        }
        typename ExpView::execution_space().fence();

        auto tmpSend =
            subview_offset(sendArray, size_t(0), sendArrayOffset);

        send<int>(tmpSend,
                  as<int>(tmpSend.size()),
                  plan.getProcsTo()[p], mpiTag_, *plan.getComm());
      } else {  // "Sending" the message to myself
        selfNum = p;
        selfIndex = plan.getStartsTo()[p];
      }
    }
    if (plan.hasSelfMessage()) {
      for (size_t k = 0; k < plan.getLengthsTo()[selfNum]; ++k) {
        packOffset(imports, exports, selfReceiveOffset, exportStarts[selfIndex], exportLengths[selfIndex]);
        selfReceiveOffset += exportLengths[selfIndex];