Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_DistObject_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 // clang-format off
11 #ifndef TPETRA_DISTOBJECT_DEF_HPP
12 #define TPETRA_DISTOBJECT_DEF_HPP
13 
21 
22 #include "Tpetra_Distributor.hpp"
25 #include "Tpetra_Details_checkGlobalError.hpp"
27 #include "Tpetra_Util.hpp" // Details::createPrefix
28 #include "Teuchos_CommHelpers.hpp"
29 #include "Teuchos_TypeNameTraits.hpp"
30 #include <typeinfo>
31 #include <memory>
32 #include <sstream>
33 
34 namespace Tpetra {
35 
36  namespace { // (anonymous)
37  template<class DeviceType, class IndexType = size_t>
38  struct SumFunctor {
39  SumFunctor (const Kokkos::View<const size_t*, DeviceType>& viewToSum) :
40  viewToSum_ (viewToSum) {}
41  KOKKOS_INLINE_FUNCTION void operator() (const IndexType i, size_t& lclSum) const {
42  lclSum += viewToSum_(i);
43  }
44  Kokkos::View<const size_t*, DeviceType> viewToSum_;
45  };
46 
47  template<class DeviceType, class IndexType = size_t>
48  size_t
49  countTotalImportPackets (const Kokkos::View<const size_t*, DeviceType>& numImportPacketsPerLID)
50  {
51  using Kokkos::parallel_reduce;
52  typedef DeviceType DT;
53  typedef typename DT::execution_space DES;
54  typedef Kokkos::RangePolicy<DES, IndexType> range_type;
55 
56  const IndexType numOut = numImportPacketsPerLID.extent (0);
57  size_t totalImportPackets = 0;
58  parallel_reduce ("Count import packets",
59  range_type (0, numOut),
60  SumFunctor<DeviceType, IndexType> (numImportPacketsPerLID),
61  totalImportPackets);
62  return totalImportPackets;
63  }
64  } // namespace (anonymous)
65 
66 
67  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
69  DistObject (const Teuchos::RCP<const map_type>& map) :
70  map_ (map)
71  {
72 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
73  using Teuchos::RCP;
74  using Teuchos::Time;
75  using Teuchos::TimeMonitor;
76 
77  RCP<Time> doXferTimer =
78  TimeMonitor::lookupCounter ("Tpetra::DistObject::doTransfer");
79  if (doXferTimer.is_null ()) {
80  doXferTimer =
81  TimeMonitor::getNewCounter ("Tpetra::DistObject::doTransfer");
82  }
83  doXferTimer_ = doXferTimer;
84 
85  RCP<Time> copyAndPermuteTimer =
86  TimeMonitor::lookupCounter ("Tpetra::DistObject::copyAndPermute");
87  if (copyAndPermuteTimer.is_null ()) {
88  copyAndPermuteTimer =
89  TimeMonitor::getNewCounter ("Tpetra::DistObject::copyAndPermute");
90  }
91  copyAndPermuteTimer_ = copyAndPermuteTimer;
92 
93  RCP<Time> packAndPrepareTimer =
94  TimeMonitor::lookupCounter ("Tpetra::DistObject::packAndPrepare");
95  if (packAndPrepareTimer.is_null ()) {
96  packAndPrepareTimer =
97  TimeMonitor::getNewCounter ("Tpetra::DistObject::packAndPrepare");
98  }
99  packAndPrepareTimer_ = packAndPrepareTimer;
100 
101  RCP<Time> doPostsAndWaitsTimer =
102  TimeMonitor::lookupCounter ("Tpetra::DistObject::doPostsAndWaits");
103  if (doPostsAndWaitsTimer.is_null ()) {
104  doPostsAndWaitsTimer =
105  TimeMonitor::getNewCounter ("Tpetra::DistObject::doPostsAndWaits");
106  }
107  doPostsAndWaitsTimer_ = doPostsAndWaitsTimer;
108 
109  RCP<Time> unpackAndCombineTimer =
110  TimeMonitor::lookupCounter ("Tpetra::DistObject::unpackAndCombine");
111  if (unpackAndCombineTimer.is_null ()) {
112  unpackAndCombineTimer =
113  TimeMonitor::getNewCounter ("Tpetra::DistObject::unpackAndCombine");
114  }
115  unpackAndCombineTimer_ = unpackAndCombineTimer;
116 #endif // HAVE_TPETRA_TRANSFER_TIMERS
117  }
118 
119  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
120  std::string
122  description () const
123  {
124  using Teuchos::TypeNameTraits;
125 
126  std::ostringstream os;
127  os << "\"Tpetra::DistObject\": {"
128  << "Packet: " << TypeNameTraits<packet_type>::name ()
129  << ", LocalOrdinal: " << TypeNameTraits<local_ordinal_type>::name ()
130  << ", GlobalOrdinal: " << TypeNameTraits<global_ordinal_type>::name ()
131  << ", Node: " << TypeNameTraits<Node>::name ();
132  if (this->getObjectLabel () != "") {
133  os << "Label: \"" << this->getObjectLabel () << "\"";
134  }
135  os << "}";
136  return os.str ();
137  }
138 
139  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
140  void
142  describe (Teuchos::FancyOStream &out,
143  const Teuchos::EVerbosityLevel verbLevel) const
144  {
145  using Teuchos::rcpFromRef;
146  using Teuchos::TypeNameTraits;
147  using std::endl;
148  const Teuchos::EVerbosityLevel vl = (verbLevel == Teuchos::VERB_DEFAULT) ?
149  Teuchos::VERB_LOW : verbLevel;
150  Teuchos::RCP<const Teuchos::Comm<int> > comm = this->getMap ()->getComm ();
151  const int myRank = comm.is_null () ? 0 : comm->getRank ();
152  const int numProcs = comm.is_null () ? 1 : comm->getSize ();
153 
154  if (vl != Teuchos::VERB_NONE) {
155  Teuchos::OSTab tab0 (out);
156  if (myRank == 0) {
157  out << "\"Tpetra::DistObject\":" << endl;
158  }
159  Teuchos::OSTab tab1 (out);
160  if (myRank == 0) {
161  out << "Template parameters:" << endl;
162  {
163  Teuchos::OSTab tab2 (out);
164  out << "Packet: " << TypeNameTraits<packet_type>::name () << endl
165  << "LocalOrdinal: " << TypeNameTraits<local_ordinal_type>::name () << endl
166  << "GlobalOrdinal: " << TypeNameTraits<global_ordinal_type>::name () << endl
167  << "Node: " << TypeNameTraits<node_type>::name () << endl;
168  }
169  if (this->getObjectLabel () != "") {
170  out << "Label: \"" << this->getObjectLabel () << "\"" << endl;
171  }
172  } // if myRank == 0
173 
174  // Describe the Map.
175  {
176  if (myRank == 0) {
177  out << "Map:" << endl;
178  }
179  Teuchos::OSTab tab2 (out);
180  map_->describe (out, vl);
181  }
182 
183  // At verbosity > VERB_LOW, each process prints something.
184  if (vl > Teuchos::VERB_LOW) {
185  for (int p = 0; p < numProcs; ++p) {
186  if (myRank == p) {
187  out << "Process " << myRank << ":" << endl;
188  Teuchos::OSTab tab2 (out);
189  out << "Export buffer size (in packets): "
190  << exports_.extent (0)
191  << endl
192  << "Import buffer size (in packets): "
193  << imports_.extent (0)
194  << endl;
195  }
196  if (! comm.is_null ()) {
197  comm->barrier (); // give output time to finish
198  comm->barrier ();
199  comm->barrier ();
200  }
201  } // for each process rank p
202  } // if vl > VERB_LOW
203  } // if vl != VERB_NONE
204  }
205 
206  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
207  void
209  removeEmptyProcessesInPlace (const Teuchos::RCP<const map_type>& /* newMap */)
210  {
211  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
212  "Tpetra::DistObject::removeEmptyProcessesInPlace: Not implemented");
213  }
214 
215  /* These are provided in base DistObject template
216  template<class DistObjectType>
217  void
218  removeEmptyProcessesInPlace (Teuchos::RCP<DistObjectType>& input,
219  const Teuchos::RCP<const Map<typename DistObjectType::local_ordinal_type,
220  typename DistObjectType::global_ordinal_type,
221  typename DistObjectType::node_type> >& newMap)
222  {
223  input->removeEmptyProcessesInPlace (newMap);
224  if (newMap.is_null ()) { // my process is excluded
225  input = Teuchos::null;
226  }
227  }
228 
229  template<class DistObjectType>
230  void
231  removeEmptyProcessesInPlace (Teuchos::RCP<DistObjectType>& input)
232  {
233  using Teuchos::RCP;
234  typedef typename DistObjectType::local_ordinal_type LO;
235  typedef typename DistObjectType::global_ordinal_type GO;
236  typedef typename DistObjectType::node_type NT;
237  typedef Map<LO, GO, NT> map_type;
238 
239  RCP<const map_type> newMap = input->getMap ()->removeEmptyProcesses ();
240  removeEmptyProcessesInPlace<DistObjectType> (input, newMap);
241  }
242  */
243 
244  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
245  void
247  doImport (const SrcDistObject& source,
249  const CombineMode CM,
250  const bool restrictedMode)
251  {
252  using Details::Behavior;
253  using std::endl;
254  const char modeString[] = "doImport (forward mode)";
255 
256  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
257  // output to std::cerr on every MPI process. This is unwise for
258  // runs with large numbers of MPI processes.
259  const bool verbose = Behavior::verbose("DistObject");
260  std::unique_ptr<std::string> prefix;
261  if (verbose) {
262  prefix = this->createPrefix("DistObject", modeString);
263  std::ostringstream os;
264  os << *prefix << "Start" << endl;
265  std::cerr << os.str ();
266  }
267  this->beginImport(source, importer, CM, restrictedMode);
268  this->endImport(source, importer, CM, restrictedMode);
269  if (verbose) {
270  std::ostringstream os;
271  os << *prefix << "Done" << endl;
272  std::cerr << os.str ();
273  }
274  }
275 
276  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
277  void
279  doExport (const SrcDistObject& source,
281  const CombineMode CM,
282  const bool restrictedMode)
283  {
284  using Details::Behavior;
285  using std::endl;
286  const char modeString[] = "doExport (forward mode)";
287 
288  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
289  // output to std::cerr on every MPI process. This is unwise for
290  // runs with large numbers of MPI processes.
291  const bool verbose = Behavior::verbose("DistObject");
292  std::unique_ptr<std::string> prefix;
293  if (verbose) {
294  prefix = this->createPrefix("DistObject", modeString);
295  std::ostringstream os;
296  os << *prefix << "Start" << endl;
297  std::cerr << os.str ();
298  }
299  this->beginExport(source, exporter, CM, restrictedMode);
300  this->endExport(source, exporter, CM, restrictedMode);
301  if (verbose) {
302  std::ostringstream os;
303  os << *prefix << "Done" << endl;
304  std::cerr << os.str ();
305  }
306  }
307 
308  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
309  void
311  doImport (const SrcDistObject& source,
313  const CombineMode CM,
314  const bool restrictedMode)
315  {
316  using Details::Behavior;
317  using std::endl;
318  const char modeString[] = "doImport (reverse mode)";
319 
320  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
321  // output to std::cerr on every MPI process. This is unwise for
322  // runs with large numbers of MPI processes.
323  const bool verbose = Behavior::verbose("DistObject");
324  std::unique_ptr<std::string> prefix;
325  if (verbose) {
326  prefix = this->createPrefix("DistObject", modeString);
327  std::ostringstream os;
328  os << *prefix << "Start" << endl;
329  std::cerr << os.str ();
330  }
331  this->beginImport(source, exporter, CM, restrictedMode);
332  this->endImport(source, exporter, CM, restrictedMode);
333  if (verbose) {
334  std::ostringstream os;
335  os << *prefix << "Done" << endl;
336  std::cerr << os.str ();
337  }
338  }
339 
340  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
341  void
343  doExport (const SrcDistObject& source,
345  const CombineMode CM,
346  const bool restrictedMode)
347  {
348  using Details::Behavior;
349  using std::endl;
350  const char modeString[] = "doExport (reverse mode)";
351 
352  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
353  // output to std::cerr on every MPI process. This is unwise for
354  // runs with large numbers of MPI processes.
355  const bool verbose = Behavior::verbose("DistObject");
356  std::unique_ptr<std::string> prefix;
357  if (verbose) {
358  prefix = this->createPrefix("DistObject", modeString);
359  std::ostringstream os;
360  os << *prefix << "Start" << endl;
361  std::cerr << os.str ();
362  }
363  this->beginExport(source, importer, CM, restrictedMode);
364  this->endExport(source, importer, CM, restrictedMode);
365  if (verbose) {
366  std::ostringstream os;
367  os << *prefix << "Done" << endl;
368  std::cerr << os.str ();
369  }
370  }
371 
372  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
373  void
375  beginImport(const SrcDistObject& source,
377  const CombineMode CM,
378  const bool restrictedMode)
379  {
380  using Details::Behavior;
381  using std::endl;
382  const char modeString[] = "beginImport (forward mode)";
383 
384  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
385  // output to std::cerr on every MPI process. This is unwise for
386  // runs with large numbers of MPI processes.
387  const bool verbose = Behavior::verbose("DistObject");
388  std::unique_ptr<std::string> prefix;
389  if (verbose) {
390  prefix = this->createPrefix("DistObject", modeString);
391  std::ostringstream os;
392  os << *prefix << "Start" << endl;
393  std::cerr << os.str ();
394  }
395  this->beginTransfer(source, importer, modeString, DoForward, CM, restrictedMode);
396  if (verbose) {
397  std::ostringstream os;
398  os << *prefix << "Done" << endl;
399  std::cerr << os.str ();
400  }
401  }
402 
403  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
404  void
405  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
406  beginExport(const SrcDistObject& source,
407  const Export<LocalOrdinal, GlobalOrdinal, Node>& exporter,
408  const CombineMode CM,
409  const bool restrictedMode)
410  {
411  using Details::Behavior;
412  using std::endl;
413  const char modeString[] = "beginExport (forward mode)";
414 
415  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
416  // output to std::cerr on every MPI process. This is unwise for
417  // runs with large numbers of MPI processes.
418  const bool verbose = Behavior::verbose("DistObject");
419  std::unique_ptr<std::string> prefix;
420  if (verbose) {
421  prefix = this->createPrefix("DistObject", modeString);
422  std::ostringstream os;
423  os << *prefix << "Start" << endl;
424  std::cerr << os.str ();
425  }
426  this->beginTransfer(source, exporter, modeString, DoForward, CM, restrictedMode);
427  if (verbose) {
428  std::ostringstream os;
429  os << *prefix << "Done" << endl;
430  std::cerr << os.str ();
431  }
432  }
433 
434  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
435  void
436  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
437  beginImport(const SrcDistObject& source,
438  const Export<LocalOrdinal, GlobalOrdinal, Node>& exporter,
439  const CombineMode CM,
440  const bool restrictedMode)
441  {
442  using Details::Behavior;
443  using std::endl;
444  const char modeString[] = "beginImport (reverse mode)";
445 
446  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
447  // output to std::cerr on every MPI process. This is unwise for
448  // runs with large numbers of MPI processes.
449  const bool verbose = Behavior::verbose("DistObject");
450  std::unique_ptr<std::string> prefix;
451  if (verbose) {
452  prefix = this->createPrefix("DistObject", modeString);
453  std::ostringstream os;
454  os << *prefix << "Start" << endl;
455  std::cerr << os.str ();
456  }
457  this->beginTransfer(source, exporter, modeString, DoReverse, CM, restrictedMode);
458  if (verbose) {
459  std::ostringstream os;
460  os << *prefix << "Done" << endl;
461  std::cerr << os.str ();
462  }
463  }
464 
465  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
466  void
467  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
468  beginExport(const SrcDistObject& source,
469  const Import<LocalOrdinal, GlobalOrdinal, Node> & importer,
470  const CombineMode CM,
471  const bool restrictedMode)
472  {
473  using Details::Behavior;
474  using std::endl;
475  const char modeString[] = "beginExport (reverse mode)";
476 
477  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
478  // output to std::cerr on every MPI process. This is unwise for
479  // runs with large numbers of MPI processes.
480  const bool verbose = Behavior::verbose("DistObject");
481  std::unique_ptr<std::string> prefix;
482  if (verbose) {
483  prefix = this->createPrefix("DistObject", modeString);
484  std::ostringstream os;
485  os << *prefix << "Start" << endl;
486  std::cerr << os.str ();
487  }
488  this->beginTransfer(source, importer, modeString, DoReverse, CM, restrictedMode);
489  if (verbose) {
490  std::ostringstream os;
491  os << *prefix << "Done" << endl;
492  std::cerr << os.str ();
493  }
494  }
495 
496  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
497  void
498  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
499  endImport(const SrcDistObject& source,
500  const Import<LocalOrdinal, GlobalOrdinal, Node>& importer,
501  const CombineMode CM,
502  const bool restrictedMode)
503  {
504  using Details::Behavior;
505  using std::endl;
506  const char modeString[] = "endImport (forward mode)";
507 
508  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
509  // output to std::cerr on every MPI process. This is unwise for
510  // runs with large numbers of MPI processes.
511  const bool verbose = Behavior::verbose("DistObject");
512  std::unique_ptr<std::string> prefix;
513  if (verbose) {
514  prefix = this->createPrefix("DistObject", modeString);
515  std::ostringstream os;
516  os << *prefix << "Start" << endl;
517  std::cerr << os.str ();
518  }
519  this->endTransfer(source, importer, modeString, DoForward, CM, restrictedMode);
520  if (verbose) {
521  std::ostringstream os;
522  os << *prefix << "Done" << endl;
523  std::cerr << os.str ();
524  }
525  }
526 
527  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
528  void
529  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
530  endExport(const SrcDistObject& source,
531  const Export<LocalOrdinal, GlobalOrdinal, Node>& exporter,
532  const CombineMode CM,
533  const bool restrictedMode)
534  {
535  using Details::Behavior;
536  using std::endl;
537  const char modeString[] = "endExport (forward mode)";
538 
539  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
540  // output to std::cerr on every MPI process. This is unwise for
541  // runs with large numbers of MPI processes.
542  const bool verbose = Behavior::verbose("DistObject");
543  std::unique_ptr<std::string> prefix;
544  if (verbose) {
545  prefix = this->createPrefix("DistObject", modeString);
546  std::ostringstream os;
547  os << *prefix << "Start" << endl;
548  std::cerr << os.str ();
549  }
550  this->endTransfer(source, exporter, modeString, DoForward, CM, restrictedMode);
551  if (verbose) {
552  std::ostringstream os;
553  os << *prefix << "Done" << endl;
554  std::cerr << os.str ();
555  }
556  }
557 
558  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
559  void
560  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
561  endImport(const SrcDistObject& source,
562  const Export<LocalOrdinal, GlobalOrdinal, Node>& exporter,
563  const CombineMode CM,
564  const bool restrictedMode)
565  {
566  using Details::Behavior;
567  using std::endl;
568  const char modeString[] = "endImport (reverse mode)";
569 
570  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
571  // output to std::cerr on every MPI process. This is unwise for
572  // runs with large numbers of MPI processes.
573  const bool verbose = Behavior::verbose("DistObject");
574  std::unique_ptr<std::string> prefix;
575  if (verbose) {
576  prefix = this->createPrefix("DistObject", modeString);
577  std::ostringstream os;
578  os << *prefix << "Start" << endl;
579  std::cerr << os.str ();
580  }
581  this->endTransfer(source, exporter, modeString, DoReverse, CM, restrictedMode);
582  if (verbose) {
583  std::ostringstream os;
584  os << *prefix << "Done" << endl;
585  std::cerr << os.str ();
586  }
587  }
588 
589  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
590  void
591  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
592  endExport(const SrcDistObject& source,
593  const Import<LocalOrdinal, GlobalOrdinal, Node> & importer,
594  const CombineMode CM,
595  const bool restrictedMode)
596  {
597  using Details::Behavior;
598  using std::endl;
599  const char modeString[] = "endExport (reverse mode)";
600 
601  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
602  // output to std::cerr on every MPI process. This is unwise for
603  // runs with large numbers of MPI processes.
604  const bool verbose = Behavior::verbose("DistObject");
605  std::unique_ptr<std::string> prefix;
606  if (verbose) {
607  prefix = this->createPrefix("DistObject", modeString);
608  std::ostringstream os;
609  os << *prefix << "Start" << endl;
610  std::cerr << os.str ();
611  }
612  this->endTransfer(source, importer, modeString, DoReverse, CM, restrictedMode);
613  if (verbose) {
614  std::ostringstream os;
615  os << *prefix << "Done" << endl;
616  std::cerr << os.str ();
617  }
618  }
619 
620  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
621  bool
624  return distributorActor_.isReady();
625  }
626 
627  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
628  bool
630  isDistributed () const {
631  return map_->isDistributed ();
632  }
633 
634  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
635  size_t
638  return 0; // default implementation; subclasses may override
639  }
640 
641  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
642  void
645  const ::Tpetra::Details::Transfer<local_ordinal_type, global_ordinal_type, node_type>& transfer,
646  const char modeString[],
647  const ReverseOption revOp,
648  const CombineMode CM,
649  bool restrictedMode)
650  {
651  beginTransfer(src, transfer, modeString, revOp, CM, restrictedMode);
652  endTransfer(src, transfer, modeString, revOp, CM, restrictedMode);
653  }
654 
655  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
656  bool
658  reallocImportsIfNeeded (const size_t newSize,
659  const bool verbose,
660  const std::string* prefix,
661  const bool /*remoteLIDsContiguous*/,
662  const CombineMode /*CM*/)
663  {
664  if (verbose) {
665  std::ostringstream os;
666  os << *prefix << "Realloc (if needed) imports_ from "
667  << imports_.extent (0) << " to " << newSize << std::endl;
668  std::cerr << os.str ();
669  }
671  const bool reallocated =
672  reallocDualViewIfNeeded (this->imports_, newSize, "imports");
673  if (verbose) {
674  std::ostringstream os;
675  os << *prefix << "Finished realloc'ing imports_" << std::endl;
676  std::cerr << os.str ();
677  }
678  return reallocated;
679  }
680 
681  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
682  bool
684  reallocArraysForNumPacketsPerLid (const size_t numExportLIDs,
685  const size_t numImportLIDs)
686  {
687  using Details::Behavior;
690  using std::endl;
691  // If an array is already allocated, and if is at least
692  // tooBigFactor times bigger than it needs to be, free it and
693  // reallocate to the size we need, in order to save space.
694  // Otherwise, take subviews to reduce allocation size.
695  constexpr size_t tooBigFactor = 10;
696 
697  const bool verbose = Behavior::verbose("DistObject");
698  std::unique_ptr<std::string> prefix;
699  if (verbose) {
700  prefix = this->createPrefix("DistObject",
701  "reallocArraysForNumPacketsPerLid");
702  std::ostringstream os;
703  os << *prefix
704  << "numExportLIDs: " << numExportLIDs
705  << ", numImportLIDs: " << numImportLIDs
706  << endl;
707  os << *prefix << "DualView status before:" << endl
708  << *prefix
709  << dualViewStatusToString (this->numExportPacketsPerLID_,
710  "numExportPacketsPerLID_")
711  << endl
712  << *prefix
713  << dualViewStatusToString (this->numImportPacketsPerLID_,
714  "numImportPacketsPerLID_")
715  << endl;
716  std::cerr << os.str ();
717  }
718 
719  // Reallocate numExportPacketsPerLID_ if needed.
720  const bool firstReallocated =
721  reallocDualViewIfNeeded (this->numExportPacketsPerLID_,
722  numExportLIDs,
723  "numExportPacketsPerLID",
724  tooBigFactor,
725  true); // need fence before, if realloc'ing
726 
727  // If we reallocated above, then we fenced after that
728  // reallocation. This means that we don't need to fence again,
729  // before the next reallocation.
730  const bool needFenceBeforeNextAlloc = ! firstReallocated;
731  const bool secondReallocated =
732  reallocDualViewIfNeeded (this->numImportPacketsPerLID_,
733  numImportLIDs,
734  "numImportPacketsPerLID",
735  tooBigFactor,
736  needFenceBeforeNextAlloc);
737 
738  if (verbose) {
739  std::ostringstream os;
740  os << *prefix << "DualView status after:" << endl
741  << *prefix << dualViewStatusToString (this->numExportPacketsPerLID_,
742  "numExportPacketsPerLID_")
743  << endl
744  << *prefix << dualViewStatusToString (this->numImportPacketsPerLID_,
745  "numImportPacketsPerLID_")
746  << endl;
747  std::cerr << os.str ();
748  }
749 
750  return firstReallocated || secondReallocated;
751  }
752 
753  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
754  void
757  const ::Tpetra::Details::Transfer<local_ordinal_type, global_ordinal_type, node_type>& transfer,
758  const char modeString[],
759  const ReverseOption revOp,
760  const CombineMode CM,
761  bool restrictedMode)
762  {
763  using Details::Behavior;
767  using Kokkos::Compat::getArrayView;
768  using Kokkos::Compat::getConstArrayView;
769  using Kokkos::Compat::getKokkosViewDeepCopy;
770  using Kokkos::Compat::create_const_view;
771  using std::endl;
774 
775  const bool commOnHost = ! Behavior::assumeMpiIsGPUAware ();
776  const char funcNameHost[] = "Tpetra::DistObject::beginTransfer[Host]";
777  const char funcNameDevice[] = "Tpetra::DistObject::beginTransfer[Device]";
778  const char *funcName = commOnHost ? funcNameHost : funcNameDevice;
779 
780  ProfilingRegion region_doTransfer(funcName);
781  const bool verbose = Behavior::verbose("DistObject");
782  std::shared_ptr<std::string> prefix;
783  if (verbose) {
784  std::ostringstream os;
785  prefix = this->createPrefix("DistObject", "doTransfer");
786  os << *prefix << "Source type: " << Teuchos::typeName(src)
787  << ", Target type: " << Teuchos::typeName(*this) << endl;
788  std::cerr << os.str();
789  }
790 
791  // "Restricted Mode" does two things:
792  // 1) Skips copyAndPermute
793  // 2) Allows the "target" Map of the transfer to be a subset of
794  // the Map of *this, in a "locallyFitted" sense.
795  //
796  // This cannot be used if #2 is not true, OR there are permutes.
797  // Source Maps still need to match
798 
799  // mfh 18 Oct 2017: Set TPETRA_DEBUG to true to enable extra debug
800  // checks. These may communicate more.
801  const bool debug = Behavior::debug("DistObject");
802  if (debug) {
803  if (! restrictedMode && revOp == DoForward) {
804  const bool myMapSameAsTransferTgtMap =
805  this->getMap ()->isSameAs (* (transfer.getTargetMap ()));
806  TEUCHOS_TEST_FOR_EXCEPTION
807  (! myMapSameAsTransferTgtMap, std::invalid_argument,
808  "Tpetra::DistObject::" << modeString << ": For forward-mode "
809  "communication, the target DistObject's Map must be the same "
810  "(in the sense of Tpetra::Map::isSameAs) as the input "
811  "Export/Import object's target Map.");
812  }
813  else if (! restrictedMode && revOp == DoReverse) {
814  const bool myMapSameAsTransferSrcMap =
815  this->getMap ()->isSameAs (* (transfer.getSourceMap ()));
816  TEUCHOS_TEST_FOR_EXCEPTION
817  (! myMapSameAsTransferSrcMap, std::invalid_argument,
818  "Tpetra::DistObject::" << modeString << ": For reverse-mode "
819  "communication, the target DistObject's Map must be the same "
820  "(in the sense of Tpetra::Map::isSameAs) as the input "
821  "Export/Import object's source Map.");
822  }
823  else if (restrictedMode && revOp == DoForward) {
824  const bool myMapLocallyFittedTransferTgtMap =
825  this->getMap ()->isLocallyFitted (* (transfer.getTargetMap ()));
826  TEUCHOS_TEST_FOR_EXCEPTION
827  (! myMapLocallyFittedTransferTgtMap , std::invalid_argument,
828  "Tpetra::DistObject::" << modeString << ": For forward-mode "
829  "communication using restricted mode, Export/Import object's "
830  "target Map must be locally fitted (in the sense of "
831  "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
832  }
833  else { // if (restrictedMode && revOp == DoReverse)
834  const bool myMapLocallyFittedTransferSrcMap =
835  this->getMap ()->isLocallyFitted (* (transfer.getSourceMap ()));
836  TEUCHOS_TEST_FOR_EXCEPTION
837  (! myMapLocallyFittedTransferSrcMap, std::invalid_argument,
838  "Tpetra::DistObject::" << modeString << ": For reverse-mode "
839  "communication using restricted mode, Export/Import object's "
840  "source Map must be locally fitted (in the sense of "
841  "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
842  }
843 
844  // SrcDistObject need not even _have_ Maps. However, if the
845  // source object is a DistObject, it has a Map, and we may
846  // compare that Map with the Transfer's Maps.
847  const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
848  if (srcDistObj != nullptr) {
849  if (revOp == DoForward) {
850  const bool srcMapSameAsImportSrcMap =
851  srcDistObj->getMap ()->isSameAs (* (transfer.getSourceMap ()));
852  TEUCHOS_TEST_FOR_EXCEPTION
853  (! srcMapSameAsImportSrcMap, std::invalid_argument,
854  "Tpetra::DistObject::" << modeString << ": For forward-mode "
855  "communication, the source DistObject's Map must be the same "
856  "as the input Export/Import object's source Map.");
857  }
858  else { // revOp == DoReverse
859  const bool srcMapSameAsImportTgtMap =
860  srcDistObj->getMap ()->isSameAs (* (transfer.getTargetMap ()));
861  TEUCHOS_TEST_FOR_EXCEPTION
862  (! srcMapSameAsImportTgtMap, std::invalid_argument,
863  "Tpetra::DistObject::" << modeString << ": For reverse-mode "
864  "communication, the source DistObject's Map must be the same "
865  "as the input Export/Import object's target Map.");
866  }
867  }
868  }
869 
870  const size_t numSameIDs = transfer.getNumSameIDs ();
871  Distributor& distor = transfer.getDistributor ();
872  const Details::DistributorPlan& distributorPlan = (revOp == DoForward) ? distor.getPlan() : *distor.getPlan().getReversePlan();
873 
874  TEUCHOS_TEST_FOR_EXCEPTION
875  (debug && restrictedMode &&
876  (transfer.getPermuteToLIDs_dv().extent(0) != 0 ||
877  transfer.getPermuteFromLIDs_dv().extent(0) != 0),
878  std::invalid_argument,
879  "Tpetra::DistObject::" << modeString << ": Transfer object "
880  "cannot have permutes in restricted mode.");
881 
882  // Do we need all communication buffers to live on host?
883  if (verbose) {
884  std::ostringstream os;
885  os << *prefix << "doTransfer: Use new interface; "
886  "commOnHost=" << (commOnHost ? "true" : "false") << endl;
887  std::cerr << os.str ();
888  }
889 
890  using const_lo_dv_type =
891  Kokkos::DualView<const local_ordinal_type*, buffer_device_type>;
892  const_lo_dv_type permuteToLIDs = (revOp == DoForward) ?
893  transfer.getPermuteToLIDs_dv () :
894  transfer.getPermuteFromLIDs_dv ();
895  const_lo_dv_type permuteFromLIDs = (revOp == DoForward) ?
896  transfer.getPermuteFromLIDs_dv () :
897  transfer.getPermuteToLIDs_dv ();
898  const_lo_dv_type remoteLIDs = (revOp == DoForward) ?
899  transfer.getRemoteLIDs_dv () :
900  transfer.getExportLIDs_dv ();
901  const_lo_dv_type exportLIDs = (revOp == DoForward) ?
902  transfer.getExportLIDs_dv () :
903  transfer.getRemoteLIDs_dv ();
904  const bool canTryAliasing = (revOp == DoForward) ?
905  transfer.areRemoteLIDsContiguous() :
906  transfer.areExportLIDsContiguous();
907  // const bool canTryAliasing = false;
908 
909  ProfilingRegion region_dTN(funcName);
910 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
911  // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in favor
912  // of Kokkos profiling.
913  Teuchos::TimeMonitor doXferMon (*doXferTimer_);
914 #endif // HAVE_TPETRA_TRANSFER_TIMERS
915 
916  if (verbose) {
917  std::ostringstream os;
918  os << *prefix << "Input arguments:" << endl
919  << *prefix << " combineMode: " << combineModeToString (CM) << endl
920  << *prefix << " numSameIDs: " << numSameIDs << endl
921  << *prefix << " "
922  << dualViewStatusToString (permuteToLIDs, "permuteToLIDs") << endl
923  << *prefix << " "
924  << dualViewStatusToString (permuteFromLIDs, "permuteFromLIDs") << endl
925  << *prefix << " "
926  << dualViewStatusToString (remoteLIDs, "remoteLIDs") << endl
927  << *prefix << " "
928  << dualViewStatusToString (exportLIDs, "exportLIDs") << endl
929  << *prefix << " revOp: Do" << (revOp == DoReverse ? "Reverse" : "Forward") << endl
930  << *prefix << " commOnHost: " << (commOnHost ? "true" : "false") << endl;
931  std::cerr << os.str ();
932  }
933 
934  {
935  ProfilingRegion region_cs ("Tpetra::DistObject::doTransferNew::checkSizes");
936  if (verbose) {
937  std::ostringstream os;
938  os << *prefix << "1. checkSizes" << endl;
939  std::cerr << os.str ();
940  }
941  const bool checkSizesResult = this->checkSizes (src);
942  TEUCHOS_TEST_FOR_EXCEPTION
943  (! checkSizesResult, std::invalid_argument,
944  "Tpetra::DistObject::doTransfer: checkSizes() indicates that the "
945  "destination object is not a legal target for redistribution from the "
946  "source object. This probably means that they do not have the same "
947  "dimensions. For example, MultiVectors must have the same number of "
948  "rows and columns.");
949  }
950 
951  // NOTE (mfh 26 Apr 2016) Chris Baker's implementation understood
952  // that if CM == INSERT || CM == REPLACE, the target object could
953  // be write only. We don't optimize for that here.
954 
955  if (!restrictedMode && numSameIDs + permuteToLIDs.extent (0) != 0) {
956  // There is at least one GID to copy or permute.
957  if (verbose) {
958  std::ostringstream os;
959  os << *prefix << "2. copyAndPermute" << endl;
960  std::cerr << os.str ();
961  }
962  ProfilingRegion region_cp
963  ("Tpetra::DistObject::doTransferNew::copyAndPermute");
964 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
965  // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in favor
966  // of Kokkos profiling.
967  Teuchos::TimeMonitor copyAndPermuteMon (*copyAndPermuteTimer_);
968 #endif // HAVE_TPETRA_TRANSFER_TIMERS
969 
970  if (numSameIDs + permuteToLIDs.extent (0) != 0) {
971  // There is at least one GID to copy or permute.
972  if (verbose) {
973  std::ostringstream os;
974  os << *prefix << "2. copyAndPermute" << endl;
975  std::cerr << os.str ();
976  }
977  this->copyAndPermute (src, numSameIDs, permuteToLIDs,
978  permuteFromLIDs, CM);
979  if (verbose) {
980  std::ostringstream os;
981  os << *prefix << "After copyAndPermute:" << endl
982  << *prefix << " "
983  << dualViewStatusToString (permuteToLIDs, "permuteToLIDs")
984  << endl
985  << *prefix << " "
986  << dualViewStatusToString (permuteFromLIDs, "permuteFromLIDs")
987  << endl;
988  std::cerr << os.str ();
989  }
990  }
991  }
992 
993  // The method may return zero even if the implementation actually
994  // does have a constant number of packets per LID. However, if it
995  // returns nonzero, we may use this information to avoid
996  // (re)allocating num{Ex,Im}portPacketsPerLID_. packAndPrepare()
997  // will set this to its final value.
998  //
999  // We only need this if CM != ZERO, but it has to be lifted out of
1000  // that scope because there are multiple tests for CM != ZERO.
1001  size_t constantNumPackets = this->constantNumberOfPackets ();
1002  if (verbose) {
1003  std::ostringstream os;
1004  os << *prefix << "constantNumPackets=" << constantNumPackets << endl;
1005  std::cerr << os.str ();
1006  }
1007 
1008  // We only need to pack communication buffers if the combine mode
1009  // is not ZERO. A "ZERO combine mode" means that the results are
1010  // the same as if we had received all zeros, and added them to the
1011  // existing values. That means we don't need to communicate.
1012  if (CM != ZERO) {
1013  if (constantNumPackets == 0) {
1014  if (verbose) {
1015  std::ostringstream os;
1016  os << *prefix << "3. (Re)allocate num{Ex,Im}portPacketsPerLID"
1017  << endl;
1018  std::cerr << os.str ();
1019  }
1020  // This only reallocates if necessary, that is, if the sizes
1021  // don't match.
1022  this->reallocArraysForNumPacketsPerLid (exportLIDs.extent (0),
1023  remoteLIDs.extent (0));
1024  }
1025 
1026  if (verbose) {
1027  std::ostringstream os;
1028  os << *prefix << "4. packAndPrepare: before, "
1029  << dualViewStatusToString (this->exports_, "exports_")
1030  << endl;
1031  std::cerr << os.str ();
1032  }
1033 
1034  doPackAndPrepare(src, exportLIDs, constantNumPackets, execution_space());
1035  if (commOnHost) {
1036  this->exports_.sync_host();
1037  }
1038  else {
1039  this->exports_.sync_device();
1040  }
1041 
1042  if (verbose) {
1043  std::ostringstream os;
1044  os << *prefix << "5.1. After packAndPrepare, "
1045  << dualViewStatusToString (this->exports_, "exports_")
1046  << endl;
1047  std::cerr << os.str ();
1048  }
1049  } // if (CM != ZERO)
1050 
1051  // We only need to send data if the combine mode is not ZERO.
1052  if (CM != ZERO) {
1053  if (constantNumPackets != 0) {
1054  // There are a constant number of packets per element. We
1055  // already know (from the number of "remote" (incoming)
1056  // elements) how many incoming elements we expect, so we can
1057  // resize the buffer accordingly.
1058  const size_t rbufLen = remoteLIDs.extent (0) * constantNumPackets;
1059  reallocImportsIfNeeded (rbufLen, verbose, prefix.get (), canTryAliasing, CM);
1060  }
1061 
1062  // Do we need to do communication (via doPostsAndWaits)?
1063  bool needCommunication = true;
1064 
1065  // This may be NULL. It will be used below.
1066  const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
1067 
1068  if (revOp == DoReverse && ! this->isDistributed ()) {
1069  needCommunication = false;
1070  }
1071  // FIXME (mfh 30 Jun 2013): Checking whether the source object
1072  // is distributed requires a cast to DistObject. If it's not a
1073  // DistObject, then I'm not quite sure what to do. Perhaps it
1074  // would be more appropriate for SrcDistObject to have an
1075  // isDistributed() method. For now, I'll just assume that we
1076  // need to do communication unless the cast succeeds and the
1077  // source is not distributed.
1078  else if (revOp == DoForward && srcDistObj != NULL &&
1079  ! srcDistObj->isDistributed ()) {
1080  needCommunication = false;
1081  }
1082 
1083  if (! needCommunication) {
1084  if (verbose) {
1085  std::ostringstream os;
1086  os << *prefix << "Comm not needed; skipping" << endl;
1087  std::cerr << os.str ();
1088  }
1089  }
1090  else {
1091  ProfilingRegion region_dpw
1092  ("Tpetra::DistObject::doTransferNew::doPostsAndWaits");
1093 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
1094  // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in
1095  // favor of Kokkos profiling.
1096  Teuchos::TimeMonitor doPostsAndWaitsMon (*doPostsAndWaitsTimer_);
1097 #endif // HAVE_TPETRA_TRANSFER_TIMERS
1098 
1099  if (verbose) {
1100  std::ostringstream os;
1101  os << *prefix << "7.0. "
1102  << (revOp == DoReverse ? "Reverse" : "Forward")
1103  << " mode" << endl;
1104  std::cerr << os.str ();
1105  }
1106 
1107  doPosts(distributorPlan, constantNumPackets, commOnHost, prefix, canTryAliasing, CM);
1108  } // if (needCommunication)
1109  } // if (CM != ZERO)
1110  }
1111 
1112  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1113  void
1115  endTransfer(const SrcDistObject& src,
1116  const ::Tpetra::Details::Transfer<local_ordinal_type, global_ordinal_type, node_type>& transfer,
1117  const char modeString[],
1118  const ReverseOption revOp,
1119  const CombineMode CM,
1120  bool restrictedMode)
1121  {
1122  using Details::Behavior;
1126  using Kokkos::Compat::getArrayView;
1127  using Kokkos::Compat::getConstArrayView;
1128  using Kokkos::Compat::getKokkosViewDeepCopy;
1129  using Kokkos::Compat::create_const_view;
1130  using std::endl;
1133 
1134  const bool commOnHost = ! Behavior::assumeMpiIsGPUAware ();
1135  const char funcNameHost[] = "Tpetra::DistObject::endTransfer[Host]";
1136  const char funcNameDevice[] = "Tpetra::DistObject::endTransfer[Device]";
1137  const char *funcName = commOnHost ? funcNameHost : funcNameDevice;
1138  ProfilingRegion region_doTransfer(funcName);
1139  const bool verbose = Behavior::verbose("DistObject");
1140  std::shared_ptr<std::string> prefix;
1141  if (verbose) {
1142  std::ostringstream os;
1143  prefix = this->createPrefix("DistObject", "doTransfer");
1144  os << *prefix << "Source type: " << Teuchos::typeName(src)
1145  << ", Target type: " << Teuchos::typeName(*this) << endl;
1146  std::cerr << os.str();
1147  }
1148 
1149  // "Restricted Mode" does two things:
1150  // 1) Skips copyAndPermute
1151  // 2) Allows the "target" Map of the transfer to be a subset of
1152  // the Map of *this, in a "locallyFitted" sense.
1153  //
1154  // This cannot be used if #2 is not true, OR there are permutes.
1155  // Source Maps still need to match
1156 
1157  // mfh 18 Oct 2017: Set TPETRA_DEBUG to true to enable extra debug
1158  // checks. These may communicate more.
1159  const bool debug = Behavior::debug("DistObject");
1160  if (debug) {
1161  if (! restrictedMode && revOp == DoForward) {
1162  const bool myMapSameAsTransferTgtMap =
1163  this->getMap ()->isSameAs (* (transfer.getTargetMap ()));
1164  TEUCHOS_TEST_FOR_EXCEPTION
1165  (! myMapSameAsTransferTgtMap, std::invalid_argument,
1166  "Tpetra::DistObject::" << modeString << ": For forward-mode "
1167  "communication, the target DistObject's Map must be the same "
1168  "(in the sense of Tpetra::Map::isSameAs) as the input "
1169  "Export/Import object's target Map.");
1170  }
1171  else if (! restrictedMode && revOp == DoReverse) {
1172  const bool myMapSameAsTransferSrcMap =
1173  this->getMap ()->isSameAs (* (transfer.getSourceMap ()));
1174  TEUCHOS_TEST_FOR_EXCEPTION
1175  (! myMapSameAsTransferSrcMap, std::invalid_argument,
1176  "Tpetra::DistObject::" << modeString << ": For reverse-mode "
1177  "communication, the target DistObject's Map must be the same "
1178  "(in the sense of Tpetra::Map::isSameAs) as the input "
1179  "Export/Import object's source Map.");
1180  }
1181  else if (restrictedMode && revOp == DoForward) {
1182  const bool myMapLocallyFittedTransferTgtMap =
1183  this->getMap ()->isLocallyFitted (* (transfer.getTargetMap ()));
1184  TEUCHOS_TEST_FOR_EXCEPTION
1185  (! myMapLocallyFittedTransferTgtMap , std::invalid_argument,
1186  "Tpetra::DistObject::" << modeString << ": For forward-mode "
1187  "communication using restricted mode, Export/Import object's "
1188  "target Map must be locally fitted (in the sense of "
1189  "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
1190  }
1191  else { // if (restrictedMode && revOp == DoReverse)
1192  const bool myMapLocallyFittedTransferSrcMap =
1193  this->getMap ()->isLocallyFitted (* (transfer.getSourceMap ()));
1194  TEUCHOS_TEST_FOR_EXCEPTION
1195  (! myMapLocallyFittedTransferSrcMap, std::invalid_argument,
1196  "Tpetra::DistObject::" << modeString << ": For reverse-mode "
1197  "communication using restricted mode, Export/Import object's "
1198  "source Map must be locally fitted (in the sense of "
1199  "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
1200  }
1201 
1202  // SrcDistObject need not even _have_ Maps. However, if the
1203  // source object is a DistObject, it has a Map, and we may
1204  // compare that Map with the Transfer's Maps.
1205  const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
1206  if (srcDistObj != nullptr) {
1207  if (revOp == DoForward) {
1208  const bool srcMapSameAsImportSrcMap =
1209  srcDistObj->getMap ()->isSameAs (* (transfer.getSourceMap ()));
1210  TEUCHOS_TEST_FOR_EXCEPTION
1211  (! srcMapSameAsImportSrcMap, std::invalid_argument,
1212  "Tpetra::DistObject::" << modeString << ": For forward-mode "
1213  "communication, the source DistObject's Map must be the same "
1214  "as the input Export/Import object's source Map.");
1215  }
1216  else { // revOp == DoReverse
1217  const bool srcMapSameAsImportTgtMap =
1218  srcDistObj->getMap ()->isSameAs (* (transfer.getTargetMap ()));
1219  TEUCHOS_TEST_FOR_EXCEPTION
1220  (! srcMapSameAsImportTgtMap, std::invalid_argument,
1221  "Tpetra::DistObject::" << modeString << ": For reverse-mode "
1222  "communication, the source DistObject's Map must be the same "
1223  "as the input Export/Import object's target Map.");
1224  }
1225  }
1226  }
1227 
1228  Distributor& distor = transfer.getDistributor ();
1229  const Details::DistributorPlan& distributorPlan = (revOp == DoForward) ? distor.getPlan() : *distor.getPlan().getReversePlan();
1230 
1231  TEUCHOS_TEST_FOR_EXCEPTION
1232  (debug && restrictedMode &&
1233  (transfer.getPermuteToLIDs_dv().extent(0) != 0 ||
1234  transfer.getPermuteFromLIDs_dv().extent(0) != 0),
1235  std::invalid_argument,
1236  "Tpetra::DistObject::" << modeString << ": Transfer object "
1237  "cannot have permutes in restricted mode.");
1238 
1239  // Do we need all communication buffers to live on host?
1240  if (verbose) {
1241  std::ostringstream os;
1242  os << *prefix << "doTransfer: Use new interface; "
1243  "commOnHost=" << (commOnHost ? "true" : "false") << endl;
1244  std::cerr << os.str ();
1245  }
1246 
1247  using const_lo_dv_type =
1248  Kokkos::DualView<const local_ordinal_type*, buffer_device_type>;
1249  const_lo_dv_type permuteToLIDs = (revOp == DoForward) ?
1250  transfer.getPermuteToLIDs_dv () :
1251  transfer.getPermuteFromLIDs_dv ();
1252  const_lo_dv_type permuteFromLIDs = (revOp == DoForward) ?
1253  transfer.getPermuteFromLIDs_dv () :
1254  transfer.getPermuteToLIDs_dv ();
1255  const_lo_dv_type remoteLIDs = (revOp == DoForward) ?
1256  transfer.getRemoteLIDs_dv () :
1257  transfer.getExportLIDs_dv ();
1258  const_lo_dv_type exportLIDs = (revOp == DoForward) ?
1259  transfer.getExportLIDs_dv () :
1260  transfer.getRemoteLIDs_dv ();
1261  const bool canTryAliasing = (revOp == DoForward) ?
1262  transfer.areRemoteLIDsContiguous() :
1263  transfer.areExportLIDsContiguous();
1264 
1265  size_t constantNumPackets = this->constantNumberOfPackets ();
1266 
1267  // We only need to send data if the combine mode is not ZERO.
1268  if (CM != ZERO) {
1269  if (constantNumPackets != 0) {
1270  // There are a constant number of packets per element. We
1271  // already know (from the number of "remote" (incoming)
1272  // elements) how many incoming elements we expect, so we can
1273  // resize the buffer accordingly.
1274  const size_t rbufLen = remoteLIDs.extent (0) * constantNumPackets;
1275  reallocImportsIfNeeded (rbufLen, verbose, prefix.get (), canTryAliasing, CM);
1276  }
1277 
1278  // Do we need to do communication (via doPostsAndWaits)?
1279  bool needCommunication = true;
1280 
1281  // This may be NULL. It will be used below.
1282  const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
1283 
1284  if (revOp == DoReverse && ! this->isDistributed ()) {
1285  needCommunication = false;
1286  }
1287  // FIXME (mfh 30 Jun 2013): Checking whether the source object
1288  // is distributed requires a cast to DistObject. If it's not a
1289  // DistObject, then I'm not quite sure what to do. Perhaps it
1290  // would be more appropriate for SrcDistObject to have an
1291  // isDistributed() method. For now, I'll just assume that we
1292  // need to do communication unless the cast succeeds and the
1293  // source is not distributed.
1294  else if (revOp == DoForward && srcDistObj != NULL &&
1295  ! srcDistObj->isDistributed ()) {
1296  needCommunication = false;
1297  }
1298 
1299  if (! needCommunication) {
1300  if (verbose) {
1301  std::ostringstream os;
1302  os << *prefix << "Comm not needed; skipping" << endl;
1303  std::cerr << os.str ();
1304  }
1305  }
1306  else {
1307  distributorActor_.doWaitsRecv(distributorPlan);
1308 
1309  if (verbose) {
1310  std::ostringstream os;
1311  os << *prefix << "8. unpackAndCombine - remoteLIDs " << remoteLIDs.extent(0) << ", constantNumPackets " << constantNumPackets << endl;
1312  std::cerr << os.str ();
1313  }
1314  doUnpackAndCombine(remoteLIDs, constantNumPackets, CM, execution_space());
1315 
1316  distributorActor_.doWaitsSend(distributorPlan);
1317  } // if (needCommunication)
1318  } // if (CM != ZERO)
1319 
1320  if (verbose) {
1321  std::ostringstream os;
1322  os << *prefix << "9. Done!" << endl;
1323  std::cerr << os.str ();
1324  }
1325 
1326  if (verbose) {
1327  std::ostringstream os;
1328  os << *prefix << "Tpetra::DistObject::doTransfer: Done!" << endl;
1329  std::cerr << os.str ();
1330  }
1331  }
1332 
1333  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1334  void
1335  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
1336  doPosts(const Details::DistributorPlan& distributorPlan,
1337  size_t constantNumPackets,
1338  bool commOnHost,
1339  std::shared_ptr<std::string> prefix,
1340  const bool canTryAliasing,
1341  const CombineMode CM)
1342  {
1345  using Kokkos::Compat::create_const_view;
1346  using std::endl;
1347 
1348  const bool verbose = Details::Behavior::verbose("DistObject");
1349 
1350  if (constantNumPackets == 0) { // variable num packets per LID
1351  if (verbose) {
1352  std::ostringstream os;
1353  os << *prefix << "7.1. Variable # packets / LID: first comm "
1354  << "(commOnHost = " << (commOnHost ? "true" : "false") << ")"
1355  << endl;
1356  std::cerr << os.str ();
1357  }
1358  size_t totalImportPackets = 0;
1359  if (commOnHost) {
1360  if (this->numExportPacketsPerLID_.need_sync_host ()) {
1361  this->numExportPacketsPerLID_.sync_host ();
1362  }
1363  if (this->numImportPacketsPerLID_.need_sync_host ()) {
1364  this->numImportPacketsPerLID_.sync_host ();
1365  }
1366  this->numImportPacketsPerLID_.modify_host (); // out arg
1367  auto numExp_h =
1368  create_const_view (this->numExportPacketsPerLID_.view_host ());
1369  auto numImp_h = this->numImportPacketsPerLID_.view_host ();
1370 
1371  // MPI communication happens here.
1372  if (verbose) {
1373  std::ostringstream os;
1374  os << *prefix << "Call doPostsAndWaits"
1375  << endl;
1376  std::cerr << os.str ();
1377  }
1378  distributorActor_.doPostsAndWaits(distributorPlan, numExp_h, 1, numImp_h);
1379 
1380  if (verbose) {
1381  std::ostringstream os;
1382  os << *prefix << "Count totalImportPackets" << std::endl;
1383  std::cerr << os.str ();
1384  }
1385  using the_dev_type = typename decltype (numImp_h)::device_type;
1386  totalImportPackets = countTotalImportPackets<the_dev_type> (numImp_h);
1387  }
1388  else { // ! commOnHost
1389  this->numExportPacketsPerLID_.sync_device ();
1390  this->numImportPacketsPerLID_.sync_device ();
1391  this->numImportPacketsPerLID_.modify_device (); // out arg
1392  auto numExp_d = create_const_view
1393  (this->numExportPacketsPerLID_.view_device ());
1394  auto numImp_d = this->numImportPacketsPerLID_.view_device ();
1395 
1396  // MPI communication happens here.
1397  if (verbose) {
1398  std::ostringstream os;
1399  os << *prefix << "Call doPostsAndWaits"
1400  << endl;
1401  std::cerr << os.str ();
1402  }
1403 
1404  distributorActor_.doPostsAndWaits(distributorPlan, numExp_d, 1, numImp_d);
1405 
1406  if (verbose) {
1407  std::ostringstream os;
1408  os << *prefix << "Count totalImportPackets" << std::endl;
1409  std::cerr << os.str ();
1410  }
1411  using the_dev_type = typename decltype (numImp_d)::device_type;
1412  totalImportPackets = countTotalImportPackets<the_dev_type> (numImp_d);
1413  }
1414 
1415  if (verbose) {
1416  std::ostringstream os;
1417  os << *prefix << "totalImportPackets=" << totalImportPackets << endl;
1418  std::cerr << os.str ();
1419  }
1420  this->reallocImportsIfNeeded (totalImportPackets, verbose,
1421  prefix.get (), canTryAliasing, CM);
1422  if (verbose) {
1423  std::ostringstream os;
1424  os << *prefix << "7.3. Second comm" << std::endl;
1425  std::cerr << os.str ();
1426  }
1427 
1428  // mfh 04 Feb 2019: Distributor expects the "num packets per
1429  // LID" arrays on host, so that it can issue MPI sends and
1430  // receives correctly.
1431  this->numExportPacketsPerLID_.sync_host ();
1432  this->numImportPacketsPerLID_.sync_host ();
1433 
1434  // NOTE (mfh 25 Apr 2016, 01 Aug 2017) doPostsAndWaits and
1435  // doReversePostsAndWaits currently want
1436  // numExportPacketsPerLID and numImportPacketsPerLID as
1437  // Teuchos::ArrayView, rather than as Kokkos::View.
1438  //
1439  // NOTE (mfh 04 Feb 2019) This does NOT copy from host to
1440  // device. The above syncs might.
1441  auto numExportPacketsPerLID_av =
1442  getArrayViewFromDualView (this->numExportPacketsPerLID_);
1443  auto numImportPacketsPerLID_av =
1444  getArrayViewFromDualView (this->numImportPacketsPerLID_);
1445 
1446  // imports_ is for output only, so we don't need to sync it
1447  // before marking it as modified. However, in order to
1448  // prevent spurious debug-mode errors (e.g., "modified on
1449  // both device and host"), we first need to clear its
1450  // "modified" flags.
1451  this->imports_.clear_sync_state ();
1452 
1453  if (verbose) {
1454  std::ostringstream os;
1455  os << *prefix << "Comm on "
1456  << (commOnHost ? "host" : "device")
1457  << "; call doPosts" << endl;
1458  std::cerr << os.str ();
1459  }
1460 
1461  if (commOnHost) {
1462  this->imports_.modify_host ();
1463  distributorActor_.doPosts
1464  (distributorPlan,
1465  create_const_view (this->exports_.view_host ()),
1466  numExportPacketsPerLID_av,
1467  this->imports_.view_host (),
1468  numImportPacketsPerLID_av);
1469  }
1470  else { // pack on device
1471  Kokkos::fence("DistObject::doPosts-1"); // for UVM
1472  this->imports_.modify_device ();
1473  distributorActor_.doPosts
1474  (distributorPlan,
1475  create_const_view (this->exports_.view_device ()),
1476  numExportPacketsPerLID_av,
1477  this->imports_.view_device (),
1478  numImportPacketsPerLID_av);
1479  }
1480  }
1481  else { // constant number of packets per LID
1482  if (verbose) {
1483  std::ostringstream os;
1484  os << *prefix << "7.1. Const # packets per LID: " << endl
1485  << *prefix << " "
1486  << dualViewStatusToString (this->exports_, "exports_")
1487  << endl
1488  << *prefix << " "
1489  << dualViewStatusToString (this->exports_, "imports_")
1490  << endl;
1491  std::cerr << os.str ();
1492  }
1493  // imports_ is for output only, so we don't need to sync it
1494  // before marking it as modified. However, in order to
1495  // prevent spurious debug-mode errors (e.g., "modified on
1496  // both device and host"), we first need to clear its
1497  // "modified" flags.
1498  this->imports_.clear_sync_state ();
1499 
1500  if (verbose) {
1501  std::ostringstream os;
1502  os << *prefix << "7.2. Comm on "
1503  << (commOnHost ? "host" : "device")
1504  << "; call doPosts" << endl;
1505  std::cerr << os.str ();
1506  }
1507  if (commOnHost) {
1508  this->imports_.modify_host ();
1509  distributorActor_.doPosts
1510  (distributorPlan,
1511  create_const_view (this->exports_.view_host ()),
1512  constantNumPackets,
1513  this->imports_.view_host ());
1514  }
1515  else { // pack on device
1516  Kokkos::fence("DistObject::doPosts-2"); // for UVM
1517  this->imports_.modify_device ();
1518  distributorActor_.doPosts
1519  (distributorPlan,
1520  create_const_view (this->exports_.view_device ()),
1521  constantNumPackets,
1522  this->imports_.view_device ());
1523  } // commOnHost
1524  } // constant or variable num packets per LID
1525  }
1526 
1527  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1528  void
1529  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
1530  doPackAndPrepare(const SrcDistObject& src,
1531  const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
1532  size_t& constantNumPackets,
1533  const execution_space &space)
1534  {
1535  using Details::ProfilingRegion;
1536  using std::endl;
1537  const bool debug = Details::Behavior::debug("DistObject");
1538 
1539  ProfilingRegion region_pp
1540  ("Tpetra::DistObject::doPackAndPrepare");
1541 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
1542  // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in
1543  // favor of Kokkos profiling.
1544  Teuchos::TimeMonitor packAndPrepareMon (*packAndPrepareTimer_);
1545 #endif // HAVE_TPETRA_TRANSFER_TIMERS
1546 
1547  // Ask the source to pack data. Also ask it whether there are
1548  // a constant number of packets per element
1549  // (constantNumPackets is an output argument). If there are,
1550  // constantNumPackets will come back nonzero. Otherwise, the
1551  // source will fill the numExportPacketsPerLID_ array.
1552 
1553  // FIXME (mfh 18 Oct 2017) if (! commOnHost), sync to device?
1554  // Alternately, make packAndPrepare take a "commOnHost"
1555  // argument to tell it where to leave the data?
1556  //
1557  // NOTE (mfh 04 Feb 2019) Subclasses of DistObject should have
1558  // the freedom to pack and unpack either on host or device.
1559  // We should prefer sync'ing only on demand. Thus, we can
1560  // answer the above question: packAndPrepare should not
1561  // take a commOnHost argument, and doTransferNew should sync
1562  // where needed, if needed.
1563  if (debug) {
1564  std::ostringstream lclErrStrm;
1565  bool lclSuccess = false;
1566  try {
1567  this->packAndPrepare (src, exportLIDs, this->exports_,
1568  this->numExportPacketsPerLID_,
1569  constantNumPackets, space);
1570  lclSuccess = true;
1571  }
1572  catch (std::exception& e) {
1573  lclErrStrm << "packAndPrepare threw an exception: "
1574  << endl << e.what();
1575  }
1576  catch (...) {
1577  lclErrStrm << "packAndPrepare threw an exception "
1578  "not a subclass of std::exception.";
1579  }
1580  const char gblErrMsgHeader[] = "Tpetra::DistObject "
1581  "threw an exception in packAndPrepare on "
1582  "one or more processes in the DistObject's communicator.";
1583  auto comm = getMap()->getComm();
1584  Details::checkGlobalError(std::cerr, lclSuccess,
1585  lclErrStrm.str().c_str(),
1586  gblErrMsgHeader, *comm);
1587  }
1588  else {
1589  this->packAndPrepare (src, exportLIDs, this->exports_,
1590  this->numExportPacketsPerLID_,
1591  constantNumPackets, space);
1592  }
1593  }
1594 
1595  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1596  void
1597  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
1598  doUnpackAndCombine(const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& remoteLIDs,
1599  size_t constantNumPackets,
1600  CombineMode CM,
1601  const execution_space &space)
1602  {
1603  using Details::ProfilingRegion;
1604  using std::endl;
1605  const bool debug = Details::Behavior::debug("DistObject");
1606 
1607  ProfilingRegion region_uc
1608  ("Tpetra::DistObject::doUnpackAndCombine");
1609 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
1610  // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in
1611  // favor of Kokkos profiling.
1612  Teuchos::TimeMonitor unpackAndCombineMon (*unpackAndCombineTimer_);
1613 #endif // HAVE_TPETRA_TRANSFER_TIMERS
1614 
1615  if (debug) {
1616  std::ostringstream lclErrStrm;
1617  bool lclSuccess = false;
1618  try {
1619  this->unpackAndCombine (remoteLIDs, this->imports_,
1620  this->numImportPacketsPerLID_,
1621  constantNumPackets, CM, space);
1622  lclSuccess = true;
1623  }
1624  catch (std::exception& e) {
1625  lclErrStrm << "doUnpackAndCombine threw an exception: "
1626  << endl << e.what();
1627  }
1628  catch (...) {
1629  lclErrStrm << "doUnpackAndCombine threw an exception "
1630  "not a subclass of std::exception.";
1631  }
1632  const char gblErrMsgHeader[] = "Tpetra::DistObject "
1633  "threw an exception in unpackAndCombine on "
1634  "one or more processes in the DistObject's communicator.";
1635  auto comm = getMap()->getComm();
1636  Details::checkGlobalError(std::cerr, lclSuccess,
1637  lclErrStrm.str().c_str(),
1638  gblErrMsgHeader, *comm);
1639  }
1640  else {
1641  this->unpackAndCombine (remoteLIDs, this->imports_,
1642  this->numImportPacketsPerLID_,
1643  constantNumPackets, CM, space);
1644  }
1645  }
1646 
1647  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1648  void
1651  (const SrcDistObject&,
1652  const size_t,
1653  const Kokkos::DualView<
1654  const local_ordinal_type*,
1656  const Kokkos::DualView<
1657  const local_ordinal_type*,
1659  const CombineMode CM)
1660  {}
1661 
1662 // clang-format on
1663 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1665  const SrcDistObject &source, const size_t numSameIDs,
1666  const Kokkos::DualView<const local_ordinal_type *, buffer_device_type>
1667  &permuteToLIDs,
1668  const Kokkos::DualView<const local_ordinal_type *, buffer_device_type>
1669  &permuteFromLIDs,
1670  const CombineMode CM, const execution_space &space) {
1671  /*
1672  This is called if the derived class doesn't know how to pack and prepare in
1673  an arbitrary execution space instance, but it was asked to anyway.
1674  Provide a safe illusion by actually doing the work in the default instance,
1675  and syncing the default instance with the provided instance.
1676  The caller expects
1677  1. any work in the provided instance to complete before this.
1678  2. This to complete before any following work in the provided instance.
1679  */
1680 
1681  space.fence(); // // TODO: Tpetra::Details::Spaces::exec_space_wait
1682  copyAndPermute(source, numSameIDs, permuteToLIDs, permuteFromLIDs,
1683  CM); // default instance
1684  execution_space().fence(); // TODO:
1685  // Tpetra::Details::Spaces::exec_space_wait
1686 }
1687 // clang-format off
1688 
1689 
1690  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1691  void
1694  (const SrcDistObject&,
1695  const Kokkos::DualView<
1696  const local_ordinal_type*,
1698  Kokkos::DualView<
1699  packet_type*,
1701  Kokkos::DualView<
1702  size_t*,
1704  size_t&)
1705  {}
1706 
1707 // clang-format on
1708 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1710  const SrcDistObject &source,
1711  const Kokkos::DualView<const local_ordinal_type *, buffer_device_type>
1712  &exportLIDs,
1713  Kokkos::DualView<packet_type *, buffer_device_type> &exports,
1714  Kokkos::DualView<size_t *, buffer_device_type> numPacketsPerLID,
1715  size_t &constantNumPackets, const execution_space &space) {
1716  /*
1717  This is called if the derived class doesn't know how to pack and prepare in
1718  an arbitrary execution space instance, but it was asked to anyway.
1719  Provide a safe illusion by actually doing the work in the default instance,
1720  and syncing the default instance with the provided instance.
1721 
1722  The caller expects
1723  1. any work in the provided instance to complete before this.
1724  2. This to complete before any following work in the provided instance.
1725  */
1726 
1727  // wait for any work from prior operations in the provided instance to
1728  // complete
1729  space.fence(); // TODO: Details::Spaces::exec_space_wait
1730 
1731  // pack and prepare in the default instance.
1732  packAndPrepare(source, exportLIDs, exports, numPacketsPerLID,
1733  constantNumPackets); // default instance
1734 
1735  // wait for the default instance to complete before returning, so any
1736  // following work inserted into the provided instance will be done after this
1737  execution_space().fence(); // TODO: Details::Spaces::exec_space_wait
1738 }
1739 // clang-format off
1740 
1741  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1742  void
1745  (const Kokkos::DualView<
1746  const local_ordinal_type*,
1747  buffer_device_type>& /* importLIDs */,
1748  Kokkos::DualView<
1749  packet_type*,
1750  buffer_device_type> /* imports */,
1751  Kokkos::DualView<
1752  size_t*,
1753  buffer_device_type> /* numPacketsPerLID */,
1754  const size_t /* constantNumPackets */,
1755  const CombineMode /* combineMode */)
1756  {}
1757 
1758 // clang-format on
1759 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1761  const Kokkos::DualView<const local_ordinal_type *, buffer_device_type>
1762  &importLIDs,
1763  Kokkos::DualView<packet_type *, buffer_device_type> imports,
1764  Kokkos::DualView<size_t *, buffer_device_type> numPacketsPerLID,
1765  const size_t constantNumPackets, const CombineMode combineMode,
1766  const execution_space &space) {
1767  // Wait for any work in the provided space to complete
1768  space.fence(); // TODO: Details::Spaces::exec_space_wait(execution_space(),
1769  // space);
1770  unpackAndCombine(importLIDs, imports, numPacketsPerLID, constantNumPackets,
1771  combineMode); // default instance
1772  // wait for unpack to finish in the default instance, since the caller
1773  // may be expecting sequential semantics in the `space` instance
1774  execution_space().fence(); // TODO: Details::Spaces::exec_space_wait(space,
1775  // execution_space());
1776 }
1777 // clang-format off
1778 
1779 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1781  std::ostream &os) const {
1782  using std::endl;
1783  using Teuchos::FancyOStream;
1784  using Teuchos::getFancyOStream;
1785  using Teuchos::RCP;
1786  using Teuchos::rcpFromRef;
1787 
1788  RCP<FancyOStream> out = getFancyOStream(rcpFromRef(os));
1789  this->describe(*out, Teuchos::VERB_DEFAULT);
1790 }
1791 
1792 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1793 std::unique_ptr<std::string>
1795  const char className[], const char methodName[]) const {
1796  auto map = this->getMap();
1797  auto comm = map.is_null() ? Teuchos::null : map->getComm();
1798  return Details::createPrefix(comm.getRawPtr(), className, methodName);
1799 }
1800 
1801 template <class DistObjectType>
1803  Teuchos::RCP<DistObjectType> &input,
1804  const Teuchos::RCP<const Map<typename DistObjectType::local_ordinal_type,
1805  typename DistObjectType::global_ordinal_type,
1806  typename DistObjectType::node_type>> &newMap) {
1807  input->removeEmptyProcessesInPlace(newMap);
1808  if (newMap.is_null()) { // my process is excluded
1809  input = Teuchos::null;
1810  }
1811 }
1812 
1813 template <class DistObjectType>
1814 void removeEmptyProcessesInPlace(Teuchos::RCP<DistObjectType> &input) {
1815  auto newMap = input->getMap()->removeEmptyProcesses();
1816  removeEmptyProcessesInPlace<DistObjectType>(input, newMap);
1817 }
1818 
// Each macro below expands to one explicit instantiation of the DistObject
// class template for the given type arguments.
1819 // Explicit instantiation macro for general DistObject.
// SCALAR is used as the Packet type; LO, GO and NODE are passed through.
1820 #define TPETRA_DISTOBJECT_INSTANT(SCALAR, LO, GO, NODE) \
1821  template class DistObject<SCALAR, LO, GO, NODE>;
1822 
1823 // Explicit instantiation macro for DistObject<char, ...>.
1824 // The "SLGN" stuff above doesn't work for Packet=char.
// Packet is fixed to char here, so this macro takes only LO, GO and NODE.
1825 #define TPETRA_DISTOBJECT_INSTANT_CHAR(LO, GO, NODE) \
1826  template class DistObject<char, LO, GO, NODE>;
1827 
1828 } // namespace Tpetra
1829 
1830 #endif // TPETRA_DISTOBJECT_DEF_HPP
1831 // clang-format on
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
const Details::DistributorPlan & getPlan() const
Get this Distributor's DistributorPlan.
virtual void copyAndPermute(const SrcDistObject &source, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM)
Perform copies and permutations that are local to the calling (MPI) process.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
void doImport(const SrcDistObject &source, const Import< LocalOrdinal, GlobalOrdinal, Node > &importer, const CombineMode CM, const bool restrictedMode=false)
Import data into this object using an Import object ("forward mode").
typename::Kokkos::ArithTraits< Packet >::val_type packet_type
The type of each datum being sent or received in an Import or Export.
void print(std::ostream &os) const
Print this object to the given output stream.
virtual bool reallocArraysForNumPacketsPerLid(const size_t numExportLIDs, const size_t numImportLIDs)
Reallocate numExportPacketsPerLID_ and/or numImportPacketsPerLID_, if necessary.
bool isDistributed() const
Whether this is a globally distributed object.
void removeEmptyProcessesInPlace(Teuchos::RCP< DistObjectType > &input, const Teuchos::RCP< const Map< typename DistObjectType::local_ordinal_type, typename DistObjectType::global_ordinal_type, typename DistObjectType::node_type > > &newMap)
Remove processes which contain no elements in this object's Map.
virtual void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< packet_type *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode)
Perform any unpacking and combining after communication.
static bool debug()
Whether Tpetra is in debug mode.
virtual void doTransfer(const SrcDistObject &src, const ::Tpetra::Details::Transfer< local_ordinal_type, global_ordinal_type, node_type > &transfer, const char modeString[], const ReverseOption revOp, const CombineMode CM, const bool restrictedMode)
Redistribute data across (MPI) processes.
void beginTransfer(const SrcDistObject &src, const ::Tpetra::Details::Transfer< local_ordinal_type, global_ordinal_type, node_type > &transfer, const char modeString[], const ReverseOption revOp, const CombineMode CM, const bool restrictedMode)
Implementation detail of doTransfer.
typename device_type::execution_space execution_space
The Kokkos execution space.
Kokkos::DualView< T *, DT > getDualViewCopyFromArrayView(const Teuchos::ArrayView< const T > &x_av, const char label[], const bool leaveOnHost)
Get a 1-D Kokkos::DualView which is a deep copy of the input Teuchos::ArrayView (which views host mem...
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
virtual void packAndPrepare(const SrcDistObject &source, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &exportLIDs, Kokkos::DualView< packet_type *, buffer_device_type > &exports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, size_t &constantNumPackets)
Pack data and metadata for communication (sends).
Sets up and executes a communication plan for a Tpetra DistObject.
static bool verbose()
Whether Tpetra is in verbose mode.
CombineMode
Rule for combining data in an Import or Export.
bool reallocDualViewIfNeeded(Kokkos::DualView< ValueType *, DeviceType > &dv, const size_t newSize, const char newLabel[], const size_t tooBigFactor=2, const bool needFenceBeforeRealloc=true)
Reallocate the DualView in/out argument, if needed.
Abstract base class for objects that can be the source of an Import or Export operation.
Declaration and definition of Tpetra::Details::reallocDualViewIfNeeded, an implementation detail of T...
LocalOrdinal local_ordinal_type
The type of local indices.
Replace old values with zero.
std::string combineModeToString(const CombineMode combineMode)
Human-readable string representation of the given CombineMode.
ReverseOption
Whether the data transfer should be performed in forward or reverse mode.
DistObject(const Teuchos::RCP< const map_type > &map)
Constructor.
std::string dualViewStatusToString(const DualViewType &dv, const char name[])
Return the status of the given Kokkos::DualView, as a human-readable string.
virtual std::string description() const
One-line description of this object.
bool transferArrived() const
Whether the data from an import/export operation has arrived, and is ready for the unpack and combine...
virtual size_t constantNumberOfPackets() const
Whether the implementation's instance promises always to have a constant number of packets per LID (l...
virtual bool reallocImportsIfNeeded(const size_t newSize, const bool verbose, const std::string *prefix, const bool remoteLIDsContiguous=false, const CombineMode CM=INSERT)
Reallocate imports_ if needed.
void doExport(const SrcDistObject &source, const Export< LocalOrdinal, GlobalOrdinal, Node > &exporter, const CombineMode CM, const bool restrictedMode=false)
Export data into this object using an Export object ("forward mode").
Teuchos::ArrayView< typename DualViewType::t_dev::value_type > getArrayViewFromDualView(const DualViewType &x)
Get a Teuchos::ArrayView which views the host Kokkos::View of the input 1-D Kokkos::DualView.
Stand-alone utility functions and macros.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print a description of this object to the given output stream.
Kokkos::Device< typename device_type::execution_space, buffer_memory_space > buffer_device_type
Kokkos::Device specialization for communication buffers.
Base class for distributed Tpetra objects that support data redistribution.
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
Definition: Tpetra_Util.cpp:71
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap)
Remove processes which contain no entries in this object's Map.
Description of Tpetra's behavior.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.