Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_DistObject_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 // clang-format off
11 #ifndef TPETRA_DISTOBJECT_DEF_HPP
12 #define TPETRA_DISTOBJECT_DEF_HPP
13 
21 
22 #include "Tpetra_Distributor.hpp"
25 #include "Tpetra_Details_checkGlobalError.hpp"
27 #include "Tpetra_Util.hpp" // Details::createPrefix
28 #include "Teuchos_CommHelpers.hpp"
29 #include "Teuchos_TypeNameTraits.hpp"
30 #include <typeinfo>
31 #include <memory>
32 #include <sstream>
33 
34 namespace Tpetra {
35 
36  namespace { // (anonymous)
37  template<class DeviceType, class IndexType = size_t>
38  struct SumFunctor {
39  SumFunctor (const Kokkos::View<const size_t*, DeviceType>& viewToSum) :
40  viewToSum_ (viewToSum) {}
41  KOKKOS_INLINE_FUNCTION void operator() (const IndexType i, size_t& lclSum) const {
42  lclSum += viewToSum_(i);
43  }
44  Kokkos::View<const size_t*, DeviceType> viewToSum_;
45  };
46 
47  template<class DeviceType, class IndexType = size_t>
48  size_t
49  countTotalImportPackets (const Kokkos::View<const size_t*, DeviceType>& numImportPacketsPerLID)
50  {
51  using Kokkos::parallel_reduce;
52  typedef DeviceType DT;
53  typedef typename DT::execution_space DES;
54  typedef Kokkos::RangePolicy<DES, IndexType> range_type;
55 
56  const IndexType numOut = numImportPacketsPerLID.extent (0);
57  size_t totalImportPackets = 0;
58  parallel_reduce ("Count import packets",
59  range_type (0, numOut),
60  SumFunctor<DeviceType, IndexType> (numImportPacketsPerLID),
61  totalImportPackets);
62  return totalImportPackets;
63  }
64  } // namespace (anonymous)
65 
66 
67  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
69  DistObject (const Teuchos::RCP<const map_type>& map) :
70  map_ (map)
71  {
72 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
73  using Teuchos::RCP;
74  using Teuchos::Time;
75  using Teuchos::TimeMonitor;
76 
77  RCP<Time> doXferTimer =
78  TimeMonitor::lookupCounter ("Tpetra::DistObject::doTransfer");
79  if (doXferTimer.is_null ()) {
80  doXferTimer =
81  TimeMonitor::getNewCounter ("Tpetra::DistObject::doTransfer");
82  }
83  doXferTimer_ = doXferTimer;
84 
85  RCP<Time> copyAndPermuteTimer =
86  TimeMonitor::lookupCounter ("Tpetra::DistObject::copyAndPermute");
87  if (copyAndPermuteTimer.is_null ()) {
88  copyAndPermuteTimer =
89  TimeMonitor::getNewCounter ("Tpetra::DistObject::copyAndPermute");
90  }
91  copyAndPermuteTimer_ = copyAndPermuteTimer;
92 
93  RCP<Time> packAndPrepareTimer =
94  TimeMonitor::lookupCounter ("Tpetra::DistObject::packAndPrepare");
95  if (packAndPrepareTimer.is_null ()) {
96  packAndPrepareTimer =
97  TimeMonitor::getNewCounter ("Tpetra::DistObject::packAndPrepare");
98  }
99  packAndPrepareTimer_ = packAndPrepareTimer;
100 
101  RCP<Time> doPostsAndWaitsTimer =
102  TimeMonitor::lookupCounter ("Tpetra::DistObject::doPostsAndWaits");
103  if (doPostsAndWaitsTimer.is_null ()) {
104  doPostsAndWaitsTimer =
105  TimeMonitor::getNewCounter ("Tpetra::DistObject::doPostsAndWaits");
106  }
107  doPostsAndWaitsTimer_ = doPostsAndWaitsTimer;
108 
109  RCP<Time> unpackAndCombineTimer =
110  TimeMonitor::lookupCounter ("Tpetra::DistObject::unpackAndCombine");
111  if (unpackAndCombineTimer.is_null ()) {
112  unpackAndCombineTimer =
113  TimeMonitor::getNewCounter ("Tpetra::DistObject::unpackAndCombine");
114  }
115  unpackAndCombineTimer_ = unpackAndCombineTimer;
116 #endif // HAVE_TPETRA_TRANSFER_TIMERS
117  }
118 
119  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
120  std::string
122  description () const
123  {
124  using Teuchos::TypeNameTraits;
125 
126  std::ostringstream os;
127  os << "\"Tpetra::DistObject\": {"
128  << "Packet: " << TypeNameTraits<packet_type>::name ()
129  << ", LocalOrdinal: " << TypeNameTraits<local_ordinal_type>::name ()
130  << ", GlobalOrdinal: " << TypeNameTraits<global_ordinal_type>::name ()
131  << ", Node: " << TypeNameTraits<Node>::name ();
132  if (this->getObjectLabel () != "") {
133  os << "Label: \"" << this->getObjectLabel () << "\"";
134  }
135  os << "}";
136  return os.str ();
137  }
138 
139  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
140  void
142  describe (Teuchos::FancyOStream &out,
143  const Teuchos::EVerbosityLevel verbLevel) const
144  {
145  using Teuchos::rcpFromRef;
146  using Teuchos::TypeNameTraits;
147  using std::endl;
148  const Teuchos::EVerbosityLevel vl = (verbLevel == Teuchos::VERB_DEFAULT) ?
149  Teuchos::VERB_LOW : verbLevel;
150  Teuchos::RCP<const Teuchos::Comm<int> > comm = this->getMap ()->getComm ();
151  const int myRank = comm.is_null () ? 0 : comm->getRank ();
152  const int numProcs = comm.is_null () ? 1 : comm->getSize ();
153 
154  if (vl != Teuchos::VERB_NONE) {
155  Teuchos::OSTab tab0 (out);
156  if (myRank == 0) {
157  out << "\"Tpetra::DistObject\":" << endl;
158  }
159  Teuchos::OSTab tab1 (out);
160  if (myRank == 0) {
161  out << "Template parameters:" << endl;
162  {
163  Teuchos::OSTab tab2 (out);
164  out << "Packet: " << TypeNameTraits<packet_type>::name () << endl
165  << "LocalOrdinal: " << TypeNameTraits<local_ordinal_type>::name () << endl
166  << "GlobalOrdinal: " << TypeNameTraits<global_ordinal_type>::name () << endl
167  << "Node: " << TypeNameTraits<node_type>::name () << endl;
168  }
169  if (this->getObjectLabel () != "") {
170  out << "Label: \"" << this->getObjectLabel () << "\"" << endl;
171  }
172  } // if myRank == 0
173 
174  // Describe the Map.
175  {
176  if (myRank == 0) {
177  out << "Map:" << endl;
178  }
179  Teuchos::OSTab tab2 (out);
180  map_->describe (out, vl);
181  }
182 
183  // At verbosity > VERB_LOW, each process prints something.
184  if (vl > Teuchos::VERB_LOW) {
185  for (int p = 0; p < numProcs; ++p) {
186  if (myRank == p) {
187  out << "Process " << myRank << ":" << endl;
188  Teuchos::OSTab tab2 (out);
189  out << "Export buffer size (in packets): "
190  << exports_.extent (0)
191  << endl
192  << "Import buffer size (in packets): "
193  << imports_.extent (0)
194  << endl;
195  }
196  if (! comm.is_null ()) {
197  comm->barrier (); // give output time to finish
198  comm->barrier ();
199  comm->barrier ();
200  }
201  } // for each process rank p
202  } // if vl > VERB_LOW
203  } // if vl != VERB_NONE
204  }
205 
206  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
207  void
209  removeEmptyProcessesInPlace (const Teuchos::RCP<const map_type>& /* newMap */)
210  {
211  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
212  "Tpetra::DistObject::removeEmptyProcessesInPlace: Not implemented");
213  }
214 
215  /* These are provided in base DistObject template
216  template<class DistObjectType>
217  void
218  removeEmptyProcessesInPlace (Teuchos::RCP<DistObjectType>& input,
219  const Teuchos::RCP<const Map<typename DistObjectType::local_ordinal_type,
220  typename DistObjectType::global_ordinal_type,
221  typename DistObjectType::node_type> >& newMap)
222  {
223  input->removeEmptyProcessesInPlace (newMap);
224  if (newMap.is_null ()) { // my process is excluded
225  input = Teuchos::null;
226  }
227  }
228 
229  template<class DistObjectType>
230  void
231  removeEmptyProcessesInPlace (Teuchos::RCP<DistObjectType>& input)
232  {
233  using Teuchos::RCP;
234  typedef typename DistObjectType::local_ordinal_type LO;
235  typedef typename DistObjectType::global_ordinal_type GO;
236  typedef typename DistObjectType::node_type NT;
237  typedef Map<LO, GO, NT> map_type;
238 
239  RCP<const map_type> newMap = input->getMap ()->removeEmptyProcesses ();
240  removeEmptyProcessesInPlace<DistObjectType> (input, newMap);
241  }
242  */
243 
244  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
245  void
247  doImport (const SrcDistObject& source,
249  const CombineMode CM,
250  const bool restrictedMode)
251  {
252  using Details::Behavior;
253  using std::endl;
254  const char modeString[] = "doImport (forward mode)";
255 
256  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
257  // output to std::cerr on every MPI process. This is unwise for
258  // runs with large numbers of MPI processes.
259  const bool verbose = Behavior::verbose("DistObject");
260  std::unique_ptr<std::string> prefix;
261  if (verbose) {
262  prefix = this->createPrefix("DistObject", modeString);
263  std::ostringstream os;
264  os << *prefix << "Start" << endl;
265  std::cerr << os.str ();
266  }
267  this->beginImport(source, importer, CM, restrictedMode);
268  this->endImport(source, importer, CM, restrictedMode);
269  if (verbose) {
270  std::ostringstream os;
271  os << *prefix << "Done" << endl;
272  std::cerr << os.str ();
273  }
274  }
275 
276  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
277  void
279  doExport (const SrcDistObject& source,
281  const CombineMode CM,
282  const bool restrictedMode)
283  {
284  using Details::Behavior;
285  using std::endl;
286  const char modeString[] = "doExport (forward mode)";
287 
288  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
289  // output to std::cerr on every MPI process. This is unwise for
290  // runs with large numbers of MPI processes.
291  const bool verbose = Behavior::verbose("DistObject");
292  std::unique_ptr<std::string> prefix;
293  if (verbose) {
294  prefix = this->createPrefix("DistObject", modeString);
295  std::ostringstream os;
296  os << *prefix << "Start" << endl;
297  std::cerr << os.str ();
298  }
299  this->beginExport(source, exporter, CM, restrictedMode);
300  this->endExport(source, exporter, CM, restrictedMode);
301  if (verbose) {
302  std::ostringstream os;
303  os << *prefix << "Done" << endl;
304  std::cerr << os.str ();
305  }
306  }
307 
308  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
309  void
311  doImport (const SrcDistObject& source,
313  const CombineMode CM,
314  const bool restrictedMode)
315  {
316  using Details::Behavior;
317  using std::endl;
318  const char modeString[] = "doImport (reverse mode)";
319 
320  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
321  // output to std::cerr on every MPI process. This is unwise for
322  // runs with large numbers of MPI processes.
323  const bool verbose = Behavior::verbose("DistObject");
324  std::unique_ptr<std::string> prefix;
325  if (verbose) {
326  prefix = this->createPrefix("DistObject", modeString);
327  std::ostringstream os;
328  os << *prefix << "Start" << endl;
329  std::cerr << os.str ();
330  }
331  this->beginImport(source, exporter, CM, restrictedMode);
332  this->endImport(source, exporter, CM, restrictedMode);
333  if (verbose) {
334  std::ostringstream os;
335  os << *prefix << "Done" << endl;
336  std::cerr << os.str ();
337  }
338  }
339 
340  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
341  void
343  doExport (const SrcDistObject& source,
345  const CombineMode CM,
346  const bool restrictedMode)
347  {
348  using Details::Behavior;
349  using std::endl;
350  const char modeString[] = "doExport (reverse mode)";
351 
352  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
353  // output to std::cerr on every MPI process. This is unwise for
354  // runs with large numbers of MPI processes.
355  const bool verbose = Behavior::verbose("DistObject");
356  std::unique_ptr<std::string> prefix;
357  if (verbose) {
358  prefix = this->createPrefix("DistObject", modeString);
359  std::ostringstream os;
360  os << *prefix << "Start" << endl;
361  std::cerr << os.str ();
362  }
363  this->beginExport(source, importer, CM, restrictedMode);
364  this->endExport(source, importer, CM, restrictedMode);
365  if (verbose) {
366  std::ostringstream os;
367  os << *prefix << "Done" << endl;
368  std::cerr << os.str ();
369  }
370  }
371 
372  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
373  void
375  beginImport(const SrcDistObject& source,
377  const CombineMode CM,
378  const bool restrictedMode)
379  {
380  using Details::Behavior;
381  using std::endl;
382  const char modeString[] = "beginImport (forward mode)";
383 
384  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
385  // output to std::cerr on every MPI process. This is unwise for
386  // runs with large numbers of MPI processes.
387  const bool verbose = Behavior::verbose("DistObject");
388  std::unique_ptr<std::string> prefix;
389  if (verbose) {
390  prefix = this->createPrefix("DistObject", modeString);
391  std::ostringstream os;
392  os << *prefix << "Start" << endl;
393  std::cerr << os.str ();
394  }
395  this->beginTransfer(source, importer, modeString, DoForward, CM, restrictedMode);
396  if (verbose) {
397  std::ostringstream os;
398  os << *prefix << "Done" << endl;
399  std::cerr << os.str ();
400  }
401  }
402 
403  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
404  void
405  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
406  beginExport(const SrcDistObject& source,
407  const Export<LocalOrdinal, GlobalOrdinal, Node>& exporter,
408  const CombineMode CM,
409  const bool restrictedMode)
410  {
411  using Details::Behavior;
412  using std::endl;
413  const char modeString[] = "beginExport (forward mode)";
414 
415  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
416  // output to std::cerr on every MPI process. This is unwise for
417  // runs with large numbers of MPI processes.
418  const bool verbose = Behavior::verbose("DistObject");
419  std::unique_ptr<std::string> prefix;
420  if (verbose) {
421  prefix = this->createPrefix("DistObject", modeString);
422  std::ostringstream os;
423  os << *prefix << "Start" << endl;
424  std::cerr << os.str ();
425  }
426  this->beginTransfer(source, exporter, modeString, DoForward, CM, restrictedMode);
427  if (verbose) {
428  std::ostringstream os;
429  os << *prefix << "Done" << endl;
430  std::cerr << os.str ();
431  }
432  }
433 
434  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
435  void
436  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
437  beginImport(const SrcDistObject& source,
438  const Export<LocalOrdinal, GlobalOrdinal, Node>& exporter,
439  const CombineMode CM,
440  const bool restrictedMode)
441  {
442  using Details::Behavior;
443  using std::endl;
444  const char modeString[] = "beginImport (reverse mode)";
445 
446  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
447  // output to std::cerr on every MPI process. This is unwise for
448  // runs with large numbers of MPI processes.
449  const bool verbose = Behavior::verbose("DistObject");
450  std::unique_ptr<std::string> prefix;
451  if (verbose) {
452  prefix = this->createPrefix("DistObject", modeString);
453  std::ostringstream os;
454  os << *prefix << "Start" << endl;
455  std::cerr << os.str ();
456  }
457  this->beginTransfer(source, exporter, modeString, DoReverse, CM, restrictedMode);
458  if (verbose) {
459  std::ostringstream os;
460  os << *prefix << "Done" << endl;
461  std::cerr << os.str ();
462  }
463  }
464 
465  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
466  void
467  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
468  beginExport(const SrcDistObject& source,
469  const Import<LocalOrdinal, GlobalOrdinal, Node> & importer,
470  const CombineMode CM,
471  const bool restrictedMode)
472  {
473  using Details::Behavior;
474  using std::endl;
475  const char modeString[] = "beginExport (reverse mode)";
476 
477  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
478  // output to std::cerr on every MPI process. This is unwise for
479  // runs with large numbers of MPI processes.
480  const bool verbose = Behavior::verbose("DistObject");
481  std::unique_ptr<std::string> prefix;
482  if (verbose) {
483  prefix = this->createPrefix("DistObject", modeString);
484  std::ostringstream os;
485  os << *prefix << "Start" << endl;
486  std::cerr << os.str ();
487  }
488  this->beginTransfer(source, importer, modeString, DoReverse, CM, restrictedMode);
489  if (verbose) {
490  std::ostringstream os;
491  os << *prefix << "Done" << endl;
492  std::cerr << os.str ();
493  }
494  }
495 
496  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
497  void
498  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
499  endImport(const SrcDistObject& source,
500  const Import<LocalOrdinal, GlobalOrdinal, Node>& importer,
501  const CombineMode CM,
502  const bool restrictedMode)
503  {
504  using Details::Behavior;
505  using std::endl;
506  const char modeString[] = "endImport (forward mode)";
507 
508  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
509  // output to std::cerr on every MPI process. This is unwise for
510  // runs with large numbers of MPI processes.
511  const bool verbose = Behavior::verbose("DistObject");
512  std::unique_ptr<std::string> prefix;
513  if (verbose) {
514  prefix = this->createPrefix("DistObject", modeString);
515  std::ostringstream os;
516  os << *prefix << "Start" << endl;
517  std::cerr << os.str ();
518  }
519  this->endTransfer(source, importer, modeString, DoForward, CM, restrictedMode);
520  if (verbose) {
521  std::ostringstream os;
522  os << *prefix << "Done" << endl;
523  std::cerr << os.str ();
524  }
525  }
526 
527  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
528  void
529  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
530  endExport(const SrcDistObject& source,
531  const Export<LocalOrdinal, GlobalOrdinal, Node>& exporter,
532  const CombineMode CM,
533  const bool restrictedMode)
534  {
535  using Details::Behavior;
536  using std::endl;
537  const char modeString[] = "endExport (forward mode)";
538 
539  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
540  // output to std::cerr on every MPI process. This is unwise for
541  // runs with large numbers of MPI processes.
542  const bool verbose = Behavior::verbose("DistObject");
543  std::unique_ptr<std::string> prefix;
544  if (verbose) {
545  prefix = this->createPrefix("DistObject", modeString);
546  std::ostringstream os;
547  os << *prefix << "Start" << endl;
548  std::cerr << os.str ();
549  }
550  this->endTransfer(source, exporter, modeString, DoForward, CM, restrictedMode);
551  if (verbose) {
552  std::ostringstream os;
553  os << *prefix << "Done" << endl;
554  std::cerr << os.str ();
555  }
556  }
557 
558  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
559  void
560  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
561  endImport(const SrcDistObject& source,
562  const Export<LocalOrdinal, GlobalOrdinal, Node>& exporter,
563  const CombineMode CM,
564  const bool restrictedMode)
565  {
566  using Details::Behavior;
567  using std::endl;
568  const char modeString[] = "endImport (reverse mode)";
569 
570  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
571  // output to std::cerr on every MPI process. This is unwise for
572  // runs with large numbers of MPI processes.
573  const bool verbose = Behavior::verbose("DistObject");
574  std::unique_ptr<std::string> prefix;
575  if (verbose) {
576  prefix = this->createPrefix("DistObject", modeString);
577  std::ostringstream os;
578  os << *prefix << "Start" << endl;
579  std::cerr << os.str ();
580  }
581  this->endTransfer(source, exporter, modeString, DoReverse, CM, restrictedMode);
582  if (verbose) {
583  std::ostringstream os;
584  os << *prefix << "Done" << endl;
585  std::cerr << os.str ();
586  }
587  }
588 
589  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
590  void
591  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
592  endExport(const SrcDistObject& source,
593  const Import<LocalOrdinal, GlobalOrdinal, Node> & importer,
594  const CombineMode CM,
595  const bool restrictedMode)
596  {
597  using Details::Behavior;
598  using std::endl;
599  const char modeString[] = "endExport (reverse mode)";
600 
601  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
602  // output to std::cerr on every MPI process. This is unwise for
603  // runs with large numbers of MPI processes.
604  const bool verbose = Behavior::verbose("DistObject");
605  std::unique_ptr<std::string> prefix;
606  if (verbose) {
607  prefix = this->createPrefix("DistObject", modeString);
608  std::ostringstream os;
609  os << *prefix << "Start" << endl;
610  std::cerr << os.str ();
611  }
612  this->endTransfer(source, importer, modeString, DoReverse, CM, restrictedMode);
613  if (verbose) {
614  std::ostringstream os;
615  os << *prefix << "Done" << endl;
616  std::cerr << os.str ();
617  }
618  }
619 
// Reports whether distributorActor_ is ready, by returning
// distributorActor_.isReady().
// NOTE(review): the declarator lines of this definition (class
// qualifier, function name, parameter list) are missing from this
// excerpt; restore them from the class declaration before building.
620  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
621  bool
624  return distributorActor_.isReady();
625  }
626 
627  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
628  bool
630  isDistributed () const {
631  return map_->isDistributed ();
632  }
633 
// Default implementation that always returns 0; subclasses may
// override it.
// NOTE(review): the declarator lines of this definition (class
// qualifier, function name, parameter list) are missing from this
// excerpt; restore them from the class declaration before building.
634  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
635  size_t
638  return 0; // default implementation; subclasses may override
639  }
640 
// Performs a complete transfer in two steps: beginTransfer followed by
// endTransfer, both called with the same arguments.
//
// transfer: the Transfer object describing the communication.
// modeString: caller-supplied string identifying the calling mode.
// revOp: forward or reverse option.
// CM: combine mode.
// restrictedMode: passed through unchanged to both steps.
//
// NOTE(review): the lines carrying the class qualifier, the function
// name, and the first parameter (the `src` object used in the body)
// are missing from this excerpt; restore them from the class
// declaration before building.
641  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
642  void
645  const ::Tpetra::Details::Transfer<local_ordinal_type, global_ordinal_type, node_type>& transfer,
646  const char modeString[],
647  const ReverseOption revOp,
648  const CombineMode CM,
649  bool restrictedMode)
650  {
651  beginTransfer(src, transfer, modeString, revOp, CM, restrictedMode);
652  endTransfer(src, transfer, modeString, revOp, CM, restrictedMode);
653  }
654 
655  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
656  bool
658  reallocImportsIfNeeded (const size_t newSize,
659  const bool verbose,
660  const std::string* prefix,
661  const bool /*remoteLIDsContiguous*/,
662  const CombineMode /*CM*/)
663  {
664  if (verbose) {
665  std::ostringstream os;
666  os << *prefix << "Realloc (if needed) imports_ from "
667  << imports_.extent (0) << " to " << newSize << std::endl;
668  std::cerr << os.str ();
669  }
671  const bool reallocated =
672  reallocDualViewIfNeeded (this->imports_, newSize, "imports");
673  if (verbose) {
674  std::ostringstream os;
675  os << *prefix << "Finished realloc'ing imports_" << std::endl;
676  std::cerr << os.str ();
677  }
678  return reallocated;
679  }
680 
681  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
682  bool
684  reallocArraysForNumPacketsPerLid (const size_t numExportLIDs,
685  const size_t numImportLIDs)
686  {
687  using Details::Behavior;
690  using std::endl;
691  // If an array is already allocated, and if is at least
692  // tooBigFactor times bigger than it needs to be, free it and
693  // reallocate to the size we need, in order to save space.
694  // Otherwise, take subviews to reduce allocation size.
695  constexpr size_t tooBigFactor = 10;
696 
697  const bool verbose = Behavior::verbose("DistObject");
698  std::unique_ptr<std::string> prefix;
699  if (verbose) {
700  prefix = this->createPrefix("DistObject",
701  "reallocArraysForNumPacketsPerLid");
702  std::ostringstream os;
703  os << *prefix
704  << "numExportLIDs: " << numExportLIDs
705  << ", numImportLIDs: " << numImportLIDs
706  << endl;
707  os << *prefix << "DualView status before:" << endl
708  << *prefix
709  << dualViewStatusToString (this->numExportPacketsPerLID_,
710  "numExportPacketsPerLID_")
711  << endl
712  << *prefix
713  << dualViewStatusToString (this->numImportPacketsPerLID_,
714  "numImportPacketsPerLID_")
715  << endl;
716  std::cerr << os.str ();
717  }
718 
719  // Reallocate numExportPacketsPerLID_ if needed.
720  const bool firstReallocated =
721  reallocDualViewIfNeeded (this->numExportPacketsPerLID_,
722  numExportLIDs,
723  "numExportPacketsPerLID",
724  tooBigFactor,
725  true); // need fence before, if realloc'ing
726 
727  // If we reallocated above, then we fenced after that
728  // reallocation. This means that we don't need to fence again,
729  // before the next reallocation.
730  const bool needFenceBeforeNextAlloc = ! firstReallocated;
731  const bool secondReallocated =
732  reallocDualViewIfNeeded (this->numImportPacketsPerLID_,
733  numImportLIDs,
734  "numImportPacketsPerLID",
735  tooBigFactor,
736  needFenceBeforeNextAlloc);
737 
738  if (verbose) {
739  std::ostringstream os;
740  os << *prefix << "DualView status after:" << endl
741  << *prefix << dualViewStatusToString (this->numExportPacketsPerLID_,
742  "numExportPacketsPerLID_")
743  << endl
744  << *prefix << dualViewStatusToString (this->numImportPacketsPerLID_,
745  "numImportPacketsPerLID_")
746  << endl;
747  std::cerr << os.str ();
748  }
749 
750  return firstReallocated || secondReallocated;
751  }
752 
753  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
754  void
757  const ::Tpetra::Details::Transfer<local_ordinal_type, global_ordinal_type, node_type>& transfer,
758  const char modeString[],
759  const ReverseOption revOp,
760  const CombineMode CM,
761  bool restrictedMode)
762  {
763  using Details::Behavior;
767  using Kokkos::Compat::getArrayView;
768  using Kokkos::Compat::getConstArrayView;
769  using Kokkos::Compat::getKokkosViewDeepCopy;
770  using Kokkos::Compat::create_const_view;
771  using std::endl;
774 
775  const bool commOnHost = ! Behavior::assumeMpiIsGPUAware ();
776  const char funcNameHost[] = "Tpetra::DistObject::beginTransfer[Host]";
777  const char funcNameDevice[] = "Tpetra::DistObject::beginTransfer[Device]";
778  const char *funcName = commOnHost ? funcNameHost : funcNameDevice;
779 
780  ProfilingRegion region_doTransfer(funcName);
781  const bool verbose = Behavior::verbose("DistObject");
782  std::shared_ptr<std::string> prefix;
783  if (verbose) {
784  std::ostringstream os;
785  prefix = this->createPrefix("DistObject", "doTransfer");
786  os << *prefix << "Source type: " << Teuchos::typeName(src)
787  << ", Target type: " << Teuchos::typeName(*this) << endl;
788  std::cerr << os.str();
789  }
790 
791  // "Restricted Mode" does two things:
792  // 1) Skips copyAndPermute
793  // 2) Allows the "target" Map of the transfer to be a subset of
794  // the Map of *this, in a "locallyFitted" sense.
795  //
796  // This cannot be used if #2 is not true, OR there are permutes.
797  // Source Maps still need to match
798 
799  // mfh 18 Oct 2017: Set TPETRA_DEBUG to true to enable extra debug
800  // checks. These may communicate more.
801  const bool debug = Behavior::debug("DistObject");
802  if (debug) {
803  if (! restrictedMode && revOp == DoForward) {
804  const bool myMapSameAsTransferTgtMap =
805  this->getMap ()->isSameAs (* (transfer.getTargetMap ()));
806  TEUCHOS_TEST_FOR_EXCEPTION
807  (! myMapSameAsTransferTgtMap, std::invalid_argument,
808  "Tpetra::DistObject::" << modeString << ": For forward-mode "
809  "communication, the target DistObject's Map must be the same "
810  "(in the sense of Tpetra::Map::isSameAs) as the input "
811  "Export/Import object's target Map.");
812  }
813  else if (! restrictedMode && revOp == DoReverse) {
814  const bool myMapSameAsTransferSrcMap =
815  this->getMap ()->isSameAs (* (transfer.getSourceMap ()));
816  TEUCHOS_TEST_FOR_EXCEPTION
817  (! myMapSameAsTransferSrcMap, std::invalid_argument,
818  "Tpetra::DistObject::" << modeString << ": For reverse-mode "
819  "communication, the target DistObject's Map must be the same "
820  "(in the sense of Tpetra::Map::isSameAs) as the input "
821  "Export/Import object's source Map.");
822  }
823  else if (restrictedMode && revOp == DoForward) {
824  const bool myMapLocallyFittedTransferTgtMap =
825  this->getMap ()->isLocallyFitted (* (transfer.getTargetMap ()));
826  TEUCHOS_TEST_FOR_EXCEPTION
827  (! myMapLocallyFittedTransferTgtMap , std::invalid_argument,
828  "Tpetra::DistObject::" << modeString << ": For forward-mode "
829  "communication using restricted mode, Export/Import object's "
830  "target Map must be locally fitted (in the sense of "
831  "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
832  }
833  else { // if (restrictedMode && revOp == DoReverse)
834  const bool myMapLocallyFittedTransferSrcMap =
835  this->getMap ()->isLocallyFitted (* (transfer.getSourceMap ()));
836  TEUCHOS_TEST_FOR_EXCEPTION
837  (! myMapLocallyFittedTransferSrcMap, std::invalid_argument,
838  "Tpetra::DistObject::" << modeString << ": For reverse-mode "
839  "communication using restricted mode, Export/Import object's "
840  "source Map must be locally fitted (in the sense of "
841  "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
842  }
843 
844  // SrcDistObject need not even _have_ Maps. However, if the
845  // source object is a DistObject, it has a Map, and we may
846  // compare that Map with the Transfer's Maps.
847  const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
848  if (srcDistObj != nullptr) {
849  if (revOp == DoForward) {
850  const bool srcMapSameAsImportSrcMap =
851  srcDistObj->getMap ()->isSameAs (* (transfer.getSourceMap ()));
852  TEUCHOS_TEST_FOR_EXCEPTION
853  (! srcMapSameAsImportSrcMap, std::invalid_argument,
854  "Tpetra::DistObject::" << modeString << ": For forward-mode "
855  "communication, the source DistObject's Map must be the same "
856  "as the input Export/Import object's source Map.");
857  }
858  else { // revOp == DoReverse
859  const bool srcMapSameAsImportTgtMap =
860  srcDistObj->getMap ()->isSameAs (* (transfer.getTargetMap ()));
861  TEUCHOS_TEST_FOR_EXCEPTION
862  (! srcMapSameAsImportTgtMap, std::invalid_argument,
863  "Tpetra::DistObject::" << modeString << ": For reverse-mode "
864  "communication, the source DistObject's Map must be the same "
865  "as the input Export/Import object's target Map.");
866  }
867  }
868  }
869 
870  const size_t numSameIDs = transfer.getNumSameIDs ();
871  Distributor& distor = transfer.getDistributor ();
872  const Details::DistributorPlan& distributorPlan = (revOp == DoForward) ? distor.getPlan() : *distor.getPlan().getReversePlan();
873 
874  TEUCHOS_TEST_FOR_EXCEPTION
875  (debug && restrictedMode &&
876  (transfer.getPermuteToLIDs_dv().extent(0) != 0 ||
877  transfer.getPermuteFromLIDs_dv().extent(0) != 0),
878  std::invalid_argument,
879  "Tpetra::DistObject::" << modeString << ": Transfer object "
880  "cannot have permutes in restricted mode.");
881 
882  // Do we need all communication buffers to live on host?
883  if (verbose) {
884  std::ostringstream os;
885  os << *prefix << "doTransfer: Use new interface; "
886  "commOnHost=" << (commOnHost ? "true" : "false") << endl;
887  std::cerr << os.str ();
888  }
889 
890  using const_lo_dv_type =
891  Kokkos::DualView<const local_ordinal_type*, buffer_device_type>;
892  const_lo_dv_type permuteToLIDs = (revOp == DoForward) ?
893  transfer.getPermuteToLIDs_dv () :
894  transfer.getPermuteFromLIDs_dv ();
895  const_lo_dv_type permuteFromLIDs = (revOp == DoForward) ?
896  transfer.getPermuteFromLIDs_dv () :
897  transfer.getPermuteToLIDs_dv ();
898  const_lo_dv_type remoteLIDs = (revOp == DoForward) ?
899  transfer.getRemoteLIDs_dv () :
900  transfer.getExportLIDs_dv ();
901  const_lo_dv_type exportLIDs = (revOp == DoForward) ?
902  transfer.getExportLIDs_dv () :
903  transfer.getRemoteLIDs_dv ();
904  const bool canTryAliasing = (revOp == DoForward) ?
905  transfer.areRemoteLIDsContiguous() :
906  transfer.areExportLIDsContiguous();
907  // const bool canTryAliasing = false;
908 
909  ProfilingRegion region_dTN(funcName);
910 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
911  // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in favor
912  // of Kokkos profiling.
913  Teuchos::TimeMonitor doXferMon (*doXferTimer_);
914 #endif // HAVE_TPETRA_TRANSFER_TIMERS
915 
916  if (verbose) {
917  std::ostringstream os;
918  os << *prefix << "Input arguments:" << endl
919  << *prefix << " combineMode: " << combineModeToString (CM) << endl
920  << *prefix << " numSameIDs: " << numSameIDs << endl
921  << *prefix << " "
922  << dualViewStatusToString (permuteToLIDs, "permuteToLIDs") << endl
923  << *prefix << " "
924  << dualViewStatusToString (permuteFromLIDs, "permuteFromLIDs") << endl
925  << *prefix << " "
926  << dualViewStatusToString (remoteLIDs, "remoteLIDs") << endl
927  << *prefix << " "
928  << dualViewStatusToString (exportLIDs, "exportLIDs") << endl
929  << *prefix << " revOp: Do" << (revOp == DoReverse ? "Reverse" : "Forward") << endl
930  << *prefix << " commOnHost: " << (commOnHost ? "true" : "false") << endl;
931  std::cerr << os.str ();
932  }
933 
934  {
935  ProfilingRegion region_cs ("Tpetra::DistObject::doTransferNew::checkSizes");
936  if (verbose) {
937  std::ostringstream os;
938  os << *prefix << "1. checkSizes" << endl;
939  std::cerr << os.str ();
940  }
941  const bool checkSizesResult = this->checkSizes (src);
942  TEUCHOS_TEST_FOR_EXCEPTION
943  (! checkSizesResult, std::invalid_argument,
944  "Tpetra::DistObject::doTransfer: checkSizes() indicates that the "
945  "destination object is not a legal target for redistribution from the "
946  "source object. This probably means that they do not have the same "
947  "dimensions. For example, MultiVectors must have the same number of "
948  "rows and columns.");
949  }
950 
951  // NOTE (mfh 26 Apr 2016) Chris Baker's implementation understood
952  // that if CM == INSERT || CM == REPLACE, the target object could
953  // be write only. We don't optimize for that here.
954 
955  if (!restrictedMode && numSameIDs + permuteToLIDs.extent (0) != 0) {
956  // There is at least one GID to copy or permute.
957  if (verbose) {
958  std::ostringstream os;
959  os << *prefix << "2. copyAndPermute" << endl;
960  std::cerr << os.str ();
961  }
962  ProfilingRegion region_cp
963  ("Tpetra::DistObject::doTransferNew::copyAndPermute");
964 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
965  // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in favor
966  // of Kokkos profiling.
967  Teuchos::TimeMonitor copyAndPermuteMon (*copyAndPermuteTimer_);
968 #endif // HAVE_TPETRA_TRANSFER_TIMERS
969 
970  if (numSameIDs + permuteToLIDs.extent (0) != 0) {
971  // There is at least one GID to copy or permute.
972  if (verbose) {
973  std::ostringstream os;
974  os << *prefix << "2. copyAndPermute" << endl;
975  std::cerr << os.str ();
976  }
977  this->copyAndPermute (src, numSameIDs, permuteToLIDs,
978  permuteFromLIDs, CM);
979  if (verbose) {
980  std::ostringstream os;
981  os << *prefix << "After copyAndPermute:" << endl
982  << *prefix << " "
983  << dualViewStatusToString (permuteToLIDs, "permuteToLIDs")
984  << endl
985  << *prefix << " "
986  << dualViewStatusToString (permuteFromLIDs, "permuteFromLIDs")
987  << endl;
988  std::cerr << os.str ();
989  }
990  }
991  }
992 
993  // The method may return zero even if the implementation actually
994  // does have a constant number of packets per LID. However, if it
995  // returns nonzero, we may use this information to avoid
996  // (re)allocating num{Ex,Im}portPacketsPerLID_. packAndPrepare()
997  // will set this to its final value.
998  //
999  // We only need this if CM != ZERO, but it has to be lifted out of
1000  // that scope because there are multiple tests for CM != ZERO.
1001  size_t constantNumPackets = this->constantNumberOfPackets ();
1002  if (verbose) {
1003  std::ostringstream os;
1004  os << *prefix << "constantNumPackets=" << constantNumPackets << endl;
1005  std::cerr << os.str ();
1006  }
1007 
1008  // We only need to pack communication buffers if the combine mode
1009  // is not ZERO. A "ZERO combine mode" means that the results are
1010  // the same as if we had received all zeros, and added them to the
1011  // existing values. That means we don't need to communicate.
1012  if (CM != ZERO) {
1013  if (constantNumPackets == 0) {
1014  if (verbose) {
1015  std::ostringstream os;
1016  os << *prefix << "3. (Re)allocate num{Ex,Im}portPacketsPerLID"
1017  << endl;
1018  std::cerr << os.str ();
1019  }
1020  // This only reallocates if necessary, that is, if the sizes
1021  // don't match.
1022  this->reallocArraysForNumPacketsPerLid (exportLIDs.extent (0),
1023  remoteLIDs.extent (0));
1024  }
1025 
1026  if (verbose) {
1027  std::ostringstream os;
1028  os << *prefix << "4. packAndPrepare: before, "
1029  << dualViewStatusToString (this->exports_, "exports_")
1030  << endl;
1031  std::cerr << os.str ();
1032  }
1033 
1034  doPackAndPrepare(src, exportLIDs, constantNumPackets, execution_space());
1035  if (commOnHost) {
1036  this->exports_.sync_host();
1037  }
1038  else {
1039  this->exports_.sync_device();
1040  }
1041 
1042  if (verbose) {
1043  std::ostringstream os;
1044  os << *prefix << "5.1. After packAndPrepare, "
1045  << dualViewStatusToString (this->exports_, "exports_")
1046  << endl;
1047  std::cerr << os.str ();
1048  }
1049  } // if (CM != ZERO)
1050 
1051  // We only need to send data if the combine mode is not ZERO.
1052  if (CM != ZERO) {
1053  if (constantNumPackets != 0) {
1054  // There are a constant number of packets per element. We
1055  // already know (from the number of "remote" (incoming)
1056  // elements) how many incoming elements we expect, so we can
1057  // resize the buffer accordingly.
1058  const size_t rbufLen = remoteLIDs.extent (0) * constantNumPackets;
1059  reallocImportsIfNeeded (rbufLen, verbose, prefix.get (), canTryAliasing, CM);
1060  }
1061 
1062  // Do we need to do communication (via doPostsAndWaits)?
1063  bool needCommunication = true;
1064 
1065  // This may be NULL. It will be used below.
1066  const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
1067 
1068  if (revOp == DoReverse && ! this->isDistributed ()) {
1069  needCommunication = false;
1070  }
1071  // FIXME (mfh 30 Jun 2013): Checking whether the source object
1072  // is distributed requires a cast to DistObject. If it's not a
1073  // DistObject, then I'm not quite sure what to do. Perhaps it
1074  // would be more appropriate for SrcDistObject to have an
1075  // isDistributed() method. For now, I'll just assume that we
1076  // need to do communication unless the cast succeeds and the
1077  // source is not distributed.
1078  else if (revOp == DoForward && srcDistObj != NULL &&
1079  ! srcDistObj->isDistributed ()) {
1080  needCommunication = false;
1081  }
1082 
1083  if (! needCommunication) {
1084  if (verbose) {
1085  std::ostringstream os;
1086  os << *prefix << "Comm not needed; skipping" << endl;
1087  std::cerr << os.str ();
1088  }
1089  }
1090  else {
1091  ProfilingRegion region_dpw
1092  ("Tpetra::DistObject::doTransferNew::doPostsAndWaits");
1093 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
1094  // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in
1095  // favor of Kokkos profiling.
1096  Teuchos::TimeMonitor doPostsAndWaitsMon (*doPostsAndWaitsTimer_);
1097 #endif // HAVE_TPETRA_TRANSFER_TIMERS
1098 
1099  if (verbose) {
1100  std::ostringstream os;
1101  os << *prefix << "7.0. "
1102  << (revOp == DoReverse ? "Reverse" : "Forward")
1103  << " mode" << endl;
1104  std::cerr << os.str ();
1105  }
1106 
1107  doPosts(distributorPlan, constantNumPackets, commOnHost, prefix, canTryAliasing, CM);
1108  } // if (needCommunication)
1109  } // if (CM != ZERO)
1110  }
1111 
// endTransfer: finishing half of a transfer.
//
// In debug mode it checks the Maps of *this and (if src is a this_type) of
// src against the transfer's source/target Maps. Then, unless CM == ZERO or
// communication is judged unnecessary, it waits on distributorActor_ for the
// selected plan and calls doUnpackAndCombine on the remote LIDs.
//
// Parameters:
//  src            - source object; only inspected through a dynamic_cast to
//                   this_type.
//  transfer       - supplies the Distributor, the LID DualViews and the
//                   source/target Maps.
//  modeString     - used only to label exception messages.
//  revOp          - DoForward uses the Distributor's plan; otherwise its
//                   reverse plan is used and the remote/export and
//                   permuteTo/permuteFrom roles are swapped.
//  CM             - combine mode; ZERO skips the wait and the unpack.
//  restrictedMode - in debug mode, switches the checks on *this's Map from
//                   isSameAs to isLocallyFitted and forbids permutes.
//
// A failed check raises std::invalid_argument through
// TEUCHOS_TEST_FOR_EXCEPTION.
//
// NOTE(review): the listing numbers skip (1114, 1123-1125, 1131-1132), so the
// class-qualifier line and some using-declarations appear to be missing from
// this extract.
1112  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1113  void
1115  endTransfer(const SrcDistObject& src,
1116  const ::Tpetra::Details::Transfer<local_ordinal_type, global_ordinal_type, node_type>& transfer,
1117  const char modeString[],
1118  const ReverseOption revOp,
1119  const CombineMode CM,
1120  bool restrictedMode)
1121  {
1122  using Details::Behavior;
1126  using Kokkos::Compat::getArrayView;
1127  using Kokkos::Compat::getConstArrayView;
1128  using Kokkos::Compat::getKokkosViewDeepCopy;
1129  using Kokkos::Compat::create_const_view;
1130  using std::endl;
1133 
  // Communication buffers are used on host unless MPI is assumed GPU-aware.
1134  const bool commOnHost = ! Behavior::assumeMpiIsGPUAware ();
1135  const char funcNameHost[] = "Tpetra::DistObject::endTransfer[Host]";
1136  const char funcNameDevice[] = "Tpetra::DistObject::endTransfer[Device]";
1137  const char *funcName = commOnHost ? funcNameHost : funcNameDevice;
1138  ProfilingRegion region_doTransfer(funcName);
1139  const bool verbose = Behavior::verbose("DistObject");
  // prefix stays null unless verbose; every later *prefix is guarded by
  // "if (verbose)".
1140  std::shared_ptr<std::string> prefix;
1141  if (verbose) {
1142  std::ostringstream os;
  // Note: the log prefix is labelled "doTransfer" although this is
  // endTransfer.
1143  prefix = this->createPrefix("DistObject", "doTransfer");
1144  os << *prefix << "Source type: " << Teuchos::typeName(src)
1145  << ", Target type: " << Teuchos::typeName(*this) << endl;
1146  std::cerr << os.str();
1147  }
1148 
1149  // "Restricted Mode" does two things:
1150  // 1) Skips copyAndPermute
1151  // 2) Allows the "target" Map of the transfer to be a subset of
1152  // the Map of *this, in a "locallyFitted" sense.
1153  //
1154  // This cannot be used if #2 is not true, OR there are permutes.
1155  // Source Maps still need to match
1156 
1157  // mfh 18 Oct 2017: Set TPETRA_DEBUG to true to enable extra debug
1158  // checks. These may communicate more.
1159  const bool debug = Behavior::debug("DistObject");
1160  if (debug) {
  // Four cases: {restricted, unrestricted} x {forward, reverse}. Each one
  // checks the Map of *this against the transfer's target Map (forward) or
  // source Map (reverse).
1161  if (! restrictedMode && revOp == DoForward) {
1162  const bool myMapSameAsTransferTgtMap =
1163  this->getMap ()->isSameAs (* (transfer.getTargetMap ()));
1164  TEUCHOS_TEST_FOR_EXCEPTION
1165  (! myMapSameAsTransferTgtMap, std::invalid_argument,
1166  "Tpetra::DistObject::" << modeString << ": For forward-mode "
1167  "communication, the target DistObject's Map must be the same "
1168  "(in the sense of Tpetra::Map::isSameAs) as the input "
1169  "Export/Import object's target Map.");
1170  }
1171  else if (! restrictedMode && revOp == DoReverse) {
1172  const bool myMapSameAsTransferSrcMap =
1173  this->getMap ()->isSameAs (* (transfer.getSourceMap ()));
1174  TEUCHOS_TEST_FOR_EXCEPTION
1175  (! myMapSameAsTransferSrcMap, std::invalid_argument,
1176  "Tpetra::DistObject::" << modeString << ": For reverse-mode "
1177  "communication, the target DistObject's Map must be the same "
1178  "(in the sense of Tpetra::Map::isSameAs) as the input "
1179  "Export/Import object's source Map.");
1180  }
1181  else if (restrictedMode && revOp == DoForward) {
1182  const bool myMapLocallyFittedTransferTgtMap =
1183  this->getMap ()->isLocallyFitted (* (transfer.getTargetMap ()));
1184  TEUCHOS_TEST_FOR_EXCEPTION
1185  (! myMapLocallyFittedTransferTgtMap , std::invalid_argument,
1186  "Tpetra::DistObject::" << modeString << ": For forward-mode "
1187  "communication using restricted mode, Export/Import object's "
1188  "target Map must be locally fitted (in the sense of "
1189  "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
1190  }
1191  else { // if (restrictedMode && revOp == DoReverse)
1192  const bool myMapLocallyFittedTransferSrcMap =
1193  this->getMap ()->isLocallyFitted (* (transfer.getSourceMap ()));
1194  TEUCHOS_TEST_FOR_EXCEPTION
1195  (! myMapLocallyFittedTransferSrcMap, std::invalid_argument,
1196  "Tpetra::DistObject::" << modeString << ": For reverse-mode "
1197  "communication using restricted mode, Export/Import object's "
1198  "source Map must be locally fitted (in the sense of "
1199  "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
1200  }
1201 
1202  // SrcDistObject need not even _have_ Maps. However, if the
1203  // source object is a DistObject, it has a Map, and we may
1204  // compare that Map with the Transfer's Maps.
1205  const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
1206  if (srcDistObj != nullptr) {
1207  if (revOp == DoForward) {
1208  const bool srcMapSameAsImportSrcMap =
1209  srcDistObj->getMap ()->isSameAs (* (transfer.getSourceMap ()));
1210  TEUCHOS_TEST_FOR_EXCEPTION
1211  (! srcMapSameAsImportSrcMap, std::invalid_argument,
1212  "Tpetra::DistObject::" << modeString << ": For forward-mode "
1213  "communication, the source DistObject's Map must be the same "
1214  "as the input Export/Import object's source Map.");
1215  }
1216  else { // revOp == DoReverse
1217  const bool srcMapSameAsImportTgtMap =
1218  srcDistObj->getMap ()->isSameAs (* (transfer.getTargetMap ()));
1219  TEUCHOS_TEST_FOR_EXCEPTION
1220  (! srcMapSameAsImportTgtMap, std::invalid_argument,
1221  "Tpetra::DistObject::" << modeString << ": For reverse-mode "
1222  "communication, the source DistObject's Map must be the same "
1223  "as the input Export/Import object's target Map.");
1224  }
1225  }
1226  }
1227 
  // Forward mode uses the Distributor's own plan; reverse mode uses that
  // plan's reverse plan.
1228  Distributor& distor = transfer.getDistributor ();
1229  const Details::DistributorPlan& distributorPlan = (revOp == DoForward) ? distor.getPlan() : *distor.getPlan().getReversePlan();
1230 
1231  TEUCHOS_TEST_FOR_EXCEPTION
1232  (debug && restrictedMode &&
1233  (transfer.getPermuteToLIDs_dv().extent(0) != 0 ||
1234  transfer.getPermuteFromLIDs_dv().extent(0) != 0),
1235  std::invalid_argument,
1236  "Tpetra::DistObject::" << modeString << ": Transfer object "
1237  "cannot have permutes in restricted mode.");
1238 
1239  // Do we need all communication buffers to live on host?
1240  if (verbose) {
1241  std::ostringstream os;
1242  os << *prefix << "doTransfer: Use new interface; "
1243  "commOnHost=" << (commOnHost ? "true" : "false") << endl;
1244  std::cerr << os.str ();
1245  }
1246 
  // In reverse mode each pair of LID lists swaps roles. Of the four
  // DualViews below, only remoteLIDs is used later in this function.
1247  using const_lo_dv_type =
1248  Kokkos::DualView<const local_ordinal_type*, buffer_device_type>;
1249  const_lo_dv_type permuteToLIDs = (revOp == DoForward) ?
1250  transfer.getPermuteToLIDs_dv () :
1251  transfer.getPermuteFromLIDs_dv ();
1252  const_lo_dv_type permuteFromLIDs = (revOp == DoForward) ?
1253  transfer.getPermuteFromLIDs_dv () :
1254  transfer.getPermuteToLIDs_dv ();
1255  const_lo_dv_type remoteLIDs = (revOp == DoForward) ?
1256  transfer.getRemoteLIDs_dv () :
1257  transfer.getExportLIDs_dv ();
1258  const_lo_dv_type exportLIDs = (revOp == DoForward) ?
1259  transfer.getExportLIDs_dv () :
1260  transfer.getRemoteLIDs_dv ();
1261  const bool canTryAliasing = (revOp == DoForward) ?
1262  transfer.areRemoteLIDsContiguous() :
1263  transfer.areExportLIDsContiguous();
1264 
1265  size_t constantNumPackets = this->constantNumberOfPackets ();
1266 
1267  // We only need to send data if the combine mode is not ZERO.
1268  if (CM != ZERO) {
1269  if (constantNumPackets != 0) {
1270  // There are a constant number of packets per element. We
1271  // already know (from the number of "remote" (incoming)
1272  // elements) how many incoming elements we expect, so we can
1273  // resize the buffer accordingly.
1274  const size_t rbufLen = remoteLIDs.extent (0) * constantNumPackets;
1275  reallocImportsIfNeeded (rbufLen, verbose, prefix.get (), canTryAliasing, CM);
1276  }
1277 
1278  // Do we need to do communication (via doPostsAndWaits)?
1279  bool needCommunication = true;
1280 
1281  // This may be NULL. It will be used below.
1282  const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
1283 
1284  if (revOp == DoReverse && ! this->isDistributed ()) {
1285  needCommunication = false;
1286  }
1287  // FIXME (mfh 30 Jun 2013): Checking whether the source object
1288  // is distributed requires a cast to DistObject. If it's not a
1289  // DistObject, then I'm not quite sure what to do. Perhaps it
1290  // would be more appropriate for SrcDistObject to have an
1291  // isDistributed() method. For now, I'll just assume that we
1292  // need to do communication unless the cast succeeds and the
1293  // source is not distributed.
1294  else if (revOp == DoForward && srcDistObj != NULL &&
1295  ! srcDistObj->isDistributed ()) {
1296  needCommunication = false;
1297  }
1298 
1299  if (! needCommunication) {
1300  if (verbose) {
1301  std::ostringstream os;
1302  os << *prefix << "Comm not needed; skipping" << endl;
1303  std::cerr << os.str ();
1304  }
1305  }
1306  else {
  // Wait on the distributor actor for this plan before unpacking.
1307  distributorActor_.doWaits(distributorPlan);
1308 
1309  if (verbose) {
1310  std::ostringstream os;
1311  os << *prefix << "8. unpackAndCombine - remoteLIDs " << remoteLIDs.extent(0) << ", constantNumPackets " << constantNumPackets << endl;
1312  std::cerr << os.str ();
1313  }
1314  doUnpackAndCombine(remoteLIDs, constantNumPackets, CM, execution_space());
1315  } // if (needCommunication)
1316  } // if (CM != ZERO)
1317 
  // Two consecutive "Done!" messages are printed in verbose mode.
1318  if (verbose) {
1319  std::ostringstream os;
1320  os << *prefix << "9. Done!" << endl;
1321  std::cerr << os.str ();
1322  }
1323 
1324  if (verbose) {
1325  std::ostringstream os;
1326  os << *prefix << "Tpetra::DistObject::doTransfer: Done!" << endl;
1327  std::cerr << os.str ();
1328  }
1329  }
1330 
1331  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1332  void
1333  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
1334  doPosts(const Details::DistributorPlan& distributorPlan,
1335  size_t constantNumPackets,
1336  bool commOnHost,
1337  std::shared_ptr<std::string> prefix,
1338  const bool canTryAliasing,
1339  const CombineMode CM)
1340  {
1343  using Kokkos::Compat::create_const_view;
1344  using std::endl;
1345 
1346  const bool verbose = Details::Behavior::verbose("DistObject");
1347 
1348  if (constantNumPackets == 0) { // variable num packets per LID
1349  if (verbose) {
1350  std::ostringstream os;
1351  os << *prefix << "7.1. Variable # packets / LID: first comm "
1352  << "(commOnHost = " << (commOnHost ? "true" : "false") << ")"
1353  << endl;
1354  std::cerr << os.str ();
1355  }
1356  size_t totalImportPackets = 0;
1357  if (commOnHost) {
1358  if (this->numExportPacketsPerLID_.need_sync_host ()) {
1359  this->numExportPacketsPerLID_.sync_host ();
1360  }
1361  if (this->numImportPacketsPerLID_.need_sync_host ()) {
1362  this->numImportPacketsPerLID_.sync_host ();
1363  }
1364  this->numImportPacketsPerLID_.modify_host (); // out arg
1365  auto numExp_h =
1366  create_const_view (this->numExportPacketsPerLID_.view_host ());
1367  auto numImp_h = this->numImportPacketsPerLID_.view_host ();
1368 
1369  // MPI communication happens here.
1370  if (verbose) {
1371  std::ostringstream os;
1372  os << *prefix << "Call doPostsAndWaits"
1373  << endl;
1374  std::cerr << os.str ();
1375  }
1376  distributorActor_.doPostsAndWaits(distributorPlan, numExp_h, 1, numImp_h);
1377 
1378  if (verbose) {
1379  std::ostringstream os;
1380  os << *prefix << "Count totalImportPackets" << std::endl;
1381  std::cerr << os.str ();
1382  }
1383  using the_dev_type = typename decltype (numImp_h)::device_type;
1384  totalImportPackets = countTotalImportPackets<the_dev_type> (numImp_h);
1385  }
1386  else { // ! commOnHost
1387  this->numExportPacketsPerLID_.sync_device ();
1388  this->numImportPacketsPerLID_.sync_device ();
1389  this->numImportPacketsPerLID_.modify_device (); // out arg
1390  auto numExp_d = create_const_view
1391  (this->numExportPacketsPerLID_.view_device ());
1392  auto numImp_d = this->numImportPacketsPerLID_.view_device ();
1393 
1394  // MPI communication happens here.
1395  if (verbose) {
1396  std::ostringstream os;
1397  os << *prefix << "Call doPostsAndWaits"
1398  << endl;
1399  std::cerr << os.str ();
1400  }
1401 
1402  distributorActor_.doPostsAndWaits(distributorPlan, numExp_d, 1, numImp_d);
1403 
1404  if (verbose) {
1405  std::ostringstream os;
1406  os << *prefix << "Count totalImportPackets" << std::endl;
1407  std::cerr << os.str ();
1408  }
1409  using the_dev_type = typename decltype (numImp_d)::device_type;
1410  totalImportPackets = countTotalImportPackets<the_dev_type> (numImp_d);
1411  }
1412 
1413  if (verbose) {
1414  std::ostringstream os;
1415  os << *prefix << "totalImportPackets=" << totalImportPackets << endl;
1416  std::cerr << os.str ();
1417  }
1418  this->reallocImportsIfNeeded (totalImportPackets, verbose,
1419  prefix.get (), canTryAliasing, CM);
1420  if (verbose) {
1421  std::ostringstream os;
1422  os << *prefix << "7.3. Second comm" << std::endl;
1423  std::cerr << os.str ();
1424  }
1425 
1426  // mfh 04 Feb 2019: Distributor expects the "num packets per
1427  // LID" arrays on host, so that it can issue MPI sends and
1428  // receives correctly.
1429  this->numExportPacketsPerLID_.sync_host ();
1430  this->numImportPacketsPerLID_.sync_host ();
1431 
1432  // NOTE (mfh 25 Apr 2016, 01 Aug 2017) doPostsAndWaits and
1433  // doReversePostsAndWaits currently want
1434  // numExportPacketsPerLID and numImportPacketsPerLID as
1435  // Teuchos::ArrayView, rather than as Kokkos::View.
1436  //
1437  // NOTE (mfh 04 Feb 2019) This does NOT copy from host to
1438  // device. The above syncs might.
1439  auto numExportPacketsPerLID_av =
1440  getArrayViewFromDualView (this->numExportPacketsPerLID_);
1441  auto numImportPacketsPerLID_av =
1442  getArrayViewFromDualView (this->numImportPacketsPerLID_);
1443 
1444  // imports_ is for output only, so we don't need to sync it
1445  // before marking it as modified. However, in order to
1446  // prevent spurious debug-mode errors (e.g., "modified on
1447  // both device and host"), we first need to clear its
1448  // "modified" flags.
1449  this->imports_.clear_sync_state ();
1450 
1451  if (verbose) {
1452  std::ostringstream os;
1453  os << *prefix << "Comm on "
1454  << (commOnHost ? "host" : "device")
1455  << "; call doPosts" << endl;
1456  std::cerr << os.str ();
1457  }
1458 
1459  if (commOnHost) {
1460  this->imports_.modify_host ();
1461  distributorActor_.doPosts
1462  (distributorPlan,
1463  create_const_view (this->exports_.view_host ()),
1464  numExportPacketsPerLID_av,
1465  this->imports_.view_host (),
1466  numImportPacketsPerLID_av);
1467  }
1468  else { // pack on device
1469  Kokkos::fence("DistObject::doPosts-1"); // for UVM
1470  this->imports_.modify_device ();
1471  distributorActor_.doPosts
1472  (distributorPlan,
1473  create_const_view (this->exports_.view_device ()),
1474  numExportPacketsPerLID_av,
1475  this->imports_.view_device (),
1476  numImportPacketsPerLID_av);
1477  }
1478  }
1479  else { // constant number of packets per LID
1480  if (verbose) {
1481  std::ostringstream os;
1482  os << *prefix << "7.1. Const # packets per LID: " << endl
1483  << *prefix << " "
1484  << dualViewStatusToString (this->exports_, "exports_")
1485  << endl
1486  << *prefix << " "
1487  << dualViewStatusToString (this->exports_, "imports_")
1488  << endl;
1489  std::cerr << os.str ();
1490  }
1491  // imports_ is for output only, so we don't need to sync it
1492  // before marking it as modified. However, in order to
1493  // prevent spurious debug-mode errors (e.g., "modified on
1494  // both device and host"), we first need to clear its
1495  // "modified" flags.
1496  this->imports_.clear_sync_state ();
1497 
1498  if (verbose) {
1499  std::ostringstream os;
1500  os << *prefix << "7.2. Comm on "
1501  << (commOnHost ? "host" : "device")
1502  << "; call doPosts" << endl;
1503  std::cerr << os.str ();
1504  }
1505  if (commOnHost) {
1506  this->imports_.modify_host ();
1507  distributorActor_.doPosts
1508  (distributorPlan,
1509  create_const_view (this->exports_.view_host ()),
1510  constantNumPackets,
1511  this->imports_.view_host ());
1512  }
1513  else { // pack on device
1514  Kokkos::fence("DistObject::doPosts-2"); // for UVM
1515  this->imports_.modify_device ();
1516  distributorActor_.doPosts
1517  (distributorPlan,
1518  create_const_view (this->exports_.view_device ()),
1519  constantNumPackets,
1520  this->imports_.view_device ());
1521  } // commOnHost
1522  } // constant or variable num packets per LID
1523  }
1524 
// doPackAndPrepare: call the packAndPrepare overload that takes an
// execution space, packing into exports_ and numExportPacketsPerLID_.
//
// Parameters:
//  src                - source object handed to packAndPrepare.
//  exportLIDs         - LIDs handed to packAndPrepare.
//  constantNumPackets - passed by non-const reference to packAndPrepare,
//                       which may update it (see the comment below).
//  space              - execution space instance forwarded to packAndPrepare.
//
// In debug mode, exceptions from packAndPrepare are caught locally and the
// local success flag and message are passed to Details::checkGlobalError
// together with the Map's communicator. Outside debug mode, exceptions
// propagate to the caller.
// NOTE(review): presumably checkGlobalError reports the failure on all
// processes of the communicator; confirm in
// Tpetra_Details_checkGlobalError.hpp.
1525  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1526  void
1527  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
1528  doPackAndPrepare(const SrcDistObject& src,
1529  const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
1530  size_t& constantNumPackets,
1531  const execution_space &space)
1532  {
1533  using Details::ProfilingRegion;
1534  using std::endl;
1535  const bool debug = Details::Behavior::debug("DistObject");
1536 
1537  ProfilingRegion region_pp
1538  ("Tpetra::DistObject::doPackAndPrepare");
1539 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
1540  // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in
1541  // favor of Kokkos profiling.
1542  Teuchos::TimeMonitor packAndPrepareMon (*packAndPrepareTimer_);
1543 #endif // HAVE_TPETRA_TRANSFER_TIMERS
1544 
1545  // Ask the source to pack data. Also ask it whether there are
1546  // a constant number of packets per element
1547  // (constantNumPackets is an output argument). If there are,
1548  // constantNumPackets will come back nonzero. Otherwise, the
1549  // source will fill the numExportPacketsPerLID_ array.
1550 
1551  // FIXME (mfh 18 Oct 2017) if (! commOnHost), sync to device?
1552  // Alternately, make packAndPrepare take a "commOnHost"
1553  // argument to tell it where to leave the data?
1554  //
1555  // NOTE (mfh 04 Feb 2019) Subclasses of DistObject should have
1556  // the freedom to pack and unpack either on host or device.
1557  // We should prefer sync'ing only on demand. Thus, we can
1558  // answer the above question: packAndPrepare should not
1559  // take a commOnHost argument, and doTransferNew should sync
1560  // where needed, if needed.
1561  if (debug) {
  // lclSuccess becomes true only if packAndPrepare returns normally.
1562  std::ostringstream lclErrStrm;
1563  bool lclSuccess = false;
1564  try {
1565  this->packAndPrepare (src, exportLIDs, this->exports_,
1566  this->numExportPacketsPerLID_,
1567  constantNumPackets, space);
1568  lclSuccess = true;
1569  }
1570  catch (std::exception& e) {
1571  lclErrStrm << "packAndPrepare threw an exception: "
1572  << endl << e.what();
1573  }
1574  catch (...) {
1575  lclErrStrm << "packAndPrepare threw an exception "
1576  "not a subclass of std::exception.";
1577  }
1578  const char gblErrMsgHeader[] = "Tpetra::DistObject "
1579  "threw an exception in packAndPrepare on "
1580  "one or more processes in the DistObject's communicator.";
  // Called on both the success and the failure path.
1581  auto comm = getMap()->getComm();
1582  Details::checkGlobalError(std::cerr, lclSuccess,
1583  lclErrStrm.str().c_str(),
1584  gblErrMsgHeader, *comm);
1585  }
1586  else {
1587  this->packAndPrepare (src, exportLIDs, this->exports_,
1588  this->numExportPacketsPerLID_,
1589  constantNumPackets, space);
1590  }
1591  }
1592 
// doUnpackAndCombine: call the unpackAndCombine overload that takes an
// execution space, reading from imports_ and numImportPacketsPerLID_.
//
// Parameters:
//  remoteLIDs         - LIDs handed to unpackAndCombine.
//  constantNumPackets - forwarded to unpackAndCombine.
//  CM                 - combine mode forwarded to unpackAndCombine.
//  space              - execution space instance forwarded to
//                       unpackAndCombine.
//
// In debug mode, exceptions from unpackAndCombine are caught locally and the
// local success flag and message are passed to Details::checkGlobalError
// together with the Map's communicator. Outside debug mode, exceptions
// propagate to the caller.
// Note: the local error text names "doUnpackAndCombine", while the call that
// can throw is unpackAndCombine.
1593  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1594  void
1595  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
1596  doUnpackAndCombine(const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& remoteLIDs,
1597  size_t constantNumPackets,
1598  CombineMode CM,
1599  const execution_space &space)
1600  {
1601  using Details::ProfilingRegion;
1602  using std::endl;
1603  const bool debug = Details::Behavior::debug("DistObject");
1604 
1605  ProfilingRegion region_uc
1606  ("Tpetra::DistObject::doUnpackAndCombine");
1607 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
1608  // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in
1609  // favor of Kokkos profiling.
1610  Teuchos::TimeMonitor unpackAndCombineMon (*unpackAndCombineTimer_);
1611 #endif // HAVE_TPETRA_TRANSFER_TIMERS
1612 
1613  if (debug) {
  // lclSuccess becomes true only if unpackAndCombine returns normally.
1614  std::ostringstream lclErrStrm;
1615  bool lclSuccess = false;
1616  try {
1617  this->unpackAndCombine (remoteLIDs, this->imports_,
1618  this->numImportPacketsPerLID_,
1619  constantNumPackets, CM, space);
1620  lclSuccess = true;
1621  }
1622  catch (std::exception& e) {
1623  lclErrStrm << "doUnpackAndCombine threw an exception: "
1624  << endl << e.what();
1625  }
1626  catch (...) {
1627  lclErrStrm << "doUnpackAndCombine threw an exception "
1628  "not a subclass of std::exception.";
1629  }
1630  const char gblErrMsgHeader[] = "Tpetra::DistObject "
1631  "threw an exception in unpackAndCombine on "
1632  "one or more processes in the DistObject's communicator.";
  // Called on both the success and the failure path.
1633  auto comm = getMap()->getComm();
1634  Details::checkGlobalError(std::cerr, lclSuccess,
1635  lclErrStrm.str().c_str(),
1636  gblErrMsgHeader, *comm);
1637  }
1638  else {
1639  this->unpackAndCombine (remoteLIDs, this->imports_,
1640  this->numImportPacketsPerLID_,
1641  constantNumPackets, CM, space);
1642  }
1643  }
1644 
// Default implementation with an empty body: it does nothing with its
// arguments.
// NOTE(review): the qualifier and name lines (listing numbers 1647-1648) and
// the device-type lines (1653, 1656) are missing from this extract. The
// parameter list (source, count, two LID DualViews, CombineMode) matches the
// five-argument copyAndPermute; confirm against the header.
1645  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1646  void
1649  (const SrcDistObject&,
1650  const size_t,
1651  const Kokkos::DualView<
1652  const local_ordinal_type*,
1654  const Kokkos::DualView<
1655  const local_ordinal_type*,
1657  const CombineMode CM)
1658  {}
1659 
1660 // clang-format on
// copyAndPermute overload that takes an execution space instance.
// It fences `space`, calls the five-argument copyAndPermute, and then fences
// a default-constructed execution_space.
// NOTE(review): the line carrying the return type and qualified name
// (listing number 1662) is missing from this extract.
1661 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1663  const SrcDistObject &source, const size_t numSameIDs,
1664  const Kokkos::DualView<const local_ordinal_type *, buffer_device_type>
1665  &permuteToLIDs,
1666  const Kokkos::DualView<const local_ordinal_type *, buffer_device_type>
1667  &permuteFromLIDs,
1668  const CombineMode CM, const execution_space &space) {
1669  /*
1670  This is called if the derived class doesn't know how to copy and permute in
1671  an arbitrary execution space instance, but it was asked to anyway.
1672  Provide a safe illusion by actually doing the work in the default instance,
1673  and syncing the default instance with the provided instance.
1674  The caller expects
1675  1. any work in the provided instance to complete before this.
1676  2. This to complete before any following work in the provided instance.
1677  */
1678 
1679  space.fence(); // TODO: Tpetra::Details::Spaces::exec_space_wait
1680  copyAndPermute(source, numSameIDs, permuteToLIDs, permuteFromLIDs,
1681  CM); // default instance
1682  execution_space().fence(); // TODO:
1683  // Tpetra::Details::Spaces::exec_space_wait
1684 }
1685 // clang-format off
1686 
1687 
// Default implementation with an empty body: it does nothing with its
// arguments, including the size_t& output argument.
// NOTE(review): the qualifier and name lines (listing numbers 1690-1691) and
// the device-type lines (1695, 1698, 1701) are missing from this extract.
// The parameter list (source, LID DualView, packet DualView, size_t DualView,
// size_t&) matches the five-argument packAndPrepare called elsewhere in this
// file; confirm against the header.
1688  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1689  void
1692  (const SrcDistObject&,
1693  const Kokkos::DualView<
1694  const local_ordinal_type*,
1696  Kokkos::DualView<
1697  packet_type*,
1699  Kokkos::DualView<
1700  size_t*,
1702  size_t&)
1703  {}
1704 
1705 // clang-format on
// packAndPrepare overload that takes an execution space instance.
// It fences `space`, calls the five-argument packAndPrepare, and then fences
// a default-constructed execution_space.
// `exports` and `constantNumPackets` are passed by non-const reference;
// `numPacketsPerLID` is passed by value.
// NOTE(review): the line carrying the return type and qualified name
// (listing number 1707) is missing from this extract.
1706 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1708  const SrcDistObject &source,
1709  const Kokkos::DualView<const local_ordinal_type *, buffer_device_type>
1710  &exportLIDs,
1711  Kokkos::DualView<packet_type *, buffer_device_type> &exports,
1712  Kokkos::DualView<size_t *, buffer_device_type> numPacketsPerLID,
1713  size_t &constantNumPackets, const execution_space &space) {
1714  /*
1715  This is called if the derived class doesn't know how to pack and prepare in
1716  an arbitrary execution space instance, but it was asked to anyway.
1717  Provide a safe illusion by actually doing the work in the default instance,
1718  and syncing the default instance with the provided instance.
1719 
1720  The caller expects
1721  1. any work in the provided instance to complete before this.
1722  2. This to complete before any following work in the provided instance.
1723  */
1724 
1725  // wait for any work from prior operations in the provided instance to
1726  // complete
1727  space.fence(); // TODO: Details::Spaces::exec_space_wait
1728 
1729  // pack and prepare in the default instance.
1730  packAndPrepare(source, exportLIDs, exports, numPacketsPerLID,
1731  constantNumPackets); // default instance
1732 
1733  // wait for the default instance to complete before returning, so any
1734  // following work inserted into the provided instance will be done after this
1735  execution_space().fence(); // TODO: Details::Spaces::exec_space_wait
1736 }
1737 // clang-format off
1738 
// Default implementation with an empty body: it does nothing with its
// arguments.
// NOTE(review): the qualifier and name lines (listing numbers 1741-1742) are
// missing from this extract. The commented-out parameter names (importLIDs,
// imports, numPacketsPerLID, constantNumPackets, combineMode) match the
// five-argument unpackAndCombine; confirm against the header.
1739  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1740  void
1743  (const Kokkos::DualView<
1744  const local_ordinal_type*,
1745  buffer_device_type>& /* importLIDs */,
1746  Kokkos::DualView<
1747  packet_type*,
1748  buffer_device_type> /* imports */,
1749  Kokkos::DualView<
1750  size_t*,
1751  buffer_device_type> /* numPacketsPerLID */,
1752  const size_t /* constantNumPackets */,
1753  const CombineMode /* combineMode */)
1754  {}
1755 
1756 // clang-format on
// unpackAndCombine overload that takes an execution space instance.
// It fences `space`, calls the five-argument unpackAndCombine, and then
// fences a default-constructed execution_space.
// `imports` and `numPacketsPerLID` are passed by value.
// NOTE(review): the line carrying the return type and qualified name
// (listing number 1758) is missing from this extract.
1757 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1759  const Kokkos::DualView<const local_ordinal_type *, buffer_device_type>
1760  &importLIDs,
1761  Kokkos::DualView<packet_type *, buffer_device_type> imports,
1762  Kokkos::DualView<size_t *, buffer_device_type> numPacketsPerLID,
1763  const size_t constantNumPackets, const CombineMode combineMode,
1764  const execution_space &space) {
1765  // Wait for any work in the provided space to complete
1766  space.fence(); // TODO: Details::Spaces::exec_space_wait(execution_space(),
1767  // space);
1768  unpackAndCombine(importLIDs, imports, numPacketsPerLID, constantNumPackets,
1769  combineMode); // default instance
1770  // wait for unpack to finish in the default instance, since the caller
1771  // may be expecting sequential semantics in the `space` instance
1772  execution_space().fence(); // TODO: Details::Spaces::exec_space_wait(space,
1773  // execution_space());
1774 }
1775 // clang-format off
1776 
// print: wrap the given std::ostream in a Teuchos::FancyOStream (through a
// non-owning rcpFromRef) and call describe() on it with
// Teuchos::VERB_DEFAULT.
// NOTE(review): the line carrying the return type and qualified name
// (listing number 1778) is missing from this extract; the trailing
// "std::ostream &os) const" matches print(std::ostream&) const.
1777 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1779  std::ostream &os) const {
1780  using std::endl;
1781  using Teuchos::FancyOStream;
1782  using Teuchos::getFancyOStream;
1783  using Teuchos::RCP;
1784  using Teuchos::rcpFromRef;
1785 
1786  RCP<FancyOStream> out = getFancyOStream(rcpFromRef(os));
1787  this->describe(*out, Teuchos::VERB_DEFAULT);
1788 }
1789 
// Build a std::string prefix for verbose output by forwarding to
// Details::createPrefix with the raw communicator pointer, className and
// methodName. Returns the result as a std::unique_ptr<std::string>.
// If this object's Map is null, a null communicator is passed instead.
// NOTE(review): the qualified-name line (listing number 1792) is missing
// from this extract; presumably this is DistObject::createPrefix, as called
// from endTransfer.
1790 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1791 std::unique_ptr<std::string>
1793  const char className[], const char methodName[]) const {
1794  auto map = this->getMap();
1795  auto comm = map.is_null() ? Teuchos::null : map->getComm();
1796  return Details::createPrefix(comm.getRawPtr(), className, methodName);
1797 }
1798 
// Nonmember helper: call input->removeEmptyProcessesInPlace(newMap), then
// set `input` to Teuchos::null when newMap is null (this process is
// excluded).
// `input` is dereferenced unconditionally, so it must be non-null on entry.
// NOTE(review): the line carrying the return type and function name (listing
// number 1800) is missing from this extract.
1799 template <class DistObjectType>
1801  Teuchos::RCP<DistObjectType> &input,
1802  const Teuchos::RCP<const Map<typename DistObjectType::local_ordinal_type,
1803  typename DistObjectType::global_ordinal_type,
1804  typename DistObjectType::node_type>> &newMap) {
1805  input->removeEmptyProcessesInPlace(newMap);
1806  if (newMap.is_null()) { // my process is excluded
1807  input = Teuchos::null;
1808  }
1809 }
1810 
// Convenience overload: compute the new Map by calling
// removeEmptyProcesses() on the input's current Map, then forward to the
// two-argument overload.
// `input` and its Map are dereferenced unconditionally, so both must be
// non-null on entry.
1811 template <class DistObjectType>
1812 void removeEmptyProcessesInPlace(Teuchos::RCP<DistObjectType> &input) {
1813  auto newMap = input->getMap()->removeEmptyProcesses();
1814  removeEmptyProcessesInPlace<DistObjectType>(input, newMap);
1815 }
1816 
// Each macro below expands to an explicit instantiation of the DistObject
// class template. DistObject is named unqualified, so the macros are
// presumably meant to be expanded inside namespace Tpetra; confirm at the
// call sites.
1817 // Explicit instantiation macro for general DistObject.
// SCALAR is used as the Packet template argument.
1818 #define TPETRA_DISTOBJECT_INSTANT(SCALAR, LO, GO, NODE) \
1819  template class DistObject<SCALAR, LO, GO, NODE>;
1820 
1821 // Explicit instantiation macro for DistObject<char, ...>.
1822 // The "SLGN" stuff above doesn't work for Packet=char.
1823 #define TPETRA_DISTOBJECT_INSTANT_CHAR(LO, GO, NODE) \
1824  template class DistObject<char, LO, GO, NODE>;
1825 
1826 } // namespace Tpetra
1827 
1828 #endif // TPETRA_DISTOBJECT_DEF_HPP
1829 // clang-format on
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distribution.
const Details::DistributorPlan & getPlan() const
Get this Distributor's DistributorPlan.
virtual void copyAndPermute(const SrcDistObject &source, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM)
Perform copies and permutations that are local to the calling (MPI) process.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
void doImport(const SrcDistObject &source, const Import< LocalOrdinal, GlobalOrdinal, Node > &importer, const CombineMode CM, const bool restrictedMode=false)
Import data into this object using an Import object ("forward mode").
typename::Kokkos::ArithTraits< Packet >::val_type packet_type
The type of each datum being sent or received in an Import or Export.
void print(std::ostream &os) const
Print this object to the given output stream.
virtual bool reallocArraysForNumPacketsPerLid(const size_t numExportLIDs, const size_t numImportLIDs)
Reallocate numExportPacketsPerLID_ and/or numImportPacketsPerLID_, if necessary.
bool isDistributed() const
Whether this is a globally distributed object.
void removeEmptyProcessesInPlace(Teuchos::RCP< DistObjectType > &input, const Teuchos::RCP< const Map< typename DistObjectType::local_ordinal_type, typename DistObjectType::global_ordinal_type, typename DistObjectType::node_type > > &newMap)
Remove processes which contain no elements in this object's Map.
virtual void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< packet_type *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode)
Perform any unpacking and combining after communication.
static bool debug()
Whether Tpetra is in debug mode.
virtual void doTransfer(const SrcDistObject &src, const ::Tpetra::Details::Transfer< local_ordinal_type, global_ordinal_type, node_type > &transfer, const char modeString[], const ReverseOption revOp, const CombineMode CM, const bool restrictedMode)
Redistribute data across (MPI) processes.
void beginTransfer(const SrcDistObject &src, const ::Tpetra::Details::Transfer< local_ordinal_type, global_ordinal_type, node_type > &transfer, const char modeString[], const ReverseOption revOp, const CombineMode CM, const bool restrictedMode)
Implementation detail of doTransfer.
typename device_type::execution_space execution_space
The Kokkos execution space.
Kokkos::DualView< T *, DT > getDualViewCopyFromArrayView(const Teuchos::ArrayView< const T > &x_av, const char label[], const bool leaveOnHost)
Get a 1-D Kokkos::DualView which is a deep copy of the input Teuchos::ArrayView (which views host mem...
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
virtual void packAndPrepare(const SrcDistObject &source, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &exportLIDs, Kokkos::DualView< packet_type *, buffer_device_type > &exports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, size_t &constantNumPackets)
Pack data and metadata for communication (sends).
Sets up and executes a communication plan for a Tpetra DistObject.
static bool verbose()
Whether Tpetra is in verbose mode.
CombineMode
Rule for combining data in an Import or Export.
bool reallocDualViewIfNeeded(Kokkos::DualView< ValueType *, DeviceType > &dv, const size_t newSize, const char newLabel[], const size_t tooBigFactor=2, const bool needFenceBeforeRealloc=true)
Reallocate the DualView in/out argument, if needed.
Abstract base class for objects that can be the source of an Import or Export operation.
Declaration and definition of Tpetra::Details::reallocDualViewIfNeeded, an implementation detail of T...
LocalOrdinal local_ordinal_type
The type of local indices.
Replace old values with zero.
std::string combineModeToString(const CombineMode combineMode)
Human-readable string representation of the given CombineMode.
ReverseOption
Whether the data transfer should be performed in forward or reverse mode.
DistObject(const Teuchos::RCP< const map_type > &map)
Constructor.
std::string dualViewStatusToString(const DualViewType &dv, const char name[])
Return the status of the given Kokkos::DualView, as a human-readable string.
virtual std::string description() const
One-line description of this object.
bool transferArrived() const
Whether the data from an import/export operation has arrived, and is ready for the unpack and combine...
virtual size_t constantNumberOfPackets() const
Whether the implementation's instance promises always to have a constant number of packets per LID (l...
virtual bool reallocImportsIfNeeded(const size_t newSize, const bool verbose, const std::string *prefix, const bool remoteLIDsContiguous=false, const CombineMode CM=INSERT)
Reallocate imports_ if needed.
void doExport(const SrcDistObject &source, const Export< LocalOrdinal, GlobalOrdinal, Node > &exporter, const CombineMode CM, const bool restrictedMode=false)
Export data into this object using an Export object ("forward mode").
Teuchos::ArrayView< typename DualViewType::t_dev::value_type > getArrayViewFromDualView(const DualViewType &x)
Get a Teuchos::ArrayView which views the host Kokkos::View of the input 1-D Kokkos::DualView.
Stand-alone utility functions and macros.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print a description of this object to the given output stream.
Kokkos::Device< typename device_type::execution_space, buffer_memory_space > buffer_device_type
Kokkos::Device specialization for communication buffers.
Base class for distributed Tpetra objects that support data redistribution.
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
Definition: Tpetra_Util.cpp:71
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap)
Remove processes which contain no entries in this object's Map.
Description of Tpetra's behavior.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.